code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62 from ganeti import rpc
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70 # States of instance
  71 INSTANCE_UP = [constants.ADMINST_UP]
  72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
  74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  76
  77
  78 class ResultWithJobs:
  79   """Data container for LU results with jobs.
  80
  81   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  82   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  83   contained in the C{jobs} attribute and include the job IDs in the opcode
  84   result.
  85
  86   """
  87   def __init__(self, jobs, **kwargs):
  88     """Initializes this class.
  89
  90     Additional return values can be specified as keyword arguments.
  91
  92     @type jobs: list of lists of L{opcode.OpCode}
  93     @param jobs: A list of lists of opcode objects
  94
  95     """
  96     self.jobs = jobs
  97     self.other = kwargs
  98
  99
 100 class LogicalUnit(object):
 101   """Logical Unit base class.
 102
 103   Subclasses must follow these rules:
 104     - implement ExpandNames
 105     - implement CheckPrereq (except when tasklets are used)
 106     - implement Exec (except when tasklets are used)
 107     - implement BuildHooksEnv
 108     - implement BuildHooksNodes
 109     - redefine HPATH and HTYPE
 110     - optionally redefine their run requirements:
 111         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 112
 113   Note that all commands require root permissions.
 114
 115   @ivar dry_run_result: the value (if any) that will be returned to the caller
 116       in dry-run mode (signalled by opcode dry_run parameter)
 117
 118   """
 119   HPATH = None
 120   HTYPE = None
 121   REQ_BGL = True
 122
 123   def __init__(self, processor, op, context, rpc_runner):
 124     """Constructor for LogicalUnit.
 125
 126     This needs to be overridden in derived classes in order to check op
 127     validity.
 128
 129     """
 130     self.proc = processor
 131     self.op = op
 132     self.cfg = context.cfg
 133     self.glm = context.glm
 134     # readability alias
 135     self.owned_locks = context.glm.list_owned
 136     self.context = context
 137     self.rpc = rpc_runner
 138     # Dicts used to declare locking needs to mcpu
 139     self.needed_locks = None
 140     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 141     self.add_locks = {}
 142     self.remove_locks = {}
 143     # Used to force good behavior when calling helper functions
 144     self.recalculate_locks = {}
 145     # logging
 146     self.Log = processor.Log # pylint: disable=C0103
 147     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 148     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 149     self.LogStep = processor.LogStep # pylint: disable=C0103
 150     # support for dry-run
 151     self.dry_run_result = None
 152     # support for generic debug attribute
 153     if (not hasattr(self.op, "debug_level") or
 154         not isinstance(self.op.debug_level, int)):
 155       self.op.debug_level = 0
 156
 157     # Tasklets
 158     self.tasklets = None
 159
 160     # Validate opcode parameters and set defaults
 161     self.op.Validate(True)
 162
 163     self.CheckArguments()
 164
 165   def CheckArguments(self):
 166     """Check syntactic validity for the opcode arguments.
 167
 168     This method is for doing a simple syntactic check and ensure
 169     validity of opcode parameters, without any cluster-related
 170     checks. While the same can be accomplished in ExpandNames and/or
 171     CheckPrereq, doing these separate is better because:
 172
 173       - ExpandNames is left as as purely a lock-related function
 174       - CheckPrereq is run after we have acquired locks (and possible
 175         waited for them)
 176
 177     The function is allowed to change the self.op attribute so that
 178     later methods can no longer worry about missing parameters.
 179
 180     """
 181     pass
 182
 183   def ExpandNames(self):
 184     """Expand names for this LU.
 185
 186     This method is called before starting to execute the opcode, and it should
 187     update all the parameters of the opcode to their canonical form (e.g. a
 188     short node name must be fully expanded after this method has successfully
 189     completed). This way locking, hooks, logging, etc. can work correctly.
 190
 191     LUs which implement this method must also populate the self.needed_locks
 192     member, as a dict with lock levels as keys, and a list of needed lock names
 193     as values. Rules:
 194
 195       - use an empty dict if you don't need any lock
 196       - if you don't need any lock at a particular level omit that level
 197       - don't put anything for the BGL level
 198       - if you want all locks at a level use locking.ALL_SET as a value
 199
 200     If you need to share locks (rather than acquire them exclusively) at one
 201     level you can modify self.share_locks, setting a true value (usually 1) for
 202     that level. By default locks are not shared.
 203
 204     This function can also define a list of tasklets, which then will be
 205     executed in order instead of the usual LU-level CheckPrereq and Exec
 206     functions, if those are not defined by the LU.
 207
 208     Examples::
 209
 210       # Acquire all nodes and one instance
 211       self.needed_locks = {
 212         locking.LEVEL_NODE: locking.ALL_SET,
 213         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 214       }
 215       # Acquire just two nodes
 216       self.needed_locks = {
 217         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 218       }
 219       # Acquire no locks
 220       self.needed_locks = {} # No, you can't leave it to the default value None
 221
 222     """
 223     # The implementation of this method is mandatory only if the new LU is
 224     # concurrent, so that old LUs don't need to be changed all at the same
 225     # time.
 226     if self.REQ_BGL:
 227       self.needed_locks = {} # Exclusive LUs don't need locks.
 228     else:
 229       raise NotImplementedError
 230
 231   def DeclareLocks(self, level):
 232     """Declare LU locking needs for a level
 233
 234     While most LUs can just declare their locking needs at ExpandNames time,
 235     sometimes there's the need to calculate some locks after having acquired
 236     the ones before. This function is called just before acquiring locks at a
 237     particular level, but after acquiring the ones at lower levels, and permits
 238     such calculations. It can be used to modify self.needed_locks, and by
 239     default it does nothing.
 240
 241     This function is only called if you have something already set in
 242     self.needed_locks for the level.
 243
 244     @param level: Locking level which is going to be locked
 245     @type level: member of ganeti.locking.LEVELS
 246
 247     """
 248
 249   def CheckPrereq(self):
 250     """Check prerequisites for this LU.
 251
 252     This method should check that the prerequisites for the execution
 253     of this LU are fulfilled. It can do internode communication, but
 254     it should be idempotent - no cluster or system changes are
 255     allowed.
 256
 257     The method should raise errors.OpPrereqError in case something is
 258     not fulfilled. Its return value is ignored.
 259
 260     This method should also update all the parameters of the opcode to
 261     their canonical form if it hasn't been done by ExpandNames before.
 262
 263     """
 264     if self.tasklets is not None:
 265       for (idx, tl) in enumerate(self.tasklets):
 266         logging.debug("Checking prerequisites for tasklet %s/%s",
 267                       idx + 1, len(self.tasklets))
 268         tl.CheckPrereq()
 269     else:
 270       pass
 271
 272   def Exec(self, feedback_fn):
 273     """Execute the LU.
 274
 275     This method should implement the actual work. It should raise
 276     errors.OpExecError for failures that are somewhat dealt with in
 277     code, or expected.
 278
 279     """
 280     if self.tasklets is not None:
 281       for (idx, tl) in enumerate(self.tasklets):
 282         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 283         tl.Exec(feedback_fn)
 284     else:
 285       raise NotImplementedError
 286
 287   def BuildHooksEnv(self):
 288     """Build hooks environment for this LU.
 289
 290     @rtype: dict
 291     @return: Dictionary containing the environment that will be used for
 292       running the hooks for this LU. The keys of the dict must not be prefixed
 293       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 294       will extend the environment with additional variables. If no environment
 295       should be defined, an empty dictionary should be returned (not C{None}).
 296     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 297       will not be called.
 298
 299     """
 300     raise NotImplementedError
 301
 302   def BuildHooksNodes(self):
 303     """Build list of nodes to run LU's hooks.
 304
 305     @rtype: tuple; (list, list)
 306     @return: Tuple containing a list of node names on which the hook
 307       should run before the execution and a list of node names on which the
 308       hook should run after the execution. No nodes should be returned as an
 309       empty list (and not None).
 310     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 311       will not be called.
 312
 313     """
 314     raise NotImplementedError
 315
 316   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 317     """Notify the LU about the results of its hooks.
 318
 319     This method is called every time a hooks phase is executed, and notifies
 320     the Logical Unit about the hooks' result. The LU can then use it to alter
 321     its result based on the hooks.  By default the method does nothing and the
 322     previous result is passed back unchanged but any LU can define it if it
 323     wants to use the local cluster hook-scripts somehow.
 324
 325     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 326         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 327     @param hook_results: the results of the multi-node hooks rpc call
 328     @param feedback_fn: function used send feedback back to the caller
 329     @param lu_result: the previous Exec result this LU had, or None
 330         in the PRE phase
 331     @return: the new Exec result, based on the previous result
 332         and hook results
 333
 334     """
 335     # API must be kept, thus we ignore the unused argument and could
 336     # be a function warnings
 337     # pylint: disable=W0613,R0201
 338     return lu_result
 339
 340   def _ExpandAndLockInstance(self):
 341     """Helper function to expand and lock an instance.
 342
 343     Many LUs that work on an instance take its name in self.op.instance_name
 344     and need to expand it and then declare the expanded name for locking. This
 345     function does it, and then updates self.op.instance_name to the expanded
 346     name. It also initializes needed_locks as a dict, if this hasn't been done
 347     before.
 348
 349     """
 350     if self.needed_locks is None:
 351       self.needed_locks = {}
 352     else:
 353       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 354         "_ExpandAndLockInstance called with instance-level locks set"
 355     self.op.instance_name = _ExpandInstanceName(self.cfg,
 356                                                 self.op.instance_name)
 357     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 358
 359   def _LockInstancesNodes(self, primary_only=False,
 360                           level=locking.LEVEL_NODE):
 361     """Helper function to declare instances' nodes for locking.
 362
 363     This function should be called after locking one or more instances to lock
 364     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 365     with all primary or secondary nodes for instances already locked and
 366     present in self.needed_locks[locking.LEVEL_INSTANCE].
 367
 368     It should be called from DeclareLocks, and for safety only works if
 369     self.recalculate_locks[locking.LEVEL_NODE] is set.
 370
 371     In the future it may grow parameters to just lock some instance's nodes, or
 372     to just lock primaries or secondary nodes, if needed.
 373
 374     If should be called in DeclareLocks in a way similar to::
 375
 376       if level == locking.LEVEL_NODE:
 377         self._LockInstancesNodes()
 378
 379     @type primary_only: boolean
 380     @param primary_only: only lock primary nodes of locked instances
 381     @param level: Which lock level to use for locking nodes
 382
 383     """
 384     assert level in self.recalculate_locks, \
 385       "_LockInstancesNodes helper function called with no nodes to recalculate"
 386
 387     # TODO: check if we're really been called with the instance locks held
 388
 389     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 390     # future we might want to have different behaviors depending on the value
 391     # of self.recalculate_locks[locking.LEVEL_NODE]
 392     wanted_nodes = []
 393     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 394     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 395       wanted_nodes.append(instance.primary_node)
 396       if not primary_only:
 397         wanted_nodes.extend(instance.secondary_nodes)
 398
 399     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 400       self.needed_locks[level] = wanted_nodes
 401     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 402       self.needed_locks[level].extend(wanted_nodes)
 403     else:
 404       raise errors.ProgrammerError("Unknown recalculation mode")
 405
 406     del self.recalculate_locks[level]
 407
 408
 409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 410   """Simple LU which runs no hooks.
 411
 412   This LU is intended as a parent for other LogicalUnits which will
 413   run no hooks, in order to reduce duplicate code.
 414
 415   """
 416   HPATH = None
 417   HTYPE = None
 418
 419   def BuildHooksEnv(self):
 420     """Empty BuildHooksEnv for NoHooksLu.
 421
 422     This just raises an error.
 423
 424     """
 425     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 426
 427   def BuildHooksNodes(self):
 428     """Empty BuildHooksNodes for NoHooksLU.
 429
 430     """
 431     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 432
 433
 434 class Tasklet:
 435   """Tasklet base class.
 436
 437   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 438   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 439   tasklets know nothing about locks.
 440
 441   Subclasses must follow these rules:
 442     - Implement CheckPrereq
 443     - Implement Exec
 444
 445   """
 446   def __init__(self, lu):
 447     self.lu = lu
 448
 449     # Shortcuts
 450     self.cfg = lu.cfg
 451     self.rpc = lu.rpc
 452
 453   def CheckPrereq(self):
 454     """Check prerequisites for this tasklets.
 455
 456     This method should check whether the prerequisites for the execution of
 457     this tasklet are fulfilled. It can do internode communication, but it
 458     should be idempotent - no cluster or system changes are allowed.
 459
 460     The method should raise errors.OpPrereqError in case something is not
 461     fulfilled. Its return value is ignored.
 462
 463     This method should also update all parameters to their canonical form if it
 464     hasn't been done before.
 465
 466     """
 467     pass
 468
 469   def Exec(self, feedback_fn):
 470     """Execute the tasklet.
 471
 472     This method should implement the actual work. It should raise
 473     errors.OpExecError for failures that are somewhat dealt with in code, or
 474     expected.
 475
 476     """
 477     raise NotImplementedError
 478
 479
 480 class _QueryBase:
 481   """Base for query utility classes.
 482
 483   """
 484   #: Attribute holding field definitions
 485   FIELDS = None
 486
 487   def __init__(self, qfilter, fields, use_locking):
 488     """Initializes this class.
 489
 490     """
 491     self.use_locking = use_locking
 492
 493     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 494                              namefield="name")
 495     self.requested_data = self.query.RequestedData()
 496     self.names = self.query.RequestedNames()
 497
 498     # Sort only if no names were requested
 499     self.sort_by_name = not self.names
 500
 501     self.do_locking = None
 502     self.wanted = None
 503
 504   def _GetNames(self, lu, all_names, lock_level):
 505     """Helper function to determine names asked for in the query.
 506
 507     """
 508     if self.do_locking:
 509       names = lu.owned_locks(lock_level)
 510     else:
 511       names = all_names
 512
 513     if self.wanted == locking.ALL_SET:
 514       assert not self.names
 515       # caller didn't specify names, so ordering is not important
 516       return utils.NiceSort(names)
 517
 518     # caller specified names and we must keep the same order
 519     assert self.names
 520     assert not self.do_locking or lu.glm.is_owned(lock_level)
 521
 522     missing = set(self.wanted).difference(names)
 523     if missing:
 524       raise errors.OpExecError("Some items were removed before retrieving"
 525                                " their data: %s" % missing)
 526
 527     # Return expanded names
 528     return self.wanted
 529
 530   def ExpandNames(self, lu):
 531     """Expand names for this query.
 532
 533     See L{LogicalUnit.ExpandNames}.
 534
 535     """
 536     raise NotImplementedError()
 537
 538   def DeclareLocks(self, lu, level):
 539     """Declare locks for this query.
 540
 541     See L{LogicalUnit.DeclareLocks}.
 542
 543     """
 544     raise NotImplementedError()
 545
 546   def _GetQueryData(self, lu):
 547     """Collects all data for this query.
 548
 549     @return: Query data object
 550
 551     """
 552     raise NotImplementedError()
 553
 554   def NewStyleQuery(self, lu):
 555     """Collect data and execute query.
 556
 557     """
 558     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 559                                   sort_by_name=self.sort_by_name)
 560
 561   def OldStyleQuery(self, lu):
 562     """Collect data and execute query.
 563
 564     """
 565     return self.query.OldStyleQuery(self._GetQueryData(lu),
 566                                     sort_by_name=self.sort_by_name)
 567
 568
 569 def _ShareAll():
 570   """Returns a dict declaring all lock levels shared.
 571
 572   """
 573   return dict.fromkeys(locking.LEVELS, 1)
 574
 575
 576 def _MakeLegacyNodeInfo(data):
 577   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 578
 579   Converts the data into a single dictionary. This is fine for most use cases,
 580   but some require information from more than one volume group or hypervisor.
 581
 582   """
 583   (bootid, (vg_info, ), (hv_info, )) = data
 584
 585   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 586     "bootid": bootid,
 587     })
 588
 589
 590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 591   """Checks if the owned node groups are still correct for an instance.
 592
 593   @type cfg: L{config.ConfigWriter}
 594   @param cfg: The cluster configuration
 595   @type instance_name: string
 596   @param instance_name: Instance name
 597   @type owned_groups: set or frozenset
 598   @param owned_groups: List of currently owned node groups
 599
 600   """
 601   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 602
 603   if not owned_groups.issuperset(inst_groups):
 604     raise errors.OpPrereqError("Instance %s's node groups changed since"
 605                                " locks were acquired, current groups are"
 606                                " are '%s', owning groups '%s'; retry the"
 607                                " operation" %
 608                                (instance_name,
 609                                 utils.CommaJoin(inst_groups),
 610                                 utils.CommaJoin(owned_groups)),
 611                                errors.ECODE_STATE)
 612
 613   return inst_groups
 614
 615
 616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 617   """Checks if the instances in a node group are still correct.
 618
 619   @type cfg: L{config.ConfigWriter}
 620   @param cfg: The cluster configuration
 621   @type group_uuid: string
 622   @param group_uuid: Node group UUID
 623   @type owned_instances: set or frozenset
 624   @param owned_instances: List of currently owned instances
 625
 626   """
 627   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 628   if owned_instances != wanted_instances:
 629     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 630                                " locks were acquired, wanted '%s', have '%s';"
 631                                " retry the operation" %
 632                                (group_uuid,
 633                                 utils.CommaJoin(wanted_instances),
 634                                 utils.CommaJoin(owned_instances)),
 635                                errors.ECODE_STATE)
 636
 637   return wanted_instances
 638
 639
 640 def _SupportsOob(cfg, node):
 641   """Tells if node supports OOB.
 642
 643   @type cfg: L{config.ConfigWriter}
 644   @param cfg: The cluster configuration
 645   @type node: L{objects.Node}
 646   @param node: The node
 647   @return: The OOB script if supported or an empty string otherwise
 648
 649   """
 650   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 651
 652
 653 def _GetWantedNodes(lu, nodes):
 654   """Returns list of checked and expanded node names.
 655
 656   @type lu: L{LogicalUnit}
 657   @param lu: the logical unit on whose behalf we execute
 658   @type nodes: list
 659   @param nodes: list of node names or None for all nodes
 660   @rtype: list
 661   @return: the list of nodes, sorted
 662   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 663
 664   """
 665   if nodes:
 666     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 667
 668   return utils.NiceSort(lu.cfg.GetNodeList())
 669
 670
 671 def _GetWantedInstances(lu, instances):
 672   """Returns list of checked and expanded instance names.
 673
 674   @type lu: L{LogicalUnit}
 675   @param lu: the logical unit on whose behalf we execute
 676   @type instances: list
 677   @param instances: list of instance names or None for all instances
 678   @rtype: list
 679   @return: the list of instances, sorted
 680   @raise errors.OpPrereqError: if the instances parameter is wrong type
 681   @raise errors.OpPrereqError: if any of the passed instances is not found
 682
 683   """
 684   if instances:
 685     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 686   else:
 687     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 688   return wanted
 689
 690
 691 def _GetUpdatedParams(old_params, update_dict,
 692                       use_default=True, use_none=False):
 693   """Return the new version of a parameter dictionary.
 694
 695   @type old_params: dict
 696   @param old_params: old parameters
 697   @type update_dict: dict
 698   @param update_dict: dict containing new parameter values, or
 699       constants.VALUE_DEFAULT to reset the parameter to its default
 700       value
 701   @param use_default: boolean
 702   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 703       values as 'to be deleted' values
 704   @param use_none: boolean
 705   @type use_none: whether to recognise C{None} values as 'to be
 706       deleted' values
 707   @rtype: dict
 708   @return: the new parameter dictionary
 709
 710   """
 711   params_copy = copy.deepcopy(old_params)
 712   for key, val in update_dict.iteritems():
 713     if ((use_default and val == constants.VALUE_DEFAULT) or
 714         (use_none and val is None)):
 715       try:
 716         del params_copy[key]
 717       except KeyError:
 718         pass
 719     else:
 720       params_copy[key] = val
 721   return params_copy
 722
 723
 724 def _ReleaseLocks(lu, level, names=None, keep=None):
 725   """Releases locks owned by an LU.
 726
 727   @type lu: L{LogicalUnit}
 728   @param level: Lock level
 729   @type names: list or None
 730   @param names: Names of locks to release
 731   @type keep: list or None
 732   @param keep: Names of locks to retain
 733
 734   """
 735   assert not (keep is not None and names is not None), \
 736          "Only one of the 'names' and the 'keep' parameters can be given"
 737
 738   if names is not None:
 739     should_release = names.__contains__
 740   elif keep:
 741     should_release = lambda name: name not in keep
 742   else:
 743     should_release = None
 744
 745   owned = lu.owned_locks(level)
 746   if not owned:
 747     # Not owning any lock at this level, do nothing
 748     pass
 749
 750   elif should_release:
 751     retain = []
 752     release = []
 753
 754     # Determine which locks to release
 755     for name in owned:
 756       if should_release(name):
 757         release.append(name)
 758       else:
 759         retain.append(name)
 760
 761     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 762
 763     # Release just some locks
 764     lu.glm.release(level, names=release)
 765
 766     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 767   else:
 768     # Release everything
 769     lu.glm.release(level)
 770
 771     assert not lu.glm.is_owned(level), "No locks should be owned"
 772
 773
 774 def _MapInstanceDisksToNodes(instances):
 775   """Creates a map from (node, volume) to instance name.
 776
 777   @type instances: list of L{objects.Instance}
 778   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 779
 780   """
 781   return dict(((node, vol), inst.name)
 782               for inst in instances
 783               for (node, vols) in inst.MapLVsByNode().items()
 784               for vol in vols)
 785
 786
 787 def _RunPostHook(lu, node_name):
 788   """Runs the post-hook for an opcode on a single node.
 789
 790   """
 791   hm = lu.proc.BuildHooksManager(lu)
 792   try:
 793     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 794   except:
 795     # pylint: disable=W0702
 796     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 797
 798
 799 def _CheckOutputFields(static, dynamic, selected):
 800   """Checks whether all selected fields are valid.
 801
 802   @type static: L{utils.FieldSet}
 803   @param static: static fields set
 804   @type dynamic: L{utils.FieldSet}
 805   @param dynamic: dynamic fields set
 806
 807   """
 808   f = utils.FieldSet()
 809   f.Extend(static)
 810   f.Extend(dynamic)
 811
 812   delta = f.NonMatching(selected)
 813   if delta:
 814     raise errors.OpPrereqError("Unknown output fields selected: %s"
 815                                % ",".join(delta), errors.ECODE_INVAL)
 816
 817
 818 def _CheckGlobalHvParams(params):
 819   """Validates that given hypervisor params are not global ones.
 820
 821   This will ensure that instances don't get customised versions of
 822   global params.
 823
 824   """
 825   used_globals = constants.HVC_GLOBALS.intersection(params)
 826   if used_globals:
 827     msg = ("The following hypervisor parameters are global and cannot"
 828            " be customized at instance level, please modify them at"
 829            " cluster level: %s" % utils.CommaJoin(used_globals))
 830     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 831
 832
 833 def _CheckNodeOnline(lu, node, msg=None):
 834   """Ensure that a given node is online.
 835
 836   @param lu: the LU on behalf of which we make the check
 837   @param node: the node to check
 838   @param msg: if passed, should be a message to replace the default one
 839   @raise errors.OpPrereqError: if the node is offline
 840
 841   """
 842   if msg is None:
 843     msg = "Can't use offline node"
 844   if lu.cfg.GetNodeInfo(node).offline:
 845     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 846
 847
 848 def _CheckNodeNotDrained(lu, node):
 849   """Ensure that a given node is not drained.
 850
 851   @param lu: the LU on behalf of which we make the check
 852   @param node: the node to check
 853   @raise errors.OpPrereqError: if the node is drained
 854
 855   """
 856   if lu.cfg.GetNodeInfo(node).drained:
 857     raise errors.OpPrereqError("Can't use drained node %s" % node,
 858                                errors.ECODE_STATE)
 859
 860
 861 def _CheckNodeVmCapable(lu, node):
 862   """Ensure that a given node is vm capable.
 863
 864   @param lu: the LU on behalf of which we make the check
 865   @param node: the node to check
 866   @raise errors.OpPrereqError: if the node is not vm capable
 867
 868   """
 869   if not lu.cfg.GetNodeInfo(node).vm_capable:
 870     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 871                                errors.ECODE_STATE)
 872
 873
 874 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 875   """Ensure that a node supports a given OS.
 876
 877   @param lu: the LU on behalf of which we make the check
 878   @param node: the node to check
 879   @param os_name: the OS to query about
 880   @param force_variant: whether to ignore variant errors
 881   @raise errors.OpPrereqError: if the node is not supporting the OS
 882
 883   """
 884   result = lu.rpc.call_os_get(node, os_name)
 885   result.Raise("OS '%s' not in supported OS list for node %s" %
 886                (os_name, node),
 887                prereq=True, ecode=errors.ECODE_INVAL)
 888   if not force_variant:
 889     _CheckOSVariant(result.payload, os_name)
 890
 891
 892 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 893   """Ensure that a node has the given secondary ip.
 894
 895   @type lu: L{LogicalUnit}
 896   @param lu: the LU on behalf of which we make the check
 897   @type node: string
 898   @param node: the node to check
 899   @type secondary_ip: string
 900   @param secondary_ip: the ip to check
 901   @type prereq: boolean
 902   @param prereq: whether to throw a prerequisite or an execute error
 903   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 904   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 905
 906   """
 907   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 908   result.Raise("Failure checking secondary ip on node %s" % node,
 909                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 910   if not result.payload:
 911     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 912            " please fix and re-run this command" % secondary_ip)
 913     if prereq:
 914       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 915     else:
 916       raise errors.OpExecError(msg)
 917
 918
 919 def _GetClusterDomainSecret():
 920   """Reads the cluster domain secret.
 921
 922   """
 923   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 924                                strict=True)
 925
 926
 927 def _CheckInstanceState(lu, instance, req_states, msg=None):
 928   """Ensure that an instance is in one of the required states.
 929
 930   @param lu: the LU on behalf of which we make the check
 931   @param instance: the instance to check
 932   @param msg: if passed, should be a message to replace the default one
 933   @raise errors.OpPrereqError: if the instance is not in the required state
 934
 935   """
 936   if msg is None:
 937     msg = "can't use instance from outside %s states" % ", ".join(req_states)
 938   if instance.admin_state not in req_states:
 939     raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
 940                                (instance, instance.admin_state, msg),
 941                                errors.ECODE_STATE)
 942
 943   if constants.ADMINST_UP not in req_states:
 944     pnode = instance.primary_node
 945     ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 946     ins_l.Raise("Can't contact node %s for instance information" % pnode,
 947                 prereq=True, ecode=errors.ECODE_ENVIRON)
 948
 949     if instance.name in ins_l.payload:
 950       raise errors.OpPrereqError("Instance %s is running, %s" %
 951                                  (instance.name, msg), errors.ECODE_STATE)
 952
 953
 954 def _ExpandItemName(fn, name, kind):
 955   """Expand an item name.
 956
 957   @param fn: the function to use for expansion
 958   @param name: requested item name
 959   @param kind: text description ('Node' or 'Instance')
 960   @return: the resolved (full) name
 961   @raise errors.OpPrereqError: if the item is not found
 962
 963   """
 964   full_name = fn(name)
 965   if full_name is None:
 966     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 967                                errors.ECODE_NOENT)
 968   return full_name
 969
 970
 971 def _ExpandNodeName(cfg, name):
 972   """Wrapper over L{_ExpandItemName} for nodes."""
 973   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 974
 975
 976 def _ExpandInstanceName(cfg, name):
 977   """Wrapper over L{_ExpandItemName} for instance."""
 978   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 979
 980
 981 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 982                           minmem, maxmem, vcpus, nics, disk_template, disks,
 983                           bep, hvp, hypervisor_name, tags):
 984   """Builds instance related env variables for hooks
 985
 986   This builds the hook environment from individual variables.
 987
 988   @type name: string
 989   @param name: the name of the instance
 990   @type primary_node: string
 991   @param primary_node: the name of the instance's primary node
 992   @type secondary_nodes: list
 993   @param secondary_nodes: list of secondary nodes as strings
 994   @type os_type: string
 995   @param os_type: the name of the instance's OS
 996   @type status: string
 997   @param status: the desired status of the instance
 998   @type minmem: string
 999   @param minmem: the minimum memory size of the instance
1000   @type maxmem: string
1001   @param maxmem: the maximum memory size of the instance
1002   @type vcpus: string
1003   @param vcpus: the count of VCPUs the instance has
1004   @type nics: list
1005   @param nics: list of tuples (ip, mac, mode, link) representing
1006       the NICs the instance has
1007   @type disk_template: string
1008   @param disk_template: the disk template of the instance
1009   @type disks: list
1010   @param disks: the list of (size, mode) pairs
1011   @type bep: dict
1012   @param bep: the backend parameters for the instance
1013   @type hvp: dict
1014   @param hvp: the hypervisor parameters for the instance
1015   @type hypervisor_name: string
1016   @param hypervisor_name: the hypervisor for the instance
1017   @type tags: list
1018   @param tags: list of instance tags as strings
1019   @rtype: dict
1020   @return: the hook environment for this instance
1021
1022   """
1023   env = {
1024     "OP_TARGET": name,
1025     "INSTANCE_NAME": name,
1026     "INSTANCE_PRIMARY": primary_node,
1027     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1028     "INSTANCE_OS_TYPE": os_type,
1029     "INSTANCE_STATUS": status,
1030     "INSTANCE_MINMEM": minmem,
1031     "INSTANCE_MAXMEM": maxmem,
1032     # TODO(2.7) remove deprecated "memory" value
1033     "INSTANCE_MEMORY": maxmem,
1034     "INSTANCE_VCPUS": vcpus,
1035     "INSTANCE_DISK_TEMPLATE": disk_template,
1036     "INSTANCE_HYPERVISOR": hypervisor_name,
1037   }
1038   if nics:
1039     nic_count = len(nics)
1040     for idx, (ip, mac, mode, link) in enumerate(nics):
1041       if ip is None:
1042         ip = ""
1043       env["INSTANCE_NIC%d_IP" % idx] = ip
1044       env["INSTANCE_NIC%d_MAC" % idx] = mac
1045       env["INSTANCE_NIC%d_MODE" % idx] = mode
1046       env["INSTANCE_NIC%d_LINK" % idx] = link
1047       if mode == constants.NIC_MODE_BRIDGED:
1048         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1049   else:
1050     nic_count = 0
1051
1052   env["INSTANCE_NIC_COUNT"] = nic_count
1053
1054   if disks:
1055     disk_count = len(disks)
1056     for idx, (size, mode) in enumerate(disks):
1057       env["INSTANCE_DISK%d_SIZE" % idx] = size
1058       env["INSTANCE_DISK%d_MODE" % idx] = mode
1059   else:
1060     disk_count = 0
1061
1062   env["INSTANCE_DISK_COUNT"] = disk_count
1063
1064   if not tags:
1065     tags = []
1066
1067   env["INSTANCE_TAGS"] = " ".join(tags)
1068
1069   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1070     for key, value in source.items():
1071       env["INSTANCE_%s_%s" % (kind, key)] = value
1072
1073   return env
1074
1075
1076 def _NICListToTuple(lu, nics):
1077   """Build a list of nic information tuples.
1078
1079   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1080   value in LUInstanceQueryData.
1081
1082   @type lu:  L{LogicalUnit}
1083   @param lu: the logical unit on whose behalf we execute
1084   @type nics: list of L{objects.NIC}
1085   @param nics: list of nics to convert to hooks tuples
1086
1087   """
1088   hooks_nics = []
1089   cluster = lu.cfg.GetClusterInfo()
1090   for nic in nics:
1091     ip = nic.ip
1092     mac = nic.mac
1093     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1094     mode = filled_params[constants.NIC_MODE]
1095     link = filled_params[constants.NIC_LINK]
1096     hooks_nics.append((ip, mac, mode, link))
1097   return hooks_nics
1098
1099
1100 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1101   """Builds instance related env variables for hooks from an object.
1102
1103   @type lu: L{LogicalUnit}
1104   @param lu: the logical unit on whose behalf we execute
1105   @type instance: L{objects.Instance}
1106   @param instance: the instance for which we should build the
1107       environment
1108   @type override: dict
1109   @param override: dictionary with key/values that will override
1110       our values
1111   @rtype: dict
1112   @return: the hook environment dictionary
1113
1114   """
1115   cluster = lu.cfg.GetClusterInfo()
1116   bep = cluster.FillBE(instance)
1117   hvp = cluster.FillHV(instance)
1118   args = {
1119     "name": instance.name,
1120     "primary_node": instance.primary_node,
1121     "secondary_nodes": instance.secondary_nodes,
1122     "os_type": instance.os,
1123     "status": instance.admin_state,
1124     "maxmem": bep[constants.BE_MAXMEM],
1125     "minmem": bep[constants.BE_MINMEM],
1126     "vcpus": bep[constants.BE_VCPUS],
1127     "nics": _NICListToTuple(lu, instance.nics),
1128     "disk_template": instance.disk_template,
1129     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1130     "bep": bep,
1131     "hvp": hvp,
1132     "hypervisor_name": instance.hypervisor,
1133     "tags": instance.tags,
1134   }
1135   if override:
1136     args.update(override)
1137   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1138
1139
1140 def _AdjustCandidatePool(lu, exceptions):
1141   """Adjust the candidate pool after node operations.
1142
1143   """
1144   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1145   if mod_list:
1146     lu.LogInfo("Promoted nodes to master candidate role: %s",
1147                utils.CommaJoin(node.name for node in mod_list))
1148     for name in mod_list:
1149       lu.context.ReaddNode(name)
1150   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1151   if mc_now > mc_max:
1152     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1153                (mc_now, mc_max))
1154
1155
1156 def _DecideSelfPromotion(lu, exceptions=None):
1157   """Decide whether I should promote myself as a master candidate.
1158
1159   """
1160   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1161   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1162   # the new node will increase mc_max with one, so:
1163   mc_should = min(mc_should + 1, cp_size)
1164   return mc_now < mc_should
1165
1166
1167 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1168   """Check that the brigdes needed by a list of nics exist.
1169
1170   """
1171   cluster = lu.cfg.GetClusterInfo()
1172   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1173   brlist = [params[constants.NIC_LINK] for params in paramslist
1174             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1175   if brlist:
1176     result = lu.rpc.call_bridges_exist(target_node, brlist)
1177     result.Raise("Error checking bridges on destination node '%s'" %
1178                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1179
1180
1181 def _CheckInstanceBridgesExist(lu, instance, node=None):
1182   """Check that the brigdes needed by an instance exist.
1183
1184   """
1185   if node is None:
1186     node = instance.primary_node
1187   _CheckNicsBridgesExist(lu, instance.nics, node)
1188
1189
1190 def _CheckOSVariant(os_obj, name):
1191   """Check whether an OS name conforms to the os variants specification.
1192
1193   @type os_obj: L{objects.OS}
1194   @param os_obj: OS object to check
1195   @type name: string
1196   @param name: OS name passed by the user, to check for validity
1197
1198   """
1199   variant = objects.OS.GetVariant(name)
1200   if not os_obj.supported_variants:
1201     if variant:
1202       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1203                                  " passed)" % (os_obj.name, variant),
1204                                  errors.ECODE_INVAL)
1205     return
1206   if not variant:
1207     raise errors.OpPrereqError("OS name must include a variant",
1208                                errors.ECODE_INVAL)
1209
1210   if variant not in os_obj.supported_variants:
1211     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1212
1213
1214 def _GetNodeInstancesInner(cfg, fn):
1215   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1216
1217
1218 def _GetNodeInstances(cfg, node_name):
1219   """Returns a list of all primary and secondary instances on a node.
1220
1221   """
1222
1223   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1224
1225
1226 def _GetNodePrimaryInstances(cfg, node_name):
1227   """Returns primary instances on a node.
1228
1229   """
1230   return _GetNodeInstancesInner(cfg,
1231                                 lambda inst: node_name == inst.primary_node)
1232
1233
1234 def _GetNodeSecondaryInstances(cfg, node_name):
1235   """Returns secondary instances on a node.
1236
1237   """
1238   return _GetNodeInstancesInner(cfg,
1239                                 lambda inst: node_name in inst.secondary_nodes)
1240
1241
1242 def _GetStorageTypeArgs(cfg, storage_type):
1243   """Returns the arguments for a storage type.
1244
1245   """
1246   # Special case for file storage
1247   if storage_type == constants.ST_FILE:
1248     # storage.FileStorage wants a list of storage directories
1249     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1250
1251   return []
1252
1253
1254 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1255   faulty = []
1256
1257   for dev in instance.disks:
1258     cfg.SetDiskID(dev, node_name)
1259
1260   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1261   result.Raise("Failed to get disk status from node %s" % node_name,
1262                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1263
1264   for idx, bdev_status in enumerate(result.payload):
1265     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1266       faulty.append(idx)
1267
1268   return faulty
1269
1270
1271 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1272   """Check the sanity of iallocator and node arguments and use the
1273   cluster-wide iallocator if appropriate.
1274
1275   Check that at most one of (iallocator, node) is specified. If none is
1276   specified, then the LU's opcode's iallocator slot is filled with the
1277   cluster-wide default iallocator.
1278
1279   @type iallocator_slot: string
1280   @param iallocator_slot: the name of the opcode iallocator slot
1281   @type node_slot: string
1282   @param node_slot: the name of the opcode target node slot
1283
1284   """
1285   node = getattr(lu.op, node_slot, None)
1286   iallocator = getattr(lu.op, iallocator_slot, None)
1287
1288   if node is not None and iallocator is not None:
1289     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1290                                errors.ECODE_INVAL)
1291   elif node is None and iallocator is None:
1292     default_iallocator = lu.cfg.GetDefaultIAllocator()
1293     if default_iallocator:
1294       setattr(lu.op, iallocator_slot, default_iallocator)
1295     else:
1296       raise errors.OpPrereqError("No iallocator or node given and no"
1297                                  " cluster-wide default iallocator found;"
1298                                  " please specify either an iallocator or a"
1299                                  " node, or set a cluster-wide default"
1300                                  " iallocator")
1301
1302
1303 def _GetDefaultIAllocator(cfg, iallocator):
1304   """Decides on which iallocator to use.
1305
1306   @type cfg: L{config.ConfigWriter}
1307   @param cfg: Cluster configuration object
1308   @type iallocator: string or None
1309   @param iallocator: Iallocator specified in opcode
1310   @rtype: string
1311   @return: Iallocator name
1312
1313   """
1314   if not iallocator:
1315     # Use default iallocator
1316     iallocator = cfg.GetDefaultIAllocator()
1317
1318   if not iallocator:
1319     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1320                                " opcode nor as a cluster-wide default",
1321                                errors.ECODE_INVAL)
1322
1323   return iallocator
1324
1325
1326 class LUClusterPostInit(LogicalUnit):
1327   """Logical unit for running hooks after cluster initialization.
1328
1329   """
1330   HPATH = "cluster-init"
1331   HTYPE = constants.HTYPE_CLUSTER
1332
1333   def BuildHooksEnv(self):
1334     """Build hooks env.
1335
1336     """
1337     return {
1338       "OP_TARGET": self.cfg.GetClusterName(),
1339       }
1340
1341   def BuildHooksNodes(self):
1342     """Build hooks nodes.
1343
1344     """
1345     return ([], [self.cfg.GetMasterNode()])
1346
1347   def Exec(self, feedback_fn):
1348     """Nothing to do.
1349
1350     """
1351     return True
1352
1353
1354 class LUClusterDestroy(LogicalUnit):
1355   """Logical unit for destroying the cluster.
1356
1357   """
1358   HPATH = "cluster-destroy"
1359   HTYPE = constants.HTYPE_CLUSTER
1360
1361   def BuildHooksEnv(self):
1362     """Build hooks env.
1363
1364     """
1365     return {
1366       "OP_TARGET": self.cfg.GetClusterName(),
1367       }
1368
1369   def BuildHooksNodes(self):
1370     """Build hooks nodes.
1371
1372     """
1373     return ([], [])
1374
1375   def CheckPrereq(self):
1376     """Check prerequisites.
1377
1378     This checks whether the cluster is empty.
1379
1380     Any errors are signaled by raising errors.OpPrereqError.
1381
1382     """
1383     master = self.cfg.GetMasterNode()
1384
1385     nodelist = self.cfg.GetNodeList()
1386     if len(nodelist) != 1 or nodelist[0] != master:
1387       raise errors.OpPrereqError("There are still %d node(s) in"
1388                                  " this cluster." % (len(nodelist) - 1),
1389                                  errors.ECODE_INVAL)
1390     instancelist = self.cfg.GetInstanceList()
1391     if instancelist:
1392       raise errors.OpPrereqError("There are still %d instance(s) in"
1393                                  " this cluster." % len(instancelist),
1394                                  errors.ECODE_INVAL)
1395
1396   def Exec(self, feedback_fn):
1397     """Destroys the cluster.
1398
1399     """
1400     master_params = self.cfg.GetMasterNetworkParameters()
1401
1402     # Run post hooks on master node before it's removed
1403     _RunPostHook(self, master_params.name)
1404
1405     ems = self.cfg.GetUseExternalMipScript()
1406     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1407                                                      master_params, ems)
1408     result.Raise("Could not disable the master role")
1409
1410     return master_params.name
1411
1412
1413 def _VerifyCertificate(filename):
1414   """Verifies a certificate for L{LUClusterVerifyConfig}.
1415
1416   @type filename: string
1417   @param filename: Path to PEM file
1418
1419   """
1420   try:
1421     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1422                                            utils.ReadFile(filename))
1423   except Exception, err: # pylint: disable=W0703
1424     return (LUClusterVerifyConfig.ETYPE_ERROR,
1425             "Failed to load X509 certificate %s: %s" % (filename, err))
1426
1427   (errcode, msg) = \
1428     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1429                                 constants.SSL_CERT_EXPIRATION_ERROR)
1430
1431   if msg:
1432     fnamemsg = "While verifying %s: %s" % (filename, msg)
1433   else:
1434     fnamemsg = None
1435
1436   if errcode is None:
1437     return (None, fnamemsg)
1438   elif errcode == utils.CERT_WARNING:
1439     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1440   elif errcode == utils.CERT_ERROR:
1441     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1442
1443   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1444
1445
1446 def _GetAllHypervisorParameters(cluster, instances):
1447   """Compute the set of all hypervisor parameters.
1448
1449   @type cluster: L{objects.Cluster}
1450   @param cluster: the cluster object
1451   @param instances: list of L{objects.Instance}
1452   @param instances: additional instances from which to obtain parameters
1453   @rtype: list of (origin, hypervisor, parameters)
1454   @return: a list with all parameters found, indicating the hypervisor they
1455        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1456
1457   """
1458   hvp_data = []
1459
1460   for hv_name in cluster.enabled_hypervisors:
1461     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1462
1463   for os_name, os_hvp in cluster.os_hvp.items():
1464     for hv_name, hv_params in os_hvp.items():
1465       if hv_params:
1466         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1467         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1468
1469   # TODO: collapse identical parameter values in a single one
1470   for instance in instances:
1471     if instance.hvparams:
1472       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1473                        cluster.FillHV(instance)))
1474
1475   return hvp_data
1476
1477
1478 class _VerifyErrors(object):
1479   """Mix-in for cluster/group verify LUs.
1480
1481   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1482   self.op and self._feedback_fn to be available.)
1483
1484   """
1485
1486   ETYPE_FIELD = "code"
1487   ETYPE_ERROR = "ERROR"
1488   ETYPE_WARNING = "WARNING"
1489
1490   def _Error(self, ecode, item, msg, *args, **kwargs):
1491     """Format an error message.
1492
1493     Based on the opcode's error_codes parameter, either format a
1494     parseable error code, or a simpler error string.
1495
1496     This must be called only from Exec and functions called from Exec.
1497
1498     """
1499     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1500     itype, etxt, _ = ecode
1501     # first complete the msg
1502     if args:
1503       msg = msg % args
1504     # then format the whole message
1505     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1506       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1507     else:
1508       if item:
1509         item = " " + item
1510       else:
1511         item = ""
1512       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1513     # and finally report it via the feedback_fn
1514     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1515
1516   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1517     """Log an error message if the passed condition is True.
1518
1519     """
1520     cond = (bool(cond)
1521             or self.op.debug_simulate_errors) # pylint: disable=E1101
1522
1523     # If the error code is in the list of ignored errors, demote the error to a
1524     # warning
1525     (_, etxt, _) = ecode
1526     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1527       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1528
1529     if cond:
1530       self._Error(ecode, *args, **kwargs)
1531
1532     # do not mark the operation as failed for WARN cases only
1533     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1534       self.bad = self.bad or cond
1535
1536
1537 class LUClusterVerify(NoHooksLU):
1538   """Submits all jobs necessary to verify the cluster.
1539
1540   """
1541   REQ_BGL = False
1542
1543   def ExpandNames(self):
1544     self.needed_locks = {}
1545
1546   def Exec(self, feedback_fn):
1547     jobs = []
1548
1549     if self.op.group_name:
1550       groups = [self.op.group_name]
1551       depends_fn = lambda: None
1552     else:
1553       groups = self.cfg.GetNodeGroupList()
1554
1555       # Verify global configuration
1556       jobs.append([
1557         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1558         ])
1559
1560       # Always depend on global verification
1561       depends_fn = lambda: [(-len(jobs), [])]
1562
1563     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1564                                             ignore_errors=self.op.ignore_errors,
1565                                             depends=depends_fn())]
1566                 for group in groups)
1567
1568     # Fix up all parameters
1569     for op in itertools.chain(*jobs): # pylint: disable=W0142
1570       op.debug_simulate_errors = self.op.debug_simulate_errors
1571       op.verbose = self.op.verbose
1572       op.error_codes = self.op.error_codes
1573       try:
1574         op.skip_checks = self.op.skip_checks
1575       except AttributeError:
1576         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1577
1578     return ResultWithJobs(jobs)
1579
1580
1581 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1582   """Verifies the cluster config.
1583
1584   """
1585   REQ_BGL = True
1586
1587   def _VerifyHVP(self, hvp_data):
1588     """Verifies locally the syntax of the hypervisor parameters.
1589
1590     """
1591     for item, hv_name, hv_params in hvp_data:
1592       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1593              (item, hv_name))
1594       try:
1595         hv_class = hypervisor.GetHypervisor(hv_name)
1596         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1597         hv_class.CheckParameterSyntax(hv_params)
1598       except errors.GenericError, err:
1599         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1600
1601   def ExpandNames(self):
1602     # Information can be safely retrieved as the BGL is acquired in exclusive
1603     # mode
1604     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1605     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1606     self.all_node_info = self.cfg.GetAllNodesInfo()
1607     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1608     self.needed_locks = {}
1609
1610   def Exec(self, feedback_fn):
1611     """Verify integrity of cluster, performing various test on nodes.
1612
1613     """
1614     self.bad = False
1615     self._feedback_fn = feedback_fn
1616
1617     feedback_fn("* Verifying cluster config")
1618
1619     for msg in self.cfg.VerifyConfig():
1620       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1621
1622     feedback_fn("* Verifying cluster certificate files")
1623
1624     for cert_filename in constants.ALL_CERT_FILES:
1625       (errcode, msg) = _VerifyCertificate(cert_filename)
1626       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1627
1628     feedback_fn("* Verifying hypervisor parameters")
1629
1630     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1631                                                 self.all_inst_info.values()))
1632
1633     feedback_fn("* Verifying all nodes belong to an existing group")
1634
1635     # We do this verification here because, should this bogus circumstance
1636     # occur, it would never be caught by VerifyGroup, which only acts on
1637     # nodes/instances reachable from existing node groups.
1638
1639     dangling_nodes = set(node.name for node in self.all_node_info.values()
1640                          if node.group not in self.all_group_info)
1641
1642     dangling_instances = {}
1643     no_node_instances = []
1644
1645     for inst in self.all_inst_info.values():
1646       if inst.primary_node in dangling_nodes:
1647         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1648       elif inst.primary_node not in self.all_node_info:
1649         no_node_instances.append(inst.name)
1650
1651     pretty_dangling = [
1652         "%s (%s)" %
1653         (node.name,
1654          utils.CommaJoin(dangling_instances.get(node.name,
1655                                                 ["no instances"])))
1656         for node in dangling_nodes]
1657
1658     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1659                   None,
1660                   "the following nodes (and their instances) belong to a non"
1661                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1662
1663     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1664                   None,
1665                   "the following instances have a non-existing primary-node:"
1666                   " %s", utils.CommaJoin(no_node_instances))
1667
1668     return not self.bad
1669
1670
1671 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1672   """Verifies the status of a node group.
1673
1674   """
1675   HPATH = "cluster-verify"
1676   HTYPE = constants.HTYPE_CLUSTER
1677   REQ_BGL = False
1678
1679   _HOOKS_INDENT_RE = re.compile("^", re.M)
1680
1681   class NodeImage(object):
1682     """A class representing the logical and physical status of a node.
1683
1684     @type name: string
1685     @ivar name: the node name to which this object refers
1686     @ivar volumes: a structure as returned from
1687         L{ganeti.backend.GetVolumeList} (runtime)
1688     @ivar instances: a list of running instances (runtime)
1689     @ivar pinst: list of configured primary instances (config)
1690     @ivar sinst: list of configured secondary instances (config)
1691     @ivar sbp: dictionary of {primary-node: list of instances} for all
1692         instances for which this node is secondary (config)
1693     @ivar mfree: free memory, as reported by hypervisor (runtime)
1694     @ivar dfree: free disk, as reported by the node (runtime)
1695     @ivar offline: the offline status (config)
1696     @type rpc_fail: boolean
1697     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1698         not whether the individual keys were correct) (runtime)
1699     @type lvm_fail: boolean
1700     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1701     @type hyp_fail: boolean
1702     @ivar hyp_fail: whether the RPC call didn't return the instance list
1703     @type ghost: boolean
1704     @ivar ghost: whether this is a known node or not (config)
1705     @type os_fail: boolean
1706     @ivar os_fail: whether the RPC call didn't return valid OS data
1707     @type oslist: list
1708     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1709     @type vm_capable: boolean
1710     @ivar vm_capable: whether the node can host instances
1711
1712     """
1713     def __init__(self, offline=False, name=None, vm_capable=True):
1714       self.name = name
1715       self.volumes = {}
1716       self.instances = []
1717       self.pinst = []
1718       self.sinst = []
1719       self.sbp = {}
1720       self.mfree = 0
1721       self.dfree = 0
1722       self.offline = offline
1723       self.vm_capable = vm_capable
1724       self.rpc_fail = False
1725       self.lvm_fail = False
1726       self.hyp_fail = False
1727       self.ghost = False
1728       self.os_fail = False
1729       self.oslist = {}
1730
1731   def ExpandNames(self):
1732     # This raises errors.OpPrereqError on its own:
1733     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1734
1735     # Get instances in node group; this is unsafe and needs verification later
1736     inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1737
1738     self.needed_locks = {
1739       locking.LEVEL_INSTANCE: inst_names,
1740       locking.LEVEL_NODEGROUP: [self.group_uuid],
1741       locking.LEVEL_NODE: [],
1742       }
1743
1744     self.share_locks = _ShareAll()
1745
1746   def DeclareLocks(self, level):
1747     if level == locking.LEVEL_NODE:
1748       # Get members of node group; this is unsafe and needs verification later
1749       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1750
1751       all_inst_info = self.cfg.GetAllInstancesInfo()
1752
1753       # In Exec(), we warn about mirrored instances that have primary and
1754       # secondary living in separate node groups. To fully verify that
1755       # volumes for these instances are healthy, we will need to do an
1756       # extra call to their secondaries. We ensure here those nodes will
1757       # be locked.
1758       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1759         # Important: access only the instances whose lock is owned
1760         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1761           nodes.update(all_inst_info[inst].secondary_nodes)
1762
1763       self.needed_locks[locking.LEVEL_NODE] = nodes
1764
1765   def CheckPrereq(self):
1766     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1767     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1768
1769     group_nodes = set(self.group_info.members)
1770     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1771
1772     unlocked_nodes = \
1773         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1774
1775     unlocked_instances = \
1776         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1777
1778     if unlocked_nodes:
1779       raise errors.OpPrereqError("Missing lock for nodes: %s" %
1780                                  utils.CommaJoin(unlocked_nodes))
1781
1782     if unlocked_instances:
1783       raise errors.OpPrereqError("Missing lock for instances: %s" %
1784                                  utils.CommaJoin(unlocked_instances))
1785
1786     self.all_node_info = self.cfg.GetAllNodesInfo()
1787     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1788
1789     self.my_node_names = utils.NiceSort(group_nodes)
1790     self.my_inst_names = utils.NiceSort(group_instances)
1791
1792     self.my_node_info = dict((name, self.all_node_info[name])
1793                              for name in self.my_node_names)
1794
1795     self.my_inst_info = dict((name, self.all_inst_info[name])
1796                              for name in self.my_inst_names)
1797
1798     # We detect here the nodes that will need the extra RPC calls for verifying
1799     # split LV volumes; they should be locked.
1800     extra_lv_nodes = set()
1801
1802     for inst in self.my_inst_info.values():
1803       if inst.disk_template in constants.DTS_INT_MIRROR:
1804         group = self.my_node_info[inst.primary_node].group
1805         for nname in inst.secondary_nodes:
1806           if self.all_node_info[nname].group != group:
1807             extra_lv_nodes.add(nname)
1808
1809     unlocked_lv_nodes = \
1810         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1811
1812     if unlocked_lv_nodes:
1813       raise errors.OpPrereqError("these nodes could be locked: %s" %
1814                                  utils.CommaJoin(unlocked_lv_nodes))
1815     self.extra_lv_nodes = list(extra_lv_nodes)
1816
1817   def _VerifyNode(self, ninfo, nresult):
1818     """Perform some basic validation on data returned from a node.
1819
1820       - check the result data structure is well formed and has all the
1821         mandatory fields
1822       - check ganeti version
1823
1824     @type ninfo: L{objects.Node}
1825     @param ninfo: the node to check
1826     @param nresult: the results from the node
1827     @rtype: boolean
1828     @return: whether overall this call was successful (and we can expect
1829          reasonable values in the respose)
1830
1831     """
1832     node = ninfo.name
1833     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1834
1835     # main result, nresult should be a non-empty dict
1836     test = not nresult or not isinstance(nresult, dict)
1837     _ErrorIf(test, constants.CV_ENODERPC, node,
1838                   "unable to verify node: no data returned")
1839     if test:
1840       return False
1841
1842     # compares ganeti version
1843     local_version = constants.PROTOCOL_VERSION
1844     remote_version = nresult.get("version", None)
1845     test = not (remote_version and
1846                 isinstance(remote_version, (list, tuple)) and
1847                 len(remote_version) == 2)
1848     _ErrorIf(test, constants.CV_ENODERPC, node,
1849              "connection to node returned invalid data")
1850     if test:
1851       return False
1852
1853     test = local_version != remote_version[0]
1854     _ErrorIf(test, constants.CV_ENODEVERSION, node,
1855              "incompatible protocol versions: master %s,"
1856              " node %s", local_version, remote_version[0])
1857     if test:
1858       return False
1859
1860     # node seems compatible, we can actually try to look into its results
1861
1862     # full package version
1863     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1864                   constants.CV_ENODEVERSION, node,
1865                   "software version mismatch: master %s, node %s",
1866                   constants.RELEASE_VERSION, remote_version[1],
1867                   code=self.ETYPE_WARNING)
1868
1869     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1870     if ninfo.vm_capable and isinstance(hyp_result, dict):
1871       for hv_name, hv_result in hyp_result.iteritems():
1872         test = hv_result is not None
1873         _ErrorIf(test, constants.CV_ENODEHV, node,
1874                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1875
1876     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1877     if ninfo.vm_capable and isinstance(hvp_result, list):
1878       for item, hv_name, hv_result in hvp_result:
1879         _ErrorIf(True, constants.CV_ENODEHV, node,
1880                  "hypervisor %s parameter verify failure (source %s): %s",
1881                  hv_name, item, hv_result)
1882
1883     test = nresult.get(constants.NV_NODESETUP,
1884                        ["Missing NODESETUP results"])
1885     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1886              "; ".join(test))
1887
1888     return True
1889
1890   def _VerifyNodeTime(self, ninfo, nresult,
1891                       nvinfo_starttime, nvinfo_endtime):
1892     """Check the node time.
1893
1894     @type ninfo: L{objects.Node}
1895     @param ninfo: the node to check
1896     @param nresult: the remote results for the node
1897     @param nvinfo_starttime: the start time of the RPC call
1898     @param nvinfo_endtime: the end time of the RPC call
1899
1900     """
1901     node = ninfo.name
1902     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1903
1904     ntime = nresult.get(constants.NV_TIME, None)
1905     try:
1906       ntime_merged = utils.MergeTime(ntime)
1907     except (ValueError, TypeError):
1908       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1909       return
1910
1911     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1912       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1913     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1914       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1915     else:
1916       ntime_diff = None
1917
1918     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1919              "Node time diverges by at least %s from master node time",
1920              ntime_diff)
1921
1922   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1923     """Check the node LVM results.
1924
1925     @type ninfo: L{objects.Node}
1926     @param ninfo: the node to check
1927     @param nresult: the remote results for the node
1928     @param vg_name: the configured VG name
1929
1930     """
1931     if vg_name is None:
1932       return
1933
1934     node = ninfo.name
1935     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1936
1937     # checks vg existence and size > 20G
1938     vglist = nresult.get(constants.NV_VGLIST, None)
1939     test = not vglist
1940     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1941     if not test:
1942       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1943                                             constants.MIN_VG_SIZE)
1944       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1945
1946     # check pv names
1947     pvlist = nresult.get(constants.NV_PVLIST, None)
1948     test = pvlist is None
1949     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1950     if not test:
1951       # check that ':' is not present in PV names, since it's a
1952       # special character for lvcreate (denotes the range of PEs to
1953       # use on the PV)
1954       for _, pvname, owner_vg in pvlist:
1955         test = ":" in pvname
1956         _ErrorIf(test, constants.CV_ENODELVM, node,
1957                  "Invalid character ':' in PV '%s' of VG '%s'",
1958                  pvname, owner_vg)
1959
1960   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1961     """Check the node bridges.
1962
1963     @type ninfo: L{objects.Node}
1964     @param ninfo: the node to check
1965     @param nresult: the remote results for the node
1966     @param bridges: the expected list of bridges
1967
1968     """
1969     if not bridges:
1970       return
1971
1972     node = ninfo.name
1973     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1974
1975     missing = nresult.get(constants.NV_BRIDGES, None)
1976     test = not isinstance(missing, list)
1977     _ErrorIf(test, constants.CV_ENODENET, node,
1978              "did not return valid bridge information")
1979     if not test:
1980       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1981                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1982
1983   def _VerifyNodeUserScripts(self, ninfo, nresult):
1984     """Check the results of user scripts presence and executability on the node
1985
1986     @type ninfo: L{objects.Node}
1987     @param ninfo: the node to check
1988     @param nresult: the remote results for the node
1989
1990     """
1991     node = ninfo.name
1992
1993     test = not constants.NV_USERSCRIPTS in nresult
1994     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1995                   "did not return user scripts information")
1996
1997     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1998     if not test:
1999       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2000                     "user scripts not present or not executable: %s" %
2001                     utils.CommaJoin(sorted(broken_scripts)))
2002
2003   def _VerifyNodeNetwork(self, ninfo, nresult):
2004     """Check the node network connectivity results.
2005
2006     @type ninfo: L{objects.Node}
2007     @param ninfo: the node to check
2008     @param nresult: the remote results for the node
2009
2010     """
2011     node = ninfo.name
2012     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2013
2014     test = constants.NV_NODELIST not in nresult
2015     _ErrorIf(test, constants.CV_ENODESSH, node,
2016              "node hasn't returned node ssh connectivity data")
2017     if not test:
2018       if nresult[constants.NV_NODELIST]:
2019         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2020           _ErrorIf(True, constants.CV_ENODESSH, node,
2021                    "ssh communication with node '%s': %s", a_node, a_msg)
2022
2023     test = constants.NV_NODENETTEST not in nresult
2024     _ErrorIf(test, constants.CV_ENODENET, node,
2025              "node hasn't returned node tcp connectivity data")
2026     if not test:
2027       if nresult[constants.NV_NODENETTEST]:
2028         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2029         for anode in nlist:
2030           _ErrorIf(True, constants.CV_ENODENET, node,
2031                    "tcp communication with node '%s': %s",
2032                    anode, nresult[constants.NV_NODENETTEST][anode])
2033
2034     test = constants.NV_MASTERIP not in nresult
2035     _ErrorIf(test, constants.CV_ENODENET, node,
2036              "node hasn't returned node master IP reachability data")
2037     if not test:
2038       if not nresult[constants.NV_MASTERIP]:
2039         if node == self.master_node:
2040           msg = "the master node cannot reach the master IP (not configured?)"
2041         else:
2042           msg = "cannot reach the master IP"
2043         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2044
2045   def _VerifyInstance(self, instance, instanceconfig, node_image,
2046                       diskstatus):
2047     """Verify an instance.
2048
2049     This function checks to see if the required block devices are
2050     available on the instance's node.
2051
2052     """
2053     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2054     node_current = instanceconfig.primary_node
2055
2056     node_vol_should = {}
2057     instanceconfig.MapLVsByNode(node_vol_should)
2058
2059     for node in node_vol_should:
2060       n_img = node_image[node]
2061       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2062         # ignore missing volumes on offline or broken nodes
2063         continue
2064       for volume in node_vol_should[node]:
2065         test = volume not in n_img.volumes
2066         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2067                  "volume %s missing on node %s", volume, node)
2068
2069     if instanceconfig.admin_state == constants.ADMINST_UP:
2070       pri_img = node_image[node_current]
2071       test = instance not in pri_img.instances and not pri_img.offline
2072       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2073                "instance not running on its primary node %s",
2074                node_current)
2075
2076     diskdata = [(nname, success, status, idx)
2077                 for (nname, disks) in diskstatus.items()
2078                 for idx, (success, status) in enumerate(disks)]
2079
2080     for nname, success, bdev_status, idx in diskdata:
2081       # the 'ghost node' construction in Exec() ensures that we have a
2082       # node here
2083       snode = node_image[nname]
2084       bad_snode = snode.ghost or snode.offline
2085       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2086                not success and not bad_snode,
2087                constants.CV_EINSTANCEFAULTYDISK, instance,
2088                "couldn't retrieve status for disk/%s on %s: %s",
2089                idx, nname, bdev_status)
2090       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2091                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2092                constants.CV_EINSTANCEFAULTYDISK, instance,
2093                "disk/%s on %s is faulty", idx, nname)
2094
2095   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2096     """Verify if there are any unknown volumes in the cluster.
2097
2098     The .os, .swap and backup volumes are ignored. All other volumes are
2099     reported as unknown.
2100
2101     @type reserved: L{ganeti.utils.FieldSet}
2102     @param reserved: a FieldSet of reserved volume names
2103
2104     """
2105     for node, n_img in node_image.items():
2106       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2107         # skip non-healthy nodes
2108         continue
2109       for volume in n_img.volumes:
2110         test = ((node not in node_vol_should or
2111                 volume not in node_vol_should[node]) and
2112                 not reserved.Matches(volume))
2113         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2114                       "volume %s is unknown", volume)
2115
2116   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2117     """Verify N+1 Memory Resilience.
2118
2119     Check that if one single node dies we can still start all the
2120     instances it was primary for.
2121
2122     """
2123     cluster_info = self.cfg.GetClusterInfo()
2124     for node, n_img in node_image.items():
2125       # This code checks that every node which is now listed as
2126       # secondary has enough memory to host all instances it is
2127       # supposed to should a single other node in the cluster fail.
2128       # FIXME: not ready for failover to an arbitrary node
2129       # FIXME: does not support file-backed instances
2130       # WARNING: we currently take into account down instances as well
2131       # as up ones, considering that even if they're down someone
2132       # might want to start them even in the event of a node failure.
2133       if n_img.offline:
2134         # we're skipping offline nodes from the N+1 warning, since
2135         # most likely we don't have good memory infromation from them;
2136         # we already list instances living on such nodes, and that's
2137         # enough warning
2138         continue
2139       #TODO(dynmem): use MINMEM for checking
2140       #TODO(dynmem): also consider ballooning out other instances
2141       for prinode, instances in n_img.sbp.items():
2142         needed_mem = 0
2143         for instance in instances:
2144           bep = cluster_info.FillBE(instance_cfg[instance])
2145           if bep[constants.BE_AUTO_BALANCE]:
2146             needed_mem += bep[constants.BE_MAXMEM]
2147         test = n_img.mfree < needed_mem
2148         self._ErrorIf(test, constants.CV_ENODEN1, node,
2149                       "not enough memory to accomodate instance failovers"
2150                       " should node %s fail (%dMiB needed, %dMiB available)",
2151                       prinode, needed_mem, n_img.mfree)
2152
2153   @classmethod
2154   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2155                    (files_all, files_opt, files_mc, files_vm)):
2156     """Verifies file checksums collected from all nodes.
2157
2158     @param errorif: Callback for reporting errors
2159     @param nodeinfo: List of L{objects.Node} objects
2160     @param master_node: Name of master node
2161     @param all_nvinfo: RPC results
2162
2163     """
2164     # Define functions determining which nodes to consider for a file
2165     files2nodefn = [
2166       (files_all, None),
2167       (files_mc, lambda node: (node.master_candidate or
2168                                node.name == master_node)),
2169       (files_vm, lambda node: node.vm_capable),
2170       ]
2171
2172     # Build mapping from filename to list of nodes which should have the file
2173     nodefiles = {}
2174     for (files, fn) in files2nodefn:
2175       if fn is None:
2176         filenodes = nodeinfo
2177       else:
2178         filenodes = filter(fn, nodeinfo)
2179       nodefiles.update((filename,
2180                         frozenset(map(operator.attrgetter("name"), filenodes)))
2181                        for filename in files)
2182
2183     assert set(nodefiles) == (files_all | files_mc | files_vm)
2184
2185     fileinfo = dict((filename, {}) for filename in nodefiles)
2186     ignore_nodes = set()
2187
2188     for node in nodeinfo:
2189       if node.offline:
2190         ignore_nodes.add(node.name)
2191         continue
2192
2193       nresult = all_nvinfo[node.name]
2194
2195       if nresult.fail_msg or not nresult.payload:
2196         node_files = None
2197       else:
2198         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2199
2200       test = not (node_files and isinstance(node_files, dict))
2201       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2202               "Node did not return file checksum data")
2203       if test:
2204         ignore_nodes.add(node.name)
2205         continue
2206
2207       # Build per-checksum mapping from filename to nodes having it
2208       for (filename, checksum) in node_files.items():
2209         assert filename in nodefiles
2210         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2211
2212     for (filename, checksums) in fileinfo.items():
2213       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2214
2215       # Nodes having the file
2216       with_file = frozenset(node_name
2217                             for nodes in fileinfo[filename].values()
2218                             for node_name in nodes) - ignore_nodes
2219
2220       expected_nodes = nodefiles[filename] - ignore_nodes
2221
2222       # Nodes missing file
2223       missing_file = expected_nodes - with_file
2224
2225       if filename in files_opt:
2226         # All or no nodes
2227         errorif(missing_file and missing_file != expected_nodes,
2228                 constants.CV_ECLUSTERFILECHECK, None,
2229                 "File %s is optional, but it must exist on all or no"
2230                 " nodes (not found on %s)",
2231                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2232       else:
2233         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2234                 "File %s is missing from node(s) %s", filename,
2235                 utils.CommaJoin(utils.NiceSort(missing_file)))
2236
2237         # Warn if a node has a file it shouldn't
2238         unexpected = with_file - expected_nodes
2239         errorif(unexpected,
2240                 constants.CV_ECLUSTERFILECHECK, None,
2241                 "File %s should not exist on node(s) %s",
2242                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2243
2244       # See if there are multiple versions of the file
2245       test = len(checksums) > 1
2246       if test:
2247         variants = ["variant %s on %s" %
2248                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2249                     for (idx, (checksum, nodes)) in
2250                       enumerate(sorted(checksums.items()))]
2251       else:
2252         variants = []
2253
2254       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2255               "File %s found with %s different checksums (%s)",
2256               filename, len(checksums), "; ".join(variants))
2257
2258   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2259                       drbd_map):
2260     """Verifies and the node DRBD status.
2261
2262     @type ninfo: L{objects.Node}
2263     @param ninfo: the node to check
2264     @param nresult: the remote results for the node
2265     @param instanceinfo: the dict of instances
2266     @param drbd_helper: the configured DRBD usermode helper
2267     @param drbd_map: the DRBD map as returned by
2268         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2269
2270     """
2271     node = ninfo.name
2272     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2273
2274     if drbd_helper:
2275       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2276       test = (helper_result == None)
2277       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2278                "no drbd usermode helper returned")
2279       if helper_result:
2280         status, payload = helper_result
2281         test = not status
2282         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2283                  "drbd usermode helper check unsuccessful: %s", payload)
2284         test = status and (payload != drbd_helper)
2285         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2286                  "wrong drbd usermode helper: %s", payload)
2287
2288     # compute the DRBD minors
2289     node_drbd = {}
2290     for minor, instance in drbd_map[node].items():
2291       test = instance not in instanceinfo
2292       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2293                "ghost instance '%s' in temporary DRBD map", instance)
2294         # ghost instance should not be running, but otherwise we
2295         # don't give double warnings (both ghost instance and
2296         # unallocated minor in use)
2297       if test:
2298         node_drbd[minor] = (instance, False)
2299       else:
2300         instance = instanceinfo[instance]
2301         node_drbd[minor] = (instance.name,
2302                             instance.admin_state == constants.ADMINST_UP)
2303
2304     # and now check them
2305     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2306     test = not isinstance(used_minors, (tuple, list))
2307     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2308              "cannot parse drbd status file: %s", str(used_minors))
2309     if test:
2310       # we cannot check drbd status
2311       return
2312
2313     for minor, (iname, must_exist) in node_drbd.items():
2314       test = minor not in used_minors and must_exist
2315       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2316                "drbd minor %d of instance %s is not active", minor, iname)
2317     for minor in used_minors:
2318       test = minor not in node_drbd
2319       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2320                "unallocated drbd minor %d is in use", minor)
2321
2322   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2323     """Builds the node OS structures.
2324
2325     @type ninfo: L{objects.Node}
2326     @param ninfo: the node to check
2327     @param nresult: the remote results for the node
2328     @param nimg: the node image object
2329
2330     """
2331     node = ninfo.name
2332     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2333
2334     remote_os = nresult.get(constants.NV_OSLIST, None)
2335     test = (not isinstance(remote_os, list) or
2336             not compat.all(isinstance(v, list) and len(v) == 7
2337                            for v in remote_os))
2338
2339     _ErrorIf(test, constants.CV_ENODEOS, node,
2340              "node hasn't returned valid OS data")
2341
2342     nimg.os_fail = test
2343
2344     if test:
2345       return
2346
2347     os_dict = {}
2348
2349     for (name, os_path, status, diagnose,
2350          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2351
2352       if name not in os_dict:
2353         os_dict[name] = []
2354
2355       # parameters is a list of lists instead of list of tuples due to
2356       # JSON lacking a real tuple type, fix it:
2357       parameters = [tuple(v) for v in parameters]
2358       os_dict[name].append((os_path, status, diagnose,
2359                             set(variants), set(parameters), set(api_ver)))
2360
2361     nimg.oslist = os_dict
2362
2363   def _VerifyNodeOS(self, ninfo, nimg, base):
2364     """Verifies the node OS list.
2365
2366     @type ninfo: L{objects.Node}
2367     @param ninfo: the node to check
2368     @param nimg: the node image object
2369     @param base: the 'template' node we match against (e.g. from the master)
2370
2371     """
2372     node = ninfo.name
2373     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2374
2375     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2376
2377     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2378     for os_name, os_data in nimg.oslist.items():
2379       assert os_data, "Empty OS status for OS %s?!" % os_name
2380       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2381       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2382                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2383       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2384                "OS '%s' has multiple entries (first one shadows the rest): %s",
2385                os_name, utils.CommaJoin([v[0] for v in os_data]))
2386       # comparisons with the 'base' image
2387       test = os_name not in base.oslist
2388       _ErrorIf(test, constants.CV_ENODEOS, node,
2389                "Extra OS %s not present on reference node (%s)",
2390                os_name, base.name)
2391       if test:
2392         continue
2393       assert base.oslist[os_name], "Base node has empty OS status?"
2394       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2395       if not b_status:
2396         # base OS is invalid, skipping
2397         continue
2398       for kind, a, b in [("API version", f_api, b_api),
2399                          ("variants list", f_var, b_var),
2400                          ("parameters", beautify_params(f_param),
2401                           beautify_params(b_param))]:
2402         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2403                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2404                  kind, os_name, base.name,
2405                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2406
2407     # check any missing OSes
2408     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2409     _ErrorIf(missing, constants.CV_ENODEOS, node,
2410              "OSes present on reference node %s but missing on this node: %s",
2411              base.name, utils.CommaJoin(missing))
2412
2413   def _VerifyOob(self, ninfo, nresult):
2414     """Verifies out of band functionality of a node.
2415
2416     @type ninfo: L{objects.Node}
2417     @param ninfo: the node to check
2418     @param nresult: the remote results for the node
2419
2420     """
2421     node = ninfo.name
2422     # We just have to verify the paths on master and/or master candidates
2423     # as the oob helper is invoked on the master
2424     if ((ninfo.master_candidate or ninfo.master_capable) and
2425         constants.NV_OOB_PATHS in nresult):
2426       for path_result in nresult[constants.NV_OOB_PATHS]:
2427         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2428
2429   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2430     """Verifies and updates the node volume data.
2431
2432     This function will update a L{NodeImage}'s internal structures
2433     with data from the remote call.
2434
2435     @type ninfo: L{objects.Node}
2436     @param ninfo: the node to check
2437     @param nresult: the remote results for the node
2438     @param nimg: the node image object
2439     @param vg_name: the configured VG name
2440
2441     """
2442     node = ninfo.name
2443     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2444
2445     nimg.lvm_fail = True
2446     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2447     if vg_name is None:
2448       pass
2449     elif isinstance(lvdata, basestring):
2450       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2451                utils.SafeEncode(lvdata))
2452     elif not isinstance(lvdata, dict):
2453       _ErrorIf(True, constants.CV_ENODELVM, node,
2454                "rpc call to node failed (lvlist)")
2455     else:
2456       nimg.volumes = lvdata
2457       nimg.lvm_fail = False
2458
2459   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2460     """Verifies and updates the node instance list.
2461
2462     If the listing was successful, then updates this node's instance
2463     list. Otherwise, it marks the RPC call as failed for the instance
2464     list key.
2465
2466     @type ninfo: L{objects.Node}
2467     @param ninfo: the node to check
2468     @param nresult: the remote results for the node
2469     @param nimg: the node image object
2470
2471     """
2472     idata = nresult.get(constants.NV_INSTANCELIST, None)
2473     test = not isinstance(idata, list)
2474     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2475                   "rpc call to node failed (instancelist): %s",
2476                   utils.SafeEncode(str(idata)))
2477     if test:
2478       nimg.hyp_fail = True
2479     else:
2480       nimg.instances = idata
2481
2482   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2483     """Verifies and computes a node information map
2484
2485     @type ninfo: L{objects.Node}
2486     @param ninfo: the node to check
2487     @param nresult: the remote results for the node
2488     @param nimg: the node image object
2489     @param vg_name: the configured VG name
2490
2491     """
2492     node = ninfo.name
2493     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2494
2495     # try to read free memory (from the hypervisor)
2496     hv_info = nresult.get(constants.NV_HVINFO, None)
2497     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2498     _ErrorIf(test, constants.CV_ENODEHV, node,
2499              "rpc call to node failed (hvinfo)")
2500     if not test:
2501       try:
2502         nimg.mfree = int(hv_info["memory_free"])
2503       except (ValueError, TypeError):
2504         _ErrorIf(True, constants.CV_ENODERPC, node,
2505                  "node returned invalid nodeinfo, check hypervisor")
2506
2507     # FIXME: devise a free space model for file based instances as well
2508     if vg_name is not None:
2509       test = (constants.NV_VGLIST not in nresult or
2510               vg_name not in nresult[constants.NV_VGLIST])
2511       _ErrorIf(test, constants.CV_ENODELVM, node,
2512                "node didn't return data for the volume group '%s'"
2513                " - it is either missing or broken", vg_name)
2514       if not test:
2515         try:
2516           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2517         except (ValueError, TypeError):
2518           _ErrorIf(True, constants.CV_ENODERPC, node,
2519                    "node returned invalid LVM info, check LVM status")
2520
2521   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2522     """Gets per-disk status information for all instances.
2523
2524     @type nodelist: list of strings
2525     @param nodelist: Node names
2526     @type node_image: dict of (name, L{objects.Node})
2527     @param node_image: Node objects
2528     @type instanceinfo: dict of (name, L{objects.Instance})
2529     @param instanceinfo: Instance objects
2530     @rtype: {instance: {node: [(succes, payload)]}}
2531     @return: a dictionary of per-instance dictionaries with nodes as
2532         keys and disk information as values; the disk information is a
2533         list of tuples (success, payload)
2534
2535     """
2536     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2537
2538     node_disks = {}
2539     node_disks_devonly = {}
2540     diskless_instances = set()
2541     diskless = constants.DT_DISKLESS
2542
2543     for nname in nodelist:
2544       node_instances = list(itertools.chain(node_image[nname].pinst,
2545                                             node_image[nname].sinst))
2546       diskless_instances.update(inst for inst in node_instances
2547                                 if instanceinfo[inst].disk_template == diskless)
2548       disks = [(inst, disk)
2549                for inst in node_instances
2550                for disk in instanceinfo[inst].disks]
2551
2552       if not disks:
2553         # No need to collect data
2554         continue
2555
2556       node_disks[nname] = disks
2557
2558       # Creating copies as SetDiskID below will modify the objects and that can
2559       # lead to incorrect data returned from nodes
2560       devonly = [dev.Copy() for (_, dev) in disks]
2561
2562       for dev in devonly:
2563         self.cfg.SetDiskID(dev, nname)
2564
2565       node_disks_devonly[nname] = devonly
2566
2567     assert len(node_disks) == len(node_disks_devonly)
2568
2569     # Collect data from all nodes with disks
2570     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2571                                                           node_disks_devonly)
2572
2573     assert len(result) == len(node_disks)
2574
2575     instdisk = {}
2576
2577     for (nname, nres) in result.items():
2578       disks = node_disks[nname]
2579
2580       if nres.offline:
2581         # No data from this node
2582         data = len(disks) * [(False, "node offline")]
2583       else:
2584         msg = nres.fail_msg
2585         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2586                  "while getting disk information: %s", msg)
2587         if msg:
2588           # No data from this node
2589           data = len(disks) * [(False, msg)]
2590         else:
2591           data = []
2592           for idx, i in enumerate(nres.payload):
2593             if isinstance(i, (tuple, list)) and len(i) == 2:
2594               data.append(i)
2595             else:
2596               logging.warning("Invalid result from node %s, entry %d: %s",
2597                               nname, idx, i)
2598               data.append((False, "Invalid result from the remote node"))
2599
2600       for ((inst, _), status) in zip(disks, data):
2601         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2602
2603     # Add empty entries for diskless instances.
2604     for inst in diskless_instances:
2605       assert inst not in instdisk
2606       instdisk[inst] = {}
2607
2608     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2609                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2610                       compat.all(isinstance(s, (tuple, list)) and
2611                                  len(s) == 2 for s in statuses)
2612                       for inst, nnames in instdisk.items()
2613                       for nname, statuses in nnames.items())
2614     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2615
2616     return instdisk
2617
2618   @staticmethod
2619   def _SshNodeSelector(group_uuid, all_nodes):
2620     """Create endless iterators for all potential SSH check hosts.
2621
2622     """
2623     nodes = [node for node in all_nodes
2624              if (node.group != group_uuid and
2625                  not node.offline)]
2626     keyfunc = operator.attrgetter("group")
2627
2628     return map(itertools.cycle,
2629                [sorted(map(operator.attrgetter("name"), names))
2630                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2631                                                   keyfunc)])
2632
2633   @classmethod
2634   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2635     """Choose which nodes should talk to which other nodes.
2636
2637     We will make nodes contact all nodes in their group, and one node from
2638     every other group.
2639
2640     @warning: This algorithm has a known issue if one node group is much
2641       smaller than others (e.g. just one node). In such a case all other
2642       nodes will talk to the single node.
2643
2644     """
2645     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2646     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2647
2648     return (online_nodes,
2649             dict((name, sorted([i.next() for i in sel]))
2650                  for name in online_nodes))
2651
2652   def BuildHooksEnv(self):
2653     """Build hooks env.
2654
2655     Cluster-Verify hooks just ran in the post phase and their failure makes
2656     the output be logged in the verify output and the verification to fail.
2657
2658     """
2659     env = {
2660       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2661       }
2662
2663     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2664                for node in self.my_node_info.values())
2665
2666     return env
2667
2668   def BuildHooksNodes(self):
2669     """Build hooks nodes.
2670
2671     """
2672     return ([], self.my_node_names)
2673
2674   def Exec(self, feedback_fn):
2675     """Verify integrity of the node group, performing various test on nodes.
2676
2677     """
2678     # This method has too many local variables. pylint: disable=R0914
2679     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2680
2681     if not self.my_node_names:
2682       # empty node group
2683       feedback_fn("* Empty node group, skipping verification")
2684       return True
2685
2686     self.bad = False
2687     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2688     verbose = self.op.verbose
2689     self._feedback_fn = feedback_fn
2690
2691     vg_name = self.cfg.GetVGName()
2692     drbd_helper = self.cfg.GetDRBDHelper()
2693     cluster = self.cfg.GetClusterInfo()
2694     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2695     hypervisors = cluster.enabled_hypervisors
2696     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2697
2698     i_non_redundant = [] # Non redundant instances
2699     i_non_a_balanced = [] # Non auto-balanced instances
2700     i_offline = 0 # Count of offline instances
2701     n_offline = 0 # Count of offline nodes
2702     n_drained = 0 # Count of nodes being drained
2703     node_vol_should = {}
2704
2705     # FIXME: verify OS list
2706
2707     # File verification
2708     filemap = _ComputeAncillaryFiles(cluster, False)
2709
2710     # do local checksums
2711     master_node = self.master_node = self.cfg.GetMasterNode()
2712     master_ip = self.cfg.GetMasterIP()
2713
2714     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2715
2716     user_scripts = []
2717     if self.cfg.GetUseExternalMipScript():
2718       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2719
2720     node_verify_param = {
2721       constants.NV_FILELIST:
2722         utils.UniqueSequence(filename
2723                              for files in filemap
2724                              for filename in files),
2725       constants.NV_NODELIST:
2726         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2727                                   self.all_node_info.values()),
2728       constants.NV_HYPERVISOR: hypervisors,
2729       constants.NV_HVPARAMS:
2730         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2731       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2732                                  for node in node_data_list
2733                                  if not node.offline],
2734       constants.NV_INSTANCELIST: hypervisors,
2735       constants.NV_VERSION: None,
2736       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2737       constants.NV_NODESETUP: None,
2738       constants.NV_TIME: None,
2739       constants.NV_MASTERIP: (master_node, master_ip),
2740       constants.NV_OSLIST: None,
2741       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2742       constants.NV_USERSCRIPTS: user_scripts,
2743       }
2744
2745     if vg_name is not None:
2746       node_verify_param[constants.NV_VGLIST] = None
2747       node_verify_param[constants.NV_LVLIST] = vg_name
2748       node_verify_param[constants.NV_PVLIST] = [vg_name]
2749       node_verify_param[constants.NV_DRBDLIST] = None
2750
2751     if drbd_helper:
2752       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2753
2754     # bridge checks
2755     # FIXME: this needs to be changed per node-group, not cluster-wide
2756     bridges = set()
2757     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2758     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2759       bridges.add(default_nicpp[constants.NIC_LINK])
2760     for instance in self.my_inst_info.values():
2761       for nic in instance.nics:
2762         full_nic = cluster.SimpleFillNIC(nic.nicparams)
2763         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2764           bridges.add(full_nic[constants.NIC_LINK])
2765
2766     if bridges:
2767       node_verify_param[constants.NV_BRIDGES] = list(bridges)
2768
2769     # Build our expected cluster state
2770     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2771                                                  name=node.name,
2772                                                  vm_capable=node.vm_capable))
2773                       for node in node_data_list)
2774
2775     # Gather OOB paths
2776     oob_paths = []
2777     for node in self.all_node_info.values():
2778       path = _SupportsOob(self.cfg, node)
2779       if path and path not in oob_paths:
2780         oob_paths.append(path)
2781
2782     if oob_paths:
2783       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2784
2785     for instance in self.my_inst_names:
2786       inst_config = self.my_inst_info[instance]
2787
2788       for nname in inst_config.all_nodes:
2789         if nname not in node_image:
2790           gnode = self.NodeImage(name=nname)
2791           gnode.ghost = (nname not in self.all_node_info)
2792           node_image[nname] = gnode
2793
2794       inst_config.MapLVsByNode(node_vol_should)
2795
2796       pnode = inst_config.primary_node
2797       node_image[pnode].pinst.append(instance)
2798
2799       for snode in inst_config.secondary_nodes:
2800         nimg = node_image[snode]
2801         nimg.sinst.append(instance)
2802         if pnode not in nimg.sbp:
2803           nimg.sbp[pnode] = []
2804         nimg.sbp[pnode].append(instance)
2805
2806     # At this point, we have the in-memory data structures complete,
2807     # except for the runtime information, which we'll gather next
2808
2809     # Due to the way our RPC system works, exact response times cannot be
2810     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2811     # time before and after executing the request, we can at least have a time
2812     # window.
2813     nvinfo_starttime = time.time()
2814     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2815                                            node_verify_param,
2816                                            self.cfg.GetClusterName())
2817     nvinfo_endtime = time.time()
2818
2819     if self.extra_lv_nodes and vg_name is not None:
2820       extra_lv_nvinfo = \
2821           self.rpc.call_node_verify(self.extra_lv_nodes,
2822                                     {constants.NV_LVLIST: vg_name},
2823                                     self.cfg.GetClusterName())
2824     else:
2825       extra_lv_nvinfo = {}
2826
2827     all_drbd_map = self.cfg.ComputeDRBDMap()
2828
2829     feedback_fn("* Gathering disk information (%s nodes)" %
2830                 len(self.my_node_names))
2831     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2832                                      self.my_inst_info)
2833
2834     feedback_fn("* Verifying configuration file consistency")
2835
2836     # If not all nodes are being checked, we need to make sure the master node
2837     # and a non-checked vm_capable node are in the list.
2838     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2839     if absent_nodes:
2840       vf_nvinfo = all_nvinfo.copy()
2841       vf_node_info = list(self.my_node_info.values())
2842       additional_nodes = []
2843       if master_node not in self.my_node_info:
2844         additional_nodes.append(master_node)
2845         vf_node_info.append(self.all_node_info[master_node])
2846       # Add the first vm_capable node we find which is not included
2847       for node in absent_nodes:
2848         nodeinfo = self.all_node_info[node]
2849         if nodeinfo.vm_capable and not nodeinfo.offline:
2850           additional_nodes.append(node)
2851           vf_node_info.append(self.all_node_info[node])
2852           break
2853       key = constants.NV_FILELIST
2854       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2855                                                  {key: node_verify_param[key]},
2856                                                  self.cfg.GetClusterName()))
2857     else:
2858       vf_nvinfo = all_nvinfo
2859       vf_node_info = self.my_node_info.values()
2860
2861     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2862
2863     feedback_fn("* Verifying node status")
2864
2865     refos_img = None
2866
2867     for node_i in node_data_list:
2868       node = node_i.name
2869       nimg = node_image[node]
2870
2871       if node_i.offline:
2872         if verbose:
2873           feedback_fn("* Skipping offline node %s" % (node,))
2874         n_offline += 1
2875         continue
2876
2877       if node == master_node:
2878         ntype = "master"
2879       elif node_i.master_candidate:
2880         ntype = "master candidate"
2881       elif node_i.drained:
2882         ntype = "drained"
2883         n_drained += 1
2884       else:
2885         ntype = "regular"
2886       if verbose:
2887         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2888
2889       msg = all_nvinfo[node].fail_msg
2890       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2891                msg)
2892       if msg:
2893         nimg.rpc_fail = True
2894         continue
2895
2896       nresult = all_nvinfo[node].payload
2897
2898       nimg.call_ok = self._VerifyNode(node_i, nresult)
2899       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2900       self._VerifyNodeNetwork(node_i, nresult)
2901       self._VerifyNodeUserScripts(node_i, nresult)
2902       self._VerifyOob(node_i, nresult)
2903
2904       if nimg.vm_capable:
2905         self._VerifyNodeLVM(node_i, nresult, vg_name)
2906         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2907                              all_drbd_map)
2908
2909         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2910         self._UpdateNodeInstances(node_i, nresult, nimg)
2911         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2912         self._UpdateNodeOS(node_i, nresult, nimg)
2913
2914         if not nimg.os_fail:
2915           if refos_img is None:
2916             refos_img = nimg
2917           self._VerifyNodeOS(node_i, nimg, refos_img)
2918         self._VerifyNodeBridges(node_i, nresult, bridges)
2919
2920         # Check whether all running instancies are primary for the node. (This
2921         # can no longer be done from _VerifyInstance below, since some of the
2922         # wrong instances could be from other node groups.)
2923         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2924
2925         for inst in non_primary_inst:
2926           # FIXME: investigate best way to handle offline insts
2927           if inst.admin_state == constants.ADMINST_OFFLINE:
2928             if verbose:
2929               feedback_fn("* Skipping offline instance %s" % inst.name)
2930             i_offline += 1
2931             continue
2932           test = inst in self.all_inst_info
2933           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2934                    "instance should not run on node %s", node_i.name)
2935           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2936                    "node is running unknown instance %s", inst)
2937
2938     for node, result in extra_lv_nvinfo.items():
2939       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2940                               node_image[node], vg_name)
2941
2942     feedback_fn("* Verifying instance status")
2943     for instance in self.my_inst_names:
2944       if verbose:
2945         feedback_fn("* Verifying instance %s" % instance)
2946       inst_config = self.my_inst_info[instance]
2947       self._VerifyInstance(instance, inst_config, node_image,
2948                            instdisk[instance])
2949       inst_nodes_offline = []
2950
2951       pnode = inst_config.primary_node
2952       pnode_img = node_image[pnode]
2953       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2954                constants.CV_ENODERPC, pnode, "instance %s, connection to"
2955                " primary node failed", instance)
2956
2957       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2958                pnode_img.offline,
2959                constants.CV_EINSTANCEBADNODE, instance,
2960                "instance is marked as running and lives on offline node %s",
2961                inst_config.primary_node)
2962
2963       # If the instance is non-redundant we cannot survive losing its primary
2964       # node, so we are not N+1 compliant. On the other hand we have no disk
2965       # templates with more than one secondary so that situation is not well
2966       # supported either.
2967       # FIXME: does not support file-backed instances
2968       if not inst_config.secondary_nodes:
2969         i_non_redundant.append(instance)
2970
2971       _ErrorIf(len(inst_config.secondary_nodes) > 1,
2972                constants.CV_EINSTANCELAYOUT,
2973                instance, "instance has multiple secondary nodes: %s",
2974                utils.CommaJoin(inst_config.secondary_nodes),
2975                code=self.ETYPE_WARNING)
2976
2977       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2978         pnode = inst_config.primary_node
2979         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2980         instance_groups = {}
2981
2982         for node in instance_nodes:
2983           instance_groups.setdefault(self.all_node_info[node].group,
2984                                      []).append(node)
2985
2986         pretty_list = [
2987           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2988           # Sort so that we always list the primary node first.
2989           for group, nodes in sorted(instance_groups.items(),
2990                                      key=lambda (_, nodes): pnode in nodes,
2991                                      reverse=True)]
2992
2993         self._ErrorIf(len(instance_groups) > 1,
2994                       constants.CV_EINSTANCESPLITGROUPS,
2995                       instance, "instance has primary and secondary nodes in"
2996                       " different groups: %s", utils.CommaJoin(pretty_list),
2997                       code=self.ETYPE_WARNING)
2998
2999       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3000         i_non_a_balanced.append(instance)
3001
3002       for snode in inst_config.secondary_nodes:
3003         s_img = node_image[snode]
3004         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3005                  snode, "instance %s, connection to secondary node failed",
3006                  instance)
3007
3008         if s_img.offline:
3009           inst_nodes_offline.append(snode)
3010
3011       # warn that the instance lives on offline nodes
3012       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3013                "instance has offline secondary node(s) %s",
3014                utils.CommaJoin(inst_nodes_offline))
3015       # ... or ghost/non-vm_capable nodes
3016       for node in inst_config.all_nodes:
3017         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3018                  instance, "instance lives on ghost node %s", node)
3019         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3020                  instance, "instance lives on non-vm_capable node %s", node)
3021
3022     feedback_fn("* Verifying orphan volumes")
3023     reserved = utils.FieldSet(*cluster.reserved_lvs)
3024
3025     # We will get spurious "unknown volume" warnings if any node of this group
3026     # is secondary for an instance whose primary is in another group. To avoid
3027     # them, we find these instances and add their volumes to node_vol_should.
3028     for inst in self.all_inst_info.values():
3029       for secondary in inst.secondary_nodes:
3030         if (secondary in self.my_node_info
3031             and inst.name not in self.my_inst_info):
3032           inst.MapLVsByNode(node_vol_should)
3033           break
3034
3035     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3036
3037     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3038       feedback_fn("* Verifying N+1 Memory redundancy")
3039       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3040
3041     feedback_fn("* Other Notes")
3042     if i_non_redundant:
3043       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3044                   % len(i_non_redundant))
3045
3046     if i_non_a_balanced:
3047       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3048                   % len(i_non_a_balanced))
3049
3050     if i_offline:
3051       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3052
3053     if n_offline:
3054       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3055
3056     if n_drained:
3057       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3058
3059     return not self.bad
3060
3061   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3062     """Analyze the post-hooks' result
3063
3064     This method analyses the hook result, handles it, and sends some
3065     nicely-formatted feedback back to the user.
3066
3067     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3068         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3069     @param hooks_results: the results of the multi-node hooks rpc call
3070     @param feedback_fn: function used send feedback back to the caller
3071     @param lu_result: previous Exec result
3072     @return: the new Exec result, based on the previous result
3073         and hook results
3074
3075     """
3076     # We only really run POST phase hooks, only for non-empty groups,
3077     # and are only interested in their results
3078     if not self.my_node_names:
3079       # empty node group
3080       pass
3081     elif phase == constants.HOOKS_PHASE_POST:
3082       # Used to change hooks' output to proper indentation
3083       feedback_fn("* Hooks Results")
3084       assert hooks_results, "invalid result from hooks"
3085
3086       for node_name in hooks_results:
3087         res = hooks_results[node_name]
3088         msg = res.fail_msg
3089         test = msg and not res.offline
3090         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3091                       "Communication failure in hooks execution: %s", msg)
3092         if res.offline or msg:
3093           # No need to investigate payload if node is offline or gave
3094           # an error.
3095           continue
3096         for script, hkr, output in res.payload:
3097           test = hkr == constants.HKR_FAIL
3098           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3099                         "Script %s failed, output:", script)
3100           if test:
3101             output = self._HOOKS_INDENT_RE.sub("      ", output)
3102             feedback_fn("%s" % output)
3103             lu_result = False
3104
3105     return lu_result
3106
3107
3108 class LUClusterVerifyDisks(NoHooksLU):
3109   """Verifies the cluster disks status.
3110
3111   """
3112   REQ_BGL = False
3113
3114   def ExpandNames(self):
3115     self.share_locks = _ShareAll()
3116     self.needed_locks = {
3117       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3118       }
3119
3120   def Exec(self, feedback_fn):
3121     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3122
3123     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3124     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3125                            for group in group_names])
3126
3127
3128 class LUGroupVerifyDisks(NoHooksLU):
3129   """Verifies the status of all disks in a node group.
3130
3131   """
3132   REQ_BGL = False
3133
3134   def ExpandNames(self):
3135     # Raises errors.OpPrereqError on its own if group can't be found
3136     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3137
3138     self.share_locks = _ShareAll()
3139     self.needed_locks = {
3140       locking.LEVEL_INSTANCE: [],
3141       locking.LEVEL_NODEGROUP: [],
3142       locking.LEVEL_NODE: [],
3143       }
3144
3145   def DeclareLocks(self, level):
3146     if level == locking.LEVEL_INSTANCE:
3147       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3148
3149       # Lock instances optimistically, needs verification once node and group
3150       # locks have been acquired
3151       self.needed_locks[locking.LEVEL_INSTANCE] = \
3152         self.cfg.GetNodeGroupInstances(self.group_uuid)
3153
3154     elif level == locking.LEVEL_NODEGROUP:
3155       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3156
3157       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3158         set([self.group_uuid] +
3159             # Lock all groups used by instances optimistically; this requires
3160             # going via the node before it's locked, requiring verification
3161             # later on
3162             [group_uuid
3163              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3164              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3165
3166     elif level == locking.LEVEL_NODE:
3167       # This will only lock the nodes in the group to be verified which contain
3168       # actual instances
3169       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3170       self._LockInstancesNodes()
3171
3172       # Lock all nodes in group to be verified
3173       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3174       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3175       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3176
3177   def CheckPrereq(self):
3178     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3179     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3180     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3181
3182     assert self.group_uuid in owned_groups
3183
3184     # Check if locked instances are still correct
3185     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3186
3187     # Get instance information
3188     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3189
3190     # Check if node groups for locked instances are still correct
3191     for (instance_name, inst) in self.instances.items():
3192       assert owned_nodes.issuperset(inst.all_nodes), \
3193         "Instance %s's nodes changed while we kept the lock" % instance_name
3194
3195       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3196                                              owned_groups)
3197
3198       assert self.group_uuid in inst_groups, \
3199         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3200
3201   def Exec(self, feedback_fn):
3202     """Verify integrity of cluster disks.
3203
3204     @rtype: tuple of three items
3205     @return: a tuple of (dict of node-to-node_error, list of instances
3206         which need activate-disks, dict of instance: (node, volume) for
3207         missing volumes
3208
3209     """
3210     res_nodes = {}
3211     res_instances = set()
3212     res_missing = {}
3213
3214     nv_dict = _MapInstanceDisksToNodes([inst
3215             for inst in self.instances.values()
3216             if inst.admin_state == constants.ADMINST_UP])
3217
3218     if nv_dict:
3219       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3220                              set(self.cfg.GetVmCapableNodeList()))
3221
3222       node_lvs = self.rpc.call_lv_list(nodes, [])
3223
3224       for (node, node_res) in node_lvs.items():
3225         if node_res.offline:
3226           continue
3227
3228         msg = node_res.fail_msg
3229         if msg:
3230           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3231           res_nodes[node] = msg
3232           continue
3233
3234         for lv_name, (_, _, lv_online) in node_res.payload.items():
3235           inst = nv_dict.pop((node, lv_name), None)
3236           if not (lv_online or inst is None):
3237             res_instances.add(inst)
3238
3239       # any leftover items in nv_dict are missing LVs, let's arrange the data
3240       # better
3241       for key, inst in nv_dict.iteritems():
3242         res_missing.setdefault(inst, []).append(list(key))
3243
3244     return (res_nodes, list(res_instances), res_missing)
3245
3246
3247 class LUClusterRepairDiskSizes(NoHooksLU):
3248   """Verifies the cluster disks sizes.
3249
3250   """
3251   REQ_BGL = False
3252
3253   def ExpandNames(self):
3254     if self.op.instances:
3255       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3256       self.needed_locks = {
3257         locking.LEVEL_NODE_RES: [],
3258         locking.LEVEL_INSTANCE: self.wanted_names,
3259         }
3260       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3261     else:
3262       self.wanted_names = None
3263       self.needed_locks = {
3264         locking.LEVEL_NODE_RES: locking.ALL_SET,
3265         locking.LEVEL_INSTANCE: locking.ALL_SET,
3266         }
3267     self.share_locks = {
3268       locking.LEVEL_NODE_RES: 1,
3269       locking.LEVEL_INSTANCE: 0,
3270       }
3271
3272   def DeclareLocks(self, level):
3273     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3274       self._LockInstancesNodes(primary_only=True, level=level)
3275
3276   def CheckPrereq(self):
3277     """Check prerequisites.
3278
3279     This only checks the optional instance list against the existing names.
3280
3281     """
3282     if self.wanted_names is None:
3283       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3284
3285     self.wanted_instances = \
3286         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3287
3288   def _EnsureChildSizes(self, disk):
3289     """Ensure children of the disk have the needed disk size.
3290
3291     This is valid mainly for DRBD8 and fixes an issue where the
3292     children have smaller disk size.
3293
3294     @param disk: an L{ganeti.objects.Disk} object
3295
3296     """
3297     if disk.dev_type == constants.LD_DRBD8:
3298       assert disk.children, "Empty children for DRBD8?"
3299       fchild = disk.children[0]
3300       mismatch = fchild.size < disk.size
3301       if mismatch:
3302         self.LogInfo("Child disk has size %d, parent %d, fixing",
3303                      fchild.size, disk.size)
3304         fchild.size = disk.size
3305
3306       # and we recurse on this child only, not on the metadev
3307       return self._EnsureChildSizes(fchild) or mismatch
3308     else:
3309       return False
3310
3311   def Exec(self, feedback_fn):
3312     """Verify the size of cluster disks.
3313
3314     """
3315     # TODO: check child disks too
3316     # TODO: check differences in size between primary/secondary nodes
3317     per_node_disks = {}
3318     for instance in self.wanted_instances:
3319       pnode = instance.primary_node
3320       if pnode not in per_node_disks:
3321         per_node_disks[pnode] = []
3322       for idx, disk in enumerate(instance.disks):
3323         per_node_disks[pnode].append((instance, idx, disk))
3324
3325     assert not (frozenset(per_node_disks.keys()) -
3326                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3327       "Not owning correct locks"
3328     assert not self.owned_locks(locking.LEVEL_NODE)
3329
3330     changed = []
3331     for node, dskl in per_node_disks.items():
3332       newl = [v[2].Copy() for v in dskl]
3333       for dsk in newl:
3334         self.cfg.SetDiskID(dsk, node)
3335       result = self.rpc.call_blockdev_getsize(node, newl)
3336       if result.fail_msg:
3337         self.LogWarning("Failure in blockdev_getsize call to node"
3338                         " %s, ignoring", node)
3339         continue
3340       if len(result.payload) != len(dskl):
3341         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3342                         " result.payload=%s", node, len(dskl), result.payload)
3343         self.LogWarning("Invalid result from node %s, ignoring node results",
3344                         node)
3345         continue
3346       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3347         if size is None:
3348           self.LogWarning("Disk %d of instance %s did not return size"
3349                           " information, ignoring", idx, instance.name)
3350           continue
3351         if not isinstance(size, (int, long)):
3352           self.LogWarning("Disk %d of instance %s did not return valid"
3353                           " size information, ignoring", idx, instance.name)
3354           continue
3355         size = size >> 20
3356         if size != disk.size:
3357           self.LogInfo("Disk %d of instance %s has mismatched size,"
3358                        " correcting: recorded %d, actual %d", idx,
3359                        instance.name, disk.size, size)
3360           disk.size = size
3361           self.cfg.Update(instance, feedback_fn)
3362           changed.append((instance.name, idx, size))
3363         if self._EnsureChildSizes(disk):
3364           self.cfg.Update(instance, feedback_fn)
3365           changed.append((instance.name, idx, disk.size))
3366     return changed
3367
3368
3369 class LUClusterRename(LogicalUnit):
3370   """Rename the cluster.
3371
3372   """
3373   HPATH = "cluster-rename"
3374   HTYPE = constants.HTYPE_CLUSTER
3375
3376   def BuildHooksEnv(self):
3377     """Build hooks env.
3378
3379     """
3380     return {
3381       "OP_TARGET": self.cfg.GetClusterName(),
3382       "NEW_NAME": self.op.name,
3383       }
3384
3385   def BuildHooksNodes(self):
3386     """Build hooks nodes.
3387
3388     """
3389     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3390
3391   def CheckPrereq(self):
3392     """Verify that the passed name is a valid one.
3393
3394     """
3395     hostname = netutils.GetHostname(name=self.op.name,
3396                                     family=self.cfg.GetPrimaryIPFamily())
3397
3398     new_name = hostname.name
3399     self.ip = new_ip = hostname.ip
3400     old_name = self.cfg.GetClusterName()
3401     old_ip = self.cfg.GetMasterIP()
3402     if new_name == old_name and new_ip == old_ip:
3403       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3404                                  " cluster has changed",
3405                                  errors.ECODE_INVAL)
3406     if new_ip != old_ip:
3407       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3408         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3409                                    " reachable on the network" %
3410                                    new_ip, errors.ECODE_NOTUNIQUE)
3411
3412     self.op.name = new_name
3413
3414   def Exec(self, feedback_fn):
3415     """Rename the cluster.
3416
3417     """
3418     clustername = self.op.name
3419     new_ip = self.ip
3420
3421     # shutdown the master IP
3422     master_params = self.cfg.GetMasterNetworkParameters()
3423     ems = self.cfg.GetUseExternalMipScript()
3424     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3425                                                      master_params, ems)
3426     result.Raise("Could not disable the master role")
3427
3428     try:
3429       cluster = self.cfg.GetClusterInfo()
3430       cluster.cluster_name = clustername
3431       cluster.master_ip = new_ip
3432       self.cfg.Update(cluster, feedback_fn)
3433
3434       # update the known hosts file
3435       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3436       node_list = self.cfg.GetOnlineNodeList()
3437       try:
3438         node_list.remove(master_params.name)
3439       except ValueError:
3440         pass
3441       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3442     finally:
3443       master_params.ip = new_ip
3444       result = self.rpc.call_node_activate_master_ip(master_params.name,
3445                                                      master_params, ems)
3446       msg = result.fail_msg
3447       if msg:
3448         self.LogWarning("Could not re-enable the master role on"
3449                         " the master, please restart manually: %s", msg)
3450
3451     return clustername
3452
3453
3454 def _ValidateNetmask(cfg, netmask):
3455   """Checks if a netmask is valid.
3456
3457   @type cfg: L{config.ConfigWriter}
3458   @param cfg: The cluster configuration
3459   @type netmask: int
3460   @param netmask: the netmask to be verified
3461   @raise errors.OpPrereqError: if the validation fails
3462
3463   """
3464   ip_family = cfg.GetPrimaryIPFamily()
3465   try:
3466     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3467   except errors.ProgrammerError:
3468     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3469                                ip_family)
3470   if not ipcls.ValidateNetmask(netmask):
3471     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3472                                 (netmask))
3473
3474
3475 class LUClusterSetParams(LogicalUnit):
3476   """Change the parameters of the cluster.
3477
3478   """
3479   HPATH = "cluster-modify"
3480   HTYPE = constants.HTYPE_CLUSTER
3481   REQ_BGL = False
3482
3483   def CheckArguments(self):
3484     """Check parameters
3485
3486     """
3487     if self.op.uid_pool:
3488       uidpool.CheckUidPool(self.op.uid_pool)
3489
3490     if self.op.add_uids:
3491       uidpool.CheckUidPool(self.op.add_uids)
3492
3493     if self.op.remove_uids:
3494       uidpool.CheckUidPool(self.op.remove_uids)
3495
3496     if self.op.master_netmask is not None:
3497       _ValidateNetmask(self.cfg, self.op.master_netmask)
3498
3499     if self.op.diskparams:
3500       for dt_params in self.op.diskparams.values():
3501         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3502
3503   def ExpandNames(self):
3504     # FIXME: in the future maybe other cluster params won't require checking on
3505     # all nodes to be modified.
3506     self.needed_locks = {
3507       locking.LEVEL_NODE: locking.ALL_SET,
3508     }
3509     self.share_locks[locking.LEVEL_NODE] = 1
3510
3511   def BuildHooksEnv(self):
3512     """Build hooks env.
3513
3514     """
3515     return {
3516       "OP_TARGET": self.cfg.GetClusterName(),
3517       "NEW_VG_NAME": self.op.vg_name,
3518       }
3519
3520   def BuildHooksNodes(self):
3521     """Build hooks nodes.
3522
3523     """
3524     mn = self.cfg.GetMasterNode()
3525     return ([mn], [mn])
3526
3527   def CheckPrereq(self):
3528     """Check prerequisites.
3529
3530     This checks whether the given params don't conflict and
3531     if the given volume group is valid.
3532
3533     """
3534     if self.op.vg_name is not None and not self.op.vg_name:
3535       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3536         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3537                                    " instances exist", errors.ECODE_INVAL)
3538
3539     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3540       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3541         raise errors.OpPrereqError("Cannot disable drbd helper while"
3542                                    " drbd-based instances exist",
3543                                    errors.ECODE_INVAL)
3544
3545     node_list = self.owned_locks(locking.LEVEL_NODE)
3546
3547     # if vg_name not None, checks given volume group on all nodes
3548     if self.op.vg_name:
3549       vglist = self.rpc.call_vg_list(node_list)
3550       for node in node_list:
3551         msg = vglist[node].fail_msg
3552         if msg:
3553           # ignoring down node
3554           self.LogWarning("Error while gathering data on node %s"
3555                           " (ignoring node): %s", node, msg)
3556           continue
3557         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3558                                               self.op.vg_name,
3559                                               constants.MIN_VG_SIZE)
3560         if vgstatus:
3561           raise errors.OpPrereqError("Error on node '%s': %s" %
3562                                      (node, vgstatus), errors.ECODE_ENVIRON)
3563
3564     if self.op.drbd_helper:
3565       # checks given drbd helper on all nodes
3566       helpers = self.rpc.call_drbd_helper(node_list)
3567       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3568         if ninfo.offline:
3569           self.LogInfo("Not checking drbd helper on offline node %s", node)
3570           continue
3571         msg = helpers[node].fail_msg
3572         if msg:
3573           raise errors.OpPrereqError("Error checking drbd helper on node"
3574                                      " '%s': %s" % (node, msg),
3575                                      errors.ECODE_ENVIRON)
3576         node_helper = helpers[node].payload
3577         if node_helper != self.op.drbd_helper:
3578           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3579                                      (node, node_helper), errors.ECODE_ENVIRON)
3580
3581     self.cluster = cluster = self.cfg.GetClusterInfo()
3582     # validate params changes
3583     if self.op.beparams:
3584       objects.UpgradeBeParams(self.op.beparams)
3585       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3586       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3587
3588     if self.op.ndparams:
3589       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3590       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3591
3592       # TODO: we need a more general way to handle resetting
3593       # cluster-level parameters to default values
3594       if self.new_ndparams["oob_program"] == "":
3595         self.new_ndparams["oob_program"] = \
3596             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3597
3598     if self.op.nicparams:
3599       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3600       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3601       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3602       nic_errors = []
3603
3604       # check all instances for consistency
3605       for instance in self.cfg.GetAllInstancesInfo().values():
3606         for nic_idx, nic in enumerate(instance.nics):
3607           params_copy = copy.deepcopy(nic.nicparams)
3608           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3609
3610           # check parameter syntax
3611           try:
3612             objects.NIC.CheckParameterSyntax(params_filled)
3613           except errors.ConfigurationError, err:
3614             nic_errors.append("Instance %s, nic/%d: %s" %
3615                               (instance.name, nic_idx, err))
3616
3617           # if we're moving instances to routed, check that they have an ip
3618           target_mode = params_filled[constants.NIC_MODE]
3619           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3620             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3621                               " address" % (instance.name, nic_idx))
3622       if nic_errors:
3623         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3624                                    "\n".join(nic_errors))
3625
3626     # hypervisor list/parameters
3627     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3628     if self.op.hvparams:
3629       for hv_name, hv_dict in self.op.hvparams.items():
3630         if hv_name not in self.new_hvparams:
3631           self.new_hvparams[hv_name] = hv_dict
3632         else:
3633           self.new_hvparams[hv_name].update(hv_dict)
3634
3635     # disk template parameters
3636     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3637     if self.op.diskparams:
3638       for dt_name, dt_params in self.op.diskparams.items():
3639         if dt_name not in self.op.diskparams:
3640           self.new_diskparams[dt_name] = dt_params
3641         else:
3642           self.new_diskparams[dt_name].update(dt_params)
3643
3644     # os hypervisor parameters
3645     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3646     if self.op.os_hvp:
3647       for os_name, hvs in self.op.os_hvp.items():
3648         if os_name not in self.new_os_hvp:
3649           self.new_os_hvp[os_name] = hvs
3650         else:
3651           for hv_name, hv_dict in hvs.items():
3652             if hv_name not in self.new_os_hvp[os_name]:
3653               self.new_os_hvp[os_name][hv_name] = hv_dict
3654             else:
3655               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3656
3657     # os parameters
3658     self.new_osp = objects.FillDict(cluster.osparams, {})
3659     if self.op.osparams:
3660       for os_name, osp in self.op.osparams.items():
3661         if os_name not in self.new_osp:
3662           self.new_osp[os_name] = {}
3663
3664         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3665                                                   use_none=True)
3666
3667         if not self.new_osp[os_name]:
3668           # we removed all parameters
3669           del self.new_osp[os_name]
3670         else:
3671           # check the parameter validity (remote check)
3672           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3673                          os_name, self.new_osp[os_name])
3674
3675     # changes to the hypervisor list
3676     if self.op.enabled_hypervisors is not None:
3677       self.hv_list = self.op.enabled_hypervisors
3678       for hv in self.hv_list:
3679         # if the hypervisor doesn't already exist in the cluster
3680         # hvparams, we initialize it to empty, and then (in both
3681         # cases) we make sure to fill the defaults, as we might not
3682         # have a complete defaults list if the hypervisor wasn't
3683         # enabled before
3684         if hv not in new_hvp:
3685           new_hvp[hv] = {}
3686         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3687         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3688     else:
3689       self.hv_list = cluster.enabled_hypervisors
3690
3691     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3692       # either the enabled list has changed, or the parameters have, validate
3693       for hv_name, hv_params in self.new_hvparams.items():
3694         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3695             (self.op.enabled_hypervisors and
3696              hv_name in self.op.enabled_hypervisors)):
3697           # either this is a new hypervisor, or its parameters have changed
3698           hv_class = hypervisor.GetHypervisor(hv_name)
3699           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3700           hv_class.CheckParameterSyntax(hv_params)
3701           _CheckHVParams(self, node_list, hv_name, hv_params)
3702
3703     if self.op.os_hvp:
3704       # no need to check any newly-enabled hypervisors, since the
3705       # defaults have already been checked in the above code-block
3706       for os_name, os_hvp in self.new_os_hvp.items():
3707         for hv_name, hv_params in os_hvp.items():
3708           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3709           # we need to fill in the new os_hvp on top of the actual hv_p
3710           cluster_defaults = self.new_hvparams.get(hv_name, {})
3711           new_osp = objects.FillDict(cluster_defaults, hv_params)
3712           hv_class = hypervisor.GetHypervisor(hv_name)
3713           hv_class.CheckParameterSyntax(new_osp)
3714           _CheckHVParams(self, node_list, hv_name, new_osp)
3715
3716     if self.op.default_iallocator:
3717       alloc_script = utils.FindFile(self.op.default_iallocator,
3718                                     constants.IALLOCATOR_SEARCH_PATH,
3719                                     os.path.isfile)
3720       if alloc_script is None:
3721         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3722                                    " specified" % self.op.default_iallocator,
3723                                    errors.ECODE_INVAL)
3724
3725   def Exec(self, feedback_fn):
3726     """Change the parameters of the cluster.
3727
3728     """
3729     if self.op.vg_name is not None:
3730       new_volume = self.op.vg_name
3731       if not new_volume:
3732         new_volume = None
3733       if new_volume != self.cfg.GetVGName():
3734         self.cfg.SetVGName(new_volume)
3735       else:
3736         feedback_fn("Cluster LVM configuration already in desired"
3737                     " state, not changing")
3738     if self.op.drbd_helper is not None:
3739       new_helper = self.op.drbd_helper
3740       if not new_helper:
3741         new_helper = None
3742       if new_helper != self.cfg.GetDRBDHelper():
3743         self.cfg.SetDRBDHelper(new_helper)
3744       else:
3745         feedback_fn("Cluster DRBD helper already in desired state,"
3746                     " not changing")
3747     if self.op.hvparams:
3748       self.cluster.hvparams = self.new_hvparams
3749     if self.op.os_hvp:
3750       self.cluster.os_hvp = self.new_os_hvp
3751     if self.op.enabled_hypervisors is not None:
3752       self.cluster.hvparams = self.new_hvparams
3753       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3754     if self.op.beparams:
3755       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3756     if self.op.nicparams:
3757       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3758     if self.op.osparams:
3759       self.cluster.osparams = self.new_osp
3760     if self.op.ndparams:
3761       self.cluster.ndparams = self.new_ndparams
3762     if self.op.diskparams:
3763       self.cluster.diskparams = self.new_diskparams
3764
3765     if self.op.candidate_pool_size is not None:
3766       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3767       # we need to update the pool size here, otherwise the save will fail
3768       _AdjustCandidatePool(self, [])
3769
3770     if self.op.maintain_node_health is not None:
3771       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3772         feedback_fn("Note: CONFD was disabled at build time, node health"
3773                     " maintenance is not useful (still enabling it)")
3774       self.cluster.maintain_node_health = self.op.maintain_node_health
3775
3776     if self.op.prealloc_wipe_disks is not None:
3777       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3778
3779     if self.op.add_uids is not None:
3780       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3781
3782     if self.op.remove_uids is not None:
3783       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3784
3785     if self.op.uid_pool is not None:
3786       self.cluster.uid_pool = self.op.uid_pool
3787
3788     if self.op.default_iallocator is not None:
3789       self.cluster.default_iallocator = self.op.default_iallocator
3790
3791     if self.op.reserved_lvs is not None:
3792       self.cluster.reserved_lvs = self.op.reserved_lvs
3793
3794     if self.op.use_external_mip_script is not None:
3795       self.cluster.use_external_mip_script = self.op.use_external_mip_script
3796
3797     def helper_os(aname, mods, desc):
3798       desc += " OS list"
3799       lst = getattr(self.cluster, aname)
3800       for key, val in mods:
3801         if key == constants.DDM_ADD:
3802           if val in lst:
3803             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3804           else:
3805             lst.append(val)
3806         elif key == constants.DDM_REMOVE:
3807           if val in lst:
3808             lst.remove(val)
3809           else:
3810             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3811         else:
3812           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3813
3814     if self.op.hidden_os:
3815       helper_os("hidden_os", self.op.hidden_os, "hidden")
3816
3817     if self.op.blacklisted_os:
3818       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3819
3820     if self.op.master_netdev:
3821       master_params = self.cfg.GetMasterNetworkParameters()
3822       ems = self.cfg.GetUseExternalMipScript()
3823       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3824                   self.cluster.master_netdev)
3825       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3826                                                        master_params, ems)
3827       result.Raise("Could not disable the master ip")
3828       feedback_fn("Changing master_netdev from %s to %s" %
3829                   (master_params.netdev, self.op.master_netdev))
3830       self.cluster.master_netdev = self.op.master_netdev
3831
3832     if self.op.master_netmask:
3833       master_params = self.cfg.GetMasterNetworkParameters()
3834       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3835       result = self.rpc.call_node_change_master_netmask(master_params.name,
3836                                                         master_params.netmask,
3837                                                         self.op.master_netmask,
3838                                                         master_params.ip,
3839                                                         master_params.netdev)
3840       if result.fail_msg:
3841         msg = "Could not change the master IP netmask: %s" % result.fail_msg
3842         feedback_fn(msg)
3843
3844       self.cluster.master_netmask = self.op.master_netmask
3845
3846     self.cfg.Update(self.cluster, feedback_fn)
3847
3848     if self.op.master_netdev:
3849       master_params = self.cfg.GetMasterNetworkParameters()
3850       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3851                   self.op.master_netdev)
3852       ems = self.cfg.GetUseExternalMipScript()
3853       result = self.rpc.call_node_activate_master_ip(master_params.name,
3854                                                      master_params, ems)
3855       if result.fail_msg:
3856         self.LogWarning("Could not re-enable the master ip on"
3857                         " the master, please restart manually: %s",
3858                         result.fail_msg)
3859
3860
3861 def _UploadHelper(lu, nodes, fname):
3862   """Helper for uploading a file and showing warnings.
3863
3864   """
3865   if os.path.exists(fname):
3866     result = lu.rpc.call_upload_file(nodes, fname)
3867     for to_node, to_result in result.items():
3868       msg = to_result.fail_msg
3869       if msg:
3870         msg = ("Copy of file %s to node %s failed: %s" %
3871                (fname, to_node, msg))
3872         lu.proc.LogWarning(msg)
3873
3874
3875 def _ComputeAncillaryFiles(cluster, redist):
3876   """Compute files external to Ganeti which need to be consistent.
3877
3878   @type redist: boolean
3879   @param redist: Whether to include files which need to be redistributed
3880
3881   """
3882   # Compute files for all nodes
3883   files_all = set([
3884     constants.SSH_KNOWN_HOSTS_FILE,
3885     constants.CONFD_HMAC_KEY,
3886     constants.CLUSTER_DOMAIN_SECRET_FILE,
3887     constants.SPICE_CERT_FILE,
3888     constants.SPICE_CACERT_FILE,
3889     constants.RAPI_USERS_FILE,
3890     ])
3891
3892   if not redist:
3893     files_all.update(constants.ALL_CERT_FILES)
3894     files_all.update(ssconf.SimpleStore().GetFileList())
3895   else:
3896     # we need to ship at least the RAPI certificate
3897     files_all.add(constants.RAPI_CERT_FILE)
3898
3899   if cluster.modify_etc_hosts:
3900     files_all.add(constants.ETC_HOSTS)
3901
3902   # Files which are optional, these must:
3903   # - be present in one other category as well
3904   # - either exist or not exist on all nodes of that category (mc, vm all)
3905   files_opt = set([
3906     constants.RAPI_USERS_FILE,
3907     ])
3908
3909   # Files which should only be on master candidates
3910   files_mc = set()
3911
3912   if not redist:
3913     files_mc.add(constants.CLUSTER_CONF_FILE)
3914
3915     # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3916     # replication
3917     files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3918
3919   # Files which should only be on VM-capable nodes
3920   files_vm = set(filename
3921     for hv_name in cluster.enabled_hypervisors
3922     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3923
3924   files_opt |= set(filename
3925     for hv_name in cluster.enabled_hypervisors
3926     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3927
3928   # Filenames in each category must be unique
3929   all_files_set = files_all | files_mc | files_vm
3930   assert (len(all_files_set) ==
3931           sum(map(len, [files_all, files_mc, files_vm]))), \
3932          "Found file listed in more than one file list"
3933
3934   # Optional files must be present in one other category
3935   assert all_files_set.issuperset(files_opt), \
3936          "Optional file not in a different required list"
3937
3938   return (files_all, files_opt, files_mc, files_vm)
3939
3940
3941 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3942   """Distribute additional files which are part of the cluster configuration.
3943
3944   ConfigWriter takes care of distributing the config and ssconf files, but
3945   there are more files which should be distributed to all nodes. This function
3946   makes sure those are copied.
3947
3948   @param lu: calling logical unit
3949   @param additional_nodes: list of nodes not in the config to distribute to
3950   @type additional_vm: boolean
3951   @param additional_vm: whether the additional nodes are vm-capable or not
3952
3953   """
3954   # Gather target nodes
3955   cluster = lu.cfg.GetClusterInfo()
3956   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3957
3958   online_nodes = lu.cfg.GetOnlineNodeList()
3959   vm_nodes = lu.cfg.GetVmCapableNodeList()
3960
3961   if additional_nodes is not None:
3962     online_nodes.extend(additional_nodes)
3963     if additional_vm:
3964       vm_nodes.extend(additional_nodes)
3965
3966   # Never distribute to master node
3967   for nodelist in [online_nodes, vm_nodes]:
3968     if master_info.name in nodelist:
3969       nodelist.remove(master_info.name)
3970
3971   # Gather file lists
3972   (files_all, _, files_mc, files_vm) = \
3973     _ComputeAncillaryFiles(cluster, True)
3974
3975   # Never re-distribute configuration file from here
3976   assert not (constants.CLUSTER_CONF_FILE in files_all or
3977               constants.CLUSTER_CONF_FILE in files_vm)
3978   assert not files_mc, "Master candidates not handled in this function"
3979
3980   filemap = [
3981     (online_nodes, files_all),
3982     (vm_nodes, files_vm),
3983     ]
3984
3985   # Upload the files
3986   for (node_list, files) in filemap:
3987     for fname in files:
3988       _UploadHelper(lu, node_list, fname)
3989
3990
3991 class LUClusterRedistConf(NoHooksLU):
3992   """Force the redistribution of cluster configuration.
3993
3994   This is a very simple LU.
3995
3996   """
3997   REQ_BGL = False
3998
3999   def ExpandNames(self):
4000     self.needed_locks = {
4001       locking.LEVEL_NODE: locking.ALL_SET,
4002     }
4003     self.share_locks[locking.LEVEL_NODE] = 1
4004
4005   def Exec(self, feedback_fn):
4006     """Redistribute the configuration.
4007
4008     """
4009     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4010     _RedistributeAncillaryFiles(self)
4011
4012
4013 class LUClusterActivateMasterIp(NoHooksLU):
4014   """Activate the master IP on the master node.
4015
4016   """
4017   def Exec(self, feedback_fn):
4018     """Activate the master IP.
4019
4020     """
4021     master_params = self.cfg.GetMasterNetworkParameters()
4022     ems = self.cfg.GetUseExternalMipScript()
4023     result = self.rpc.call_node_activate_master_ip(master_params.name,
4024                                                    master_params, ems)
4025     result.Raise("Could not activate the master IP")
4026
4027
4028 class LUClusterDeactivateMasterIp(NoHooksLU):
4029   """Deactivate the master IP on the master node.
4030
4031   """
4032   def Exec(self, feedback_fn):
4033     """Deactivate the master IP.
4034
4035     """
4036     master_params = self.cfg.GetMasterNetworkParameters()
4037     ems = self.cfg.GetUseExternalMipScript()
4038     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4039                                                      master_params, ems)
4040     result.Raise("Could not deactivate the master IP")
4041
4042
4043 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4044   """Sleep and poll for an instance's disk to sync.
4045
4046   """
4047   if not instance.disks or disks is not None and not disks:
4048     return True
4049
4050   disks = _ExpandCheckDisks(instance, disks)
4051
4052   if not oneshot:
4053     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4054
4055   node = instance.primary_node
4056
4057   for dev in disks:
4058     lu.cfg.SetDiskID(dev, node)
4059
4060   # TODO: Convert to utils.Retry
4061
4062   retries = 0
4063   degr_retries = 10 # in seconds, as we sleep 1 second each time
4064   while True:
4065     max_time = 0
4066     done = True
4067     cumul_degraded = False
4068     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4069     msg = rstats.fail_msg
4070     if msg:
4071       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4072       retries += 1
4073       if retries >= 10:
4074         raise errors.RemoteError("Can't contact node %s for mirror data,"
4075                                  " aborting." % node)
4076       time.sleep(6)
4077       continue
4078     rstats = rstats.payload
4079     retries = 0
4080     for i, mstat in enumerate(rstats):
4081       if mstat is None:
4082         lu.LogWarning("Can't compute data for node %s/%s",
4083                            node, disks[i].iv_name)
4084         continue
4085
4086       cumul_degraded = (cumul_degraded or
4087                         (mstat.is_degraded and mstat.sync_percent is None))
4088       if mstat.sync_percent is not None:
4089         done = False
4090         if mstat.estimated_time is not None:
4091           rem_time = ("%s remaining (estimated)" %
4092                       utils.FormatSeconds(mstat.estimated_time))
4093           max_time = mstat.estimated_time
4094         else:
4095           rem_time = "no time estimate"
4096         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4097                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4098
4099     # if we're done but degraded, let's do a few small retries, to
4100     # make sure we see a stable and not transient situation; therefore
4101     # we force restart of the loop
4102     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4103       logging.info("Degraded disks found, %d retries left", degr_retries)
4104       degr_retries -= 1
4105       time.sleep(1)
4106       continue
4107
4108     if done or oneshot:
4109       break
4110
4111     time.sleep(min(60, max_time))
4112
4113   if done:
4114     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4115   return not cumul_degraded
4116
4117
4118 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4119   """Check that mirrors are not degraded.
4120
4121   The ldisk parameter, if True, will change the test from the
4122   is_degraded attribute (which represents overall non-ok status for
4123   the device(s)) to the ldisk (representing the local storage status).
4124
4125   """
4126   lu.cfg.SetDiskID(dev, node)
4127
4128   result = True
4129
4130   if on_primary or dev.AssembleOnSecondary():
4131     rstats = lu.rpc.call_blockdev_find(node, dev)
4132     msg = rstats.fail_msg
4133     if msg:
4134       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4135       result = False
4136     elif not rstats.payload:
4137       lu.LogWarning("Can't find disk on node %s", node)
4138       result = False
4139     else:
4140       if ldisk:
4141         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4142       else:
4143         result = result and not rstats.payload.is_degraded
4144
4145   if dev.children:
4146     for child in dev.children:
4147       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4148
4149   return result
4150
4151
4152 class LUOobCommand(NoHooksLU):
4153   """Logical unit for OOB handling.
4154
4155   """
4156   REG_BGL = False
4157   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4158
4159   def ExpandNames(self):
4160     """Gather locks we need.
4161
4162     """
4163     if self.op.node_names:
4164       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4165       lock_names = self.op.node_names
4166     else:
4167       lock_names = locking.ALL_SET
4168
4169     self.needed_locks = {
4170       locking.LEVEL_NODE: lock_names,
4171       }
4172
4173   def CheckPrereq(self):
4174     """Check prerequisites.
4175
4176     This checks:
4177      - the node exists in the configuration
4178      - OOB is supported
4179
4180     Any errors are signaled by raising errors.OpPrereqError.
4181
4182     """
4183     self.nodes = []
4184     self.master_node = self.cfg.GetMasterNode()
4185
4186     assert self.op.power_delay >= 0.0
4187
4188     if self.op.node_names:
4189       if (self.op.command in self._SKIP_MASTER and
4190           self.master_node in self.op.node_names):
4191         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4192         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4193
4194         if master_oob_handler:
4195           additional_text = ("run '%s %s %s' if you want to operate on the"
4196                              " master regardless") % (master_oob_handler,
4197                                                       self.op.command,
4198                                                       self.master_node)
4199         else:
4200           additional_text = "it does not support out-of-band operations"
4201
4202         raise errors.OpPrereqError(("Operating on the master node %s is not"
4203                                     " allowed for %s; %s") %
4204                                    (self.master_node, self.op.command,
4205                                     additional_text), errors.ECODE_INVAL)
4206     else:
4207       self.op.node_names = self.cfg.GetNodeList()
4208       if self.op.command in self._SKIP_MASTER:
4209         self.op.node_names.remove(self.master_node)
4210
4211     if self.op.command in self._SKIP_MASTER:
4212       assert self.master_node not in self.op.node_names
4213
4214     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4215       if node is None:
4216         raise errors.OpPrereqError("Node %s not found" % node_name,
4217                                    errors.ECODE_NOENT)
4218       else:
4219         self.nodes.append(node)
4220
4221       if (not self.op.ignore_status and
4222           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4223         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4224                                     " not marked offline") % node_name,
4225                                    errors.ECODE_STATE)
4226
4227   def Exec(self, feedback_fn):
4228     """Execute OOB and return result if we expect any.
4229
4230     """
4231     master_node = self.master_node
4232     ret = []
4233
4234     for idx, node in enumerate(utils.NiceSort(self.nodes,
4235                                               key=lambda node: node.name)):
4236       node_entry = [(constants.RS_NORMAL, node.name)]
4237       ret.append(node_entry)
4238
4239       oob_program = _SupportsOob(self.cfg, node)
4240
4241       if not oob_program:
4242         node_entry.append((constants.RS_UNAVAIL, None))
4243         continue
4244
4245       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4246                    self.op.command, oob_program, node.name)
4247       result = self.rpc.call_run_oob(master_node, oob_program,
4248                                      self.op.command, node.name,
4249                                      self.op.timeout)
4250
4251       if result.fail_msg:
4252         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4253                         node.name, result.fail_msg)
4254         node_entry.append((constants.RS_NODATA, None))
4255       else:
4256         try:
4257           self._CheckPayload(result)
4258         except errors.OpExecError, err:
4259           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4260                           node.name, err)
4261           node_entry.append((constants.RS_NODATA, None))
4262         else:
4263           if self.op.command == constants.OOB_HEALTH:
4264             # For health we should log important events
4265             for item, status in result.payload:
4266               if status in [constants.OOB_STATUS_WARNING,
4267                             constants.OOB_STATUS_CRITICAL]:
4268                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4269                                 item, node.name, status)
4270
4271           if self.op.command == constants.OOB_POWER_ON:
4272             node.powered = True
4273           elif self.op.command == constants.OOB_POWER_OFF:
4274             node.powered = False
4275           elif self.op.command == constants.OOB_POWER_STATUS:
4276             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4277             if powered != node.powered:
4278               logging.warning(("Recorded power state (%s) of node '%s' does not"
4279                                " match actual power state (%s)"), node.powered,
4280                               node.name, powered)
4281
4282           # For configuration changing commands we should update the node
4283           if self.op.command in (constants.OOB_POWER_ON,
4284                                  constants.OOB_POWER_OFF):
4285             self.cfg.Update(node, feedback_fn)
4286
4287           node_entry.append((constants.RS_NORMAL, result.payload))
4288
4289           if (self.op.command == constants.OOB_POWER_ON and
4290               idx < len(self.nodes) - 1):
4291             time.sleep(self.op.power_delay)
4292
4293     return ret
4294
4295   def _CheckPayload(self, result):
4296     """Checks if the payload is valid.
4297
4298     @param result: RPC result
4299     @raises errors.OpExecError: If payload is not valid
4300
4301     """
4302     errs = []
4303     if self.op.command == constants.OOB_HEALTH:
4304       if not isinstance(result.payload, list):
4305         errs.append("command 'health' is expected to return a list but got %s" %
4306                     type(result.payload))
4307       else:
4308         for item, status in result.payload:
4309           if status not in constants.OOB_STATUSES:
4310             errs.append("health item '%s' has invalid status '%s'" %
4311                         (item, status))
4312
4313     if self.op.command == constants.OOB_POWER_STATUS:
4314       if not isinstance(result.payload, dict):
4315         errs.append("power-status is expected to return a dict but got %s" %
4316                     type(result.payload))
4317
4318     if self.op.command in [
4319         constants.OOB_POWER_ON,
4320         constants.OOB_POWER_OFF,
4321         constants.OOB_POWER_CYCLE,
4322         ]:
4323       if result.payload is not None:
4324         errs.append("%s is expected to not return payload but got '%s'" %
4325                     (self.op.command, result.payload))
4326
4327     if errs:
4328       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4329                                utils.CommaJoin(errs))
4330
4331
4332 class _OsQuery(_QueryBase):
4333   FIELDS = query.OS_FIELDS
4334
4335   def ExpandNames(self, lu):
4336     # Lock all nodes in shared mode
4337     # Temporary removal of locks, should be reverted later
4338     # TODO: reintroduce locks when they are lighter-weight
4339     lu.needed_locks = {}
4340     #self.share_locks[locking.LEVEL_NODE] = 1
4341     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4342
4343     # The following variables interact with _QueryBase._GetNames
4344     if self.names:
4345       self.wanted = self.names
4346     else:
4347       self.wanted = locking.ALL_SET
4348
4349     self.do_locking = self.use_locking
4350
4351   def DeclareLocks(self, lu, level):
4352     pass
4353
4354   @staticmethod
4355   def _DiagnoseByOS(rlist):
4356     """Remaps a per-node return list into an a per-os per-node dictionary
4357
4358     @param rlist: a map with node names as keys and OS objects as values
4359
4360     @rtype: dict
4361     @return: a dictionary with osnames as keys and as value another
4362         map, with nodes as keys and tuples of (path, status, diagnose,
4363         variants, parameters, api_versions) as values, eg::
4364
4365           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4366                                      (/srv/..., False, "invalid api")],
4367                            "node2": [(/srv/..., True, "", [], [])]}
4368           }
4369
4370     """
4371     all_os = {}
4372     # we build here the list of nodes that didn't fail the RPC (at RPC
4373     # level), so that nodes with a non-responding node daemon don't
4374     # make all OSes invalid
4375     good_nodes = [node_name for node_name in rlist
4376                   if not rlist[node_name].fail_msg]
4377     for node_name, nr in rlist.items():
4378       if nr.fail_msg or not nr.payload:
4379         continue
4380       for (name, path, status, diagnose, variants,
4381            params, api_versions) in nr.payload:
4382         if name not in all_os:
4383           # build a list of nodes for this os containing empty lists
4384           # for each node in node_list
4385           all_os[name] = {}
4386           for nname in good_nodes:
4387             all_os[name][nname] = []
4388         # convert params from [name, help] to (name, help)
4389         params = [tuple(v) for v in params]
4390         all_os[name][node_name].append((path, status, diagnose,
4391                                         variants, params, api_versions))
4392     return all_os
4393
4394   def _GetQueryData(self, lu):
4395     """Computes the list of nodes and their attributes.
4396
4397     """
4398     # Locking is not used
4399     assert not (compat.any(lu.glm.is_owned(level)
4400                            for level in locking.LEVELS
4401                            if level != locking.LEVEL_CLUSTER) or
4402                 self.do_locking or self.use_locking)
4403
4404     valid_nodes = [node.name
4405                    for node in lu.cfg.GetAllNodesInfo().values()
4406                    if not node.offline and node.vm_capable]
4407     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4408     cluster = lu.cfg.GetClusterInfo()
4409
4410     data = {}
4411
4412     for (os_name, os_data) in pol.items():
4413       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4414                           hidden=(os_name in cluster.hidden_os),
4415                           blacklisted=(os_name in cluster.blacklisted_os))
4416
4417       variants = set()
4418       parameters = set()
4419       api_versions = set()
4420
4421       for idx, osl in enumerate(os_data.values()):
4422         info.valid = bool(info.valid and osl and osl[0][1])
4423         if not info.valid:
4424           break
4425
4426         (node_variants, node_params, node_api) = osl[0][3:6]
4427         if idx == 0:
4428           # First entry
4429           variants.update(node_variants)
4430           parameters.update(node_params)
4431           api_versions.update(node_api)
4432         else:
4433           # Filter out inconsistent values
4434           variants.intersection_update(node_variants)
4435           parameters.intersection_update(node_params)
4436           api_versions.intersection_update(node_api)
4437
4438       info.variants = list(variants)
4439       info.parameters = list(parameters)
4440       info.api_versions = list(api_versions)
4441
4442       data[os_name] = info
4443
4444     # Prepare data in requested order
4445     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4446             if name in data]
4447
4448
4449 class LUOsDiagnose(NoHooksLU):
4450   """Logical unit for OS diagnose/query.
4451
4452   """
4453   REQ_BGL = False
4454
4455   @staticmethod
4456   def _BuildFilter(fields, names):
4457     """Builds a filter for querying OSes.
4458
4459     """
4460     name_filter = qlang.MakeSimpleFilter("name", names)
4461
4462     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4463     # respective field is not requested
4464     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4465                      for fname in ["hidden", "blacklisted"]
4466                      if fname not in fields]
4467     if "valid" not in fields:
4468       status_filter.append([qlang.OP_TRUE, "valid"])
4469
4470     if status_filter:
4471       status_filter.insert(0, qlang.OP_AND)
4472     else:
4473       status_filter = None
4474
4475     if name_filter and status_filter:
4476       return [qlang.OP_AND, name_filter, status_filter]
4477     elif name_filter:
4478       return name_filter
4479     else:
4480       return status_filter
4481
4482   def CheckArguments(self):
4483     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4484                        self.op.output_fields, False)
4485
4486   def ExpandNames(self):
4487     self.oq.ExpandNames(self)
4488
4489   def Exec(self, feedback_fn):
4490     return self.oq.OldStyleQuery(self)
4491
4492
4493 class LUNodeRemove(LogicalUnit):
4494   """Logical unit for removing a node.
4495
4496   """
4497   HPATH = "node-remove"
4498   HTYPE = constants.HTYPE_NODE
4499
4500   def BuildHooksEnv(self):
4501     """Build hooks env.
4502
4503     This doesn't run on the target node in the pre phase as a failed
4504     node would then be impossible to remove.
4505
4506     """
4507     return {
4508       "OP_TARGET": self.op.node_name,
4509       "NODE_NAME": self.op.node_name,
4510       }
4511
4512   def BuildHooksNodes(self):
4513     """Build hooks nodes.
4514
4515     """
4516     all_nodes = self.cfg.GetNodeList()
4517     try:
4518       all_nodes.remove(self.op.node_name)
4519     except ValueError:
4520       logging.warning("Node '%s', which is about to be removed, was not found"
4521                       " in the list of all nodes", self.op.node_name)
4522     return (all_nodes, all_nodes)
4523
4524   def CheckPrereq(self):
4525     """Check prerequisites.
4526
4527     This checks:
4528      - the node exists in the configuration
4529      - it does not have primary or secondary instances
4530      - it's not the master
4531
4532     Any errors are signaled by raising errors.OpPrereqError.
4533
4534     """
4535     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4536     node = self.cfg.GetNodeInfo(self.op.node_name)
4537     assert node is not None
4538
4539     masternode = self.cfg.GetMasterNode()
4540     if node.name == masternode:
4541       raise errors.OpPrereqError("Node is the master node, failover to another"
4542                                  " node is required", errors.ECODE_INVAL)
4543
4544     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4545       if node.name in instance.all_nodes:
4546         raise errors.OpPrereqError("Instance %s is still running on the node,"
4547                                    " please remove first" % instance_name,
4548                                    errors.ECODE_INVAL)
4549     self.op.node_name = node.name
4550     self.node = node
4551
4552   def Exec(self, feedback_fn):
4553     """Removes the node from the cluster.
4554
4555     """
4556     node = self.node
4557     logging.info("Stopping the node daemon and removing configs from node %s",
4558                  node.name)
4559
4560     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4561
4562     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4563       "Not owning BGL"
4564
4565     # Promote nodes to master candidate as needed
4566     _AdjustCandidatePool(self, exceptions=[node.name])
4567     self.context.RemoveNode(node.name)
4568
4569     # Run post hooks on the node before it's removed
4570     _RunPostHook(self, node.name)
4571
4572     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4573     msg = result.fail_msg
4574     if msg:
4575       self.LogWarning("Errors encountered on the remote node while leaving"
4576                       " the cluster: %s", msg)
4577
4578     # Remove node from our /etc/hosts
4579     if self.cfg.GetClusterInfo().modify_etc_hosts:
4580       master_node = self.cfg.GetMasterNode()
4581       result = self.rpc.call_etc_hosts_modify(master_node,
4582                                               constants.ETC_HOSTS_REMOVE,
4583                                               node.name, None)
4584       result.Raise("Can't update hosts file with new host data")
4585       _RedistributeAncillaryFiles(self)
4586
4587
4588 class _NodeQuery(_QueryBase):
4589   FIELDS = query.NODE_FIELDS
4590
4591   def ExpandNames(self, lu):
4592     lu.needed_locks = {}
4593     lu.share_locks = _ShareAll()
4594
4595     if self.names:
4596       self.wanted = _GetWantedNodes(lu, self.names)
4597     else:
4598       self.wanted = locking.ALL_SET
4599
4600     self.do_locking = (self.use_locking and
4601                        query.NQ_LIVE in self.requested_data)
4602
4603     if self.do_locking:
4604       # If any non-static field is requested we need to lock the nodes
4605       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4606
4607   def DeclareLocks(self, lu, level):
4608     pass
4609
4610   def _GetQueryData(self, lu):
4611     """Computes the list of nodes and their attributes.
4612
4613     """
4614     all_info = lu.cfg.GetAllNodesInfo()
4615
4616     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4617
4618     # Gather data as requested
4619     if query.NQ_LIVE in self.requested_data:
4620       # filter out non-vm_capable nodes
4621       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4622
4623       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4624                                         [lu.cfg.GetHypervisorType()])
4625       live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4626                        for (name, nresult) in node_data.items()
4627                        if not nresult.fail_msg and nresult.payload)
4628     else:
4629       live_data = None
4630
4631     if query.NQ_INST in self.requested_data:
4632       node_to_primary = dict([(name, set()) for name in nodenames])
4633       node_to_secondary = dict([(name, set()) for name in nodenames])
4634
4635       inst_data = lu.cfg.GetAllInstancesInfo()
4636
4637       for inst in inst_data.values():
4638         if inst.primary_node in node_to_primary:
4639           node_to_primary[inst.primary_node].add(inst.name)
4640         for secnode in inst.secondary_nodes:
4641           if secnode in node_to_secondary:
4642             node_to_secondary[secnode].add(inst.name)
4643     else:
4644       node_to_primary = None
4645       node_to_secondary = None
4646
4647     if query.NQ_OOB in self.requested_data:
4648       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4649                          for name, node in all_info.iteritems())
4650     else:
4651       oob_support = None
4652
4653     if query.NQ_GROUP in self.requested_data:
4654       groups = lu.cfg.GetAllNodeGroupsInfo()
4655     else:
4656       groups = {}
4657
4658     return query.NodeQueryData([all_info[name] for name in nodenames],
4659                                live_data, lu.cfg.GetMasterNode(),
4660                                node_to_primary, node_to_secondary, groups,
4661                                oob_support, lu.cfg.GetClusterInfo())
4662
4663
4664 class LUNodeQuery(NoHooksLU):
4665   """Logical unit for querying nodes.
4666
4667   """
4668   # pylint: disable=W0142
4669   REQ_BGL = False
4670
4671   def CheckArguments(self):
4672     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4673                          self.op.output_fields, self.op.use_locking)
4674
4675   def ExpandNames(self):
4676     self.nq.ExpandNames(self)
4677
4678   def DeclareLocks(self, level):
4679     self.nq.DeclareLocks(self, level)
4680
4681   def Exec(self, feedback_fn):
4682     return self.nq.OldStyleQuery(self)
4683
4684
4685 class LUNodeQueryvols(NoHooksLU):
4686   """Logical unit for getting volumes on node(s).
4687
4688   """
4689   REQ_BGL = False
4690   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4691   _FIELDS_STATIC = utils.FieldSet("node")
4692
4693   def CheckArguments(self):
4694     _CheckOutputFields(static=self._FIELDS_STATIC,
4695                        dynamic=self._FIELDS_DYNAMIC,
4696                        selected=self.op.output_fields)
4697
4698   def ExpandNames(self):
4699     self.share_locks = _ShareAll()
4700     self.needed_locks = {}
4701
4702     if not self.op.nodes:
4703       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4704     else:
4705       self.needed_locks[locking.LEVEL_NODE] = \
4706         _GetWantedNodes(self, self.op.nodes)
4707
4708   def Exec(self, feedback_fn):
4709     """Computes the list of nodes and their attributes.
4710
4711     """
4712     nodenames = self.owned_locks(locking.LEVEL_NODE)
4713     volumes = self.rpc.call_node_volumes(nodenames)
4714
4715     ilist = self.cfg.GetAllInstancesInfo()
4716     vol2inst = _MapInstanceDisksToNodes(ilist.values())
4717
4718     output = []
4719     for node in nodenames:
4720       nresult = volumes[node]
4721       if nresult.offline:
4722         continue
4723       msg = nresult.fail_msg
4724       if msg:
4725         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4726         continue
4727
4728       node_vols = sorted(nresult.payload,
4729                          key=operator.itemgetter("dev"))
4730
4731       for vol in node_vols:
4732         node_output = []
4733         for field in self.op.output_fields:
4734           if field == "node":
4735             val = node
4736           elif field == "phys":
4737             val = vol["dev"]
4738           elif field == "vg":
4739             val = vol["vg"]
4740           elif field == "name":
4741             val = vol["name"]
4742           elif field == "size":
4743             val = int(float(vol["size"]))
4744           elif field == "instance":
4745             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4746           else:
4747             raise errors.ParameterError(field)
4748           node_output.append(str(val))
4749
4750         output.append(node_output)
4751
4752     return output
4753
4754
4755 class LUNodeQueryStorage(NoHooksLU):
4756   """Logical unit for getting information on storage units on node(s).
4757
4758   """
4759   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4760   REQ_BGL = False
4761
4762   def CheckArguments(self):
4763     _CheckOutputFields(static=self._FIELDS_STATIC,
4764                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4765                        selected=self.op.output_fields)
4766
4767   def ExpandNames(self):
4768     self.share_locks = _ShareAll()
4769     self.needed_locks = {}
4770
4771     if self.op.nodes:
4772       self.needed_locks[locking.LEVEL_NODE] = \
4773         _GetWantedNodes(self, self.op.nodes)
4774     else:
4775       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4776
4777   def Exec(self, feedback_fn):
4778     """Computes the list of nodes and their attributes.
4779
4780     """
4781     self.nodes = self.owned_locks(locking.LEVEL_NODE)
4782
4783     # Always get name to sort by
4784     if constants.SF_NAME in self.op.output_fields:
4785       fields = self.op.output_fields[:]
4786     else:
4787       fields = [constants.SF_NAME] + self.op.output_fields
4788
4789     # Never ask for node or type as it's only known to the LU
4790     for extra in [constants.SF_NODE, constants.SF_TYPE]:
4791       while extra in fields:
4792         fields.remove(extra)
4793
4794     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4795     name_idx = field_idx[constants.SF_NAME]
4796
4797     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4798     data = self.rpc.call_storage_list(self.nodes,
4799                                       self.op.storage_type, st_args,
4800                                       self.op.name, fields)
4801
4802     result = []
4803
4804     for node in utils.NiceSort(self.nodes):
4805       nresult = data[node]
4806       if nresult.offline:
4807         continue
4808
4809       msg = nresult.fail_msg
4810       if msg:
4811         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4812         continue
4813
4814       rows = dict([(row[name_idx], row) for row in nresult.payload])
4815
4816       for name in utils.NiceSort(rows.keys()):
4817         row = rows[name]
4818
4819         out = []
4820
4821         for field in self.op.output_fields:
4822           if field == constants.SF_NODE:
4823             val = node
4824           elif field == constants.SF_TYPE:
4825             val = self.op.storage_type
4826           elif field in field_idx:
4827             val = row[field_idx[field]]
4828           else:
4829             raise errors.ParameterError(field)
4830
4831           out.append(val)
4832
4833         result.append(out)
4834
4835     return result
4836
4837
4838 class _InstanceQuery(_QueryBase):
4839   FIELDS = query.INSTANCE_FIELDS
4840
4841   def ExpandNames(self, lu):
4842     lu.needed_locks = {}
4843     lu.share_locks = _ShareAll()
4844
4845     if self.names:
4846       self.wanted = _GetWantedInstances(lu, self.names)
4847     else:
4848       self.wanted = locking.ALL_SET
4849
4850     self.do_locking = (self.use_locking and
4851                        query.IQ_LIVE in self.requested_data)
4852     if self.do_locking:
4853       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4854       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4855       lu.needed_locks[locking.LEVEL_NODE] = []
4856       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4857
4858     self.do_grouplocks = (self.do_locking and
4859                           query.IQ_NODES in self.requested_data)
4860
4861   def DeclareLocks(self, lu, level):
4862     if self.do_locking:
4863       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4864         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4865
4866         # Lock all groups used by instances optimistically; this requires going
4867         # via the node before it's locked, requiring verification later on
4868         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4869           set(group_uuid
4870               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4871               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4872       elif level == locking.LEVEL_NODE:
4873         lu._LockInstancesNodes() # pylint: disable=W0212
4874
4875   @staticmethod
4876   def _CheckGroupLocks(lu):
4877     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4878     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4879
4880     # Check if node groups for locked instances are still correct
4881     for instance_name in owned_instances:
4882       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4883
4884   def _GetQueryData(self, lu):
4885     """Computes the list of instances and their attributes.
4886
4887     """
4888     if self.do_grouplocks:
4889       self._CheckGroupLocks(lu)
4890
4891     cluster = lu.cfg.GetClusterInfo()
4892     all_info = lu.cfg.GetAllInstancesInfo()
4893
4894     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4895
4896     instance_list = [all_info[name] for name in instance_names]
4897     nodes = frozenset(itertools.chain(*(inst.all_nodes
4898                                         for inst in instance_list)))
4899     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4900     bad_nodes = []
4901     offline_nodes = []
4902     wrongnode_inst = set()
4903
4904     # Gather data as requested
4905     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4906       live_data = {}
4907       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4908       for name in nodes:
4909         result = node_data[name]
4910         if result.offline:
4911           # offline nodes will be in both lists
4912           assert result.fail_msg
4913           offline_nodes.append(name)
4914         if result.fail_msg:
4915           bad_nodes.append(name)
4916         elif result.payload:
4917           for inst in result.payload:
4918             if inst in all_info:
4919               if all_info[inst].primary_node == name:
4920                 live_data.update(result.payload)
4921               else:
4922                 wrongnode_inst.add(inst)
4923             else:
4924               # orphan instance; we don't list it here as we don't
4925               # handle this case yet in the output of instance listing
4926               logging.warning("Orphan instance '%s' found on node %s",
4927                               inst, name)
4928         # else no instance is alive
4929     else:
4930       live_data = {}
4931
4932     if query.IQ_DISKUSAGE in self.requested_data:
4933       disk_usage = dict((inst.name,
4934                          _ComputeDiskSize(inst.disk_template,
4935                                           [{constants.IDISK_SIZE: disk.size}
4936                                            for disk in inst.disks]))
4937                         for inst in instance_list)
4938     else:
4939       disk_usage = None
4940
4941     if query.IQ_CONSOLE in self.requested_data:
4942       consinfo = {}
4943       for inst in instance_list:
4944         if inst.name in live_data:
4945           # Instance is running
4946           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4947         else:
4948           consinfo[inst.name] = None
4949       assert set(consinfo.keys()) == set(instance_names)
4950     else:
4951       consinfo = None
4952
4953     if query.IQ_NODES in self.requested_data:
4954       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4955                                             instance_list)))
4956       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4957       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4958                     for uuid in set(map(operator.attrgetter("group"),
4959                                         nodes.values())))
4960     else:
4961       nodes = None
4962       groups = None
4963
4964     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4965                                    disk_usage, offline_nodes, bad_nodes,
4966                                    live_data, wrongnode_inst, consinfo,
4967                                    nodes, groups)
4968
4969
4970 class LUQuery(NoHooksLU):
4971   """Query for resources/items of a certain kind.
4972
4973   """
4974   # pylint: disable=W0142
4975   REQ_BGL = False
4976
4977   def CheckArguments(self):
4978     qcls = _GetQueryImplementation(self.op.what)
4979
4980     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4981
4982   def ExpandNames(self):
4983     self.impl.ExpandNames(self)
4984
4985   def DeclareLocks(self, level):
4986     self.impl.DeclareLocks(self, level)
4987
4988   def Exec(self, feedback_fn):
4989     return self.impl.NewStyleQuery(self)
4990
4991
4992 class LUQueryFields(NoHooksLU):
4993   """Query for resources/items of a certain kind.
4994
4995   """
4996   # pylint: disable=W0142
4997   REQ_BGL = False
4998
4999   def CheckArguments(self):
5000     self.qcls = _GetQueryImplementation(self.op.what)
5001
5002   def ExpandNames(self):
5003     self.needed_locks = {}
5004
5005   def Exec(self, feedback_fn):
5006     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5007
5008
5009 class LUNodeModifyStorage(NoHooksLU):
5010   """Logical unit for modifying a storage volume on a node.
5011
5012   """
5013   REQ_BGL = False
5014
5015   def CheckArguments(self):
5016     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5017
5018     storage_type = self.op.storage_type
5019
5020     try:
5021       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5022     except KeyError:
5023       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5024                                  " modified" % storage_type,
5025                                  errors.ECODE_INVAL)
5026
5027     diff = set(self.op.changes.keys()) - modifiable
5028     if diff:
5029       raise errors.OpPrereqError("The following fields can not be modified for"
5030                                  " storage units of type '%s': %r" %
5031                                  (storage_type, list(diff)),
5032                                  errors.ECODE_INVAL)
5033
5034   def ExpandNames(self):
5035     self.needed_locks = {
5036       locking.LEVEL_NODE: self.op.node_name,
5037       }
5038
5039   def Exec(self, feedback_fn):
5040     """Computes the list of nodes and their attributes.
5041
5042     """
5043     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5044     result = self.rpc.call_storage_modify(self.op.node_name,
5045                                           self.op.storage_type, st_args,
5046                                           self.op.name, self.op.changes)
5047     result.Raise("Failed to modify storage unit '%s' on %s" %
5048                  (self.op.name, self.op.node_name))
5049
5050
5051 class LUNodeAdd(LogicalUnit):
5052   """Logical unit for adding node to the cluster.
5053
5054   """
5055   HPATH = "node-add"
5056   HTYPE = constants.HTYPE_NODE
5057   _NFLAGS = ["master_capable", "vm_capable"]
5058
5059   def CheckArguments(self):
5060     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5061     # validate/normalize the node name
5062     self.hostname = netutils.GetHostname(name=self.op.node_name,
5063                                          family=self.primary_ip_family)
5064     self.op.node_name = self.hostname.name
5065
5066     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5067       raise errors.OpPrereqError("Cannot readd the master node",
5068                                  errors.ECODE_STATE)
5069
5070     if self.op.readd and self.op.group:
5071       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5072                                  " being readded", errors.ECODE_INVAL)
5073
5074   def BuildHooksEnv(self):
5075     """Build hooks env.
5076
5077     This will run on all nodes before, and on all nodes + the new node after.
5078
5079     """
5080     return {
5081       "OP_TARGET": self.op.node_name,
5082       "NODE_NAME": self.op.node_name,
5083       "NODE_PIP": self.op.primary_ip,
5084       "NODE_SIP": self.op.secondary_ip,
5085       "MASTER_CAPABLE": str(self.op.master_capable),
5086       "VM_CAPABLE": str(self.op.vm_capable),
5087       }
5088
5089   def BuildHooksNodes(self):
5090     """Build hooks nodes.
5091
5092     """
5093     # Exclude added node
5094     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5095     post_nodes = pre_nodes + [self.op.node_name, ]
5096
5097     return (pre_nodes, post_nodes)
5098
5099   def CheckPrereq(self):
5100     """Check prerequisites.
5101
5102     This checks:
5103      - the new node is not already in the config
5104      - it is resolvable
5105      - its parameters (single/dual homed) matches the cluster
5106
5107     Any errors are signaled by raising errors.OpPrereqError.
5108
5109     """
5110     cfg = self.cfg
5111     hostname = self.hostname
5112     node = hostname.name
5113     primary_ip = self.op.primary_ip = hostname.ip
5114     if self.op.secondary_ip is None:
5115       if self.primary_ip_family == netutils.IP6Address.family:
5116         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5117                                    " IPv4 address must be given as secondary",
5118                                    errors.ECODE_INVAL)
5119       self.op.secondary_ip = primary_ip
5120
5121     secondary_ip = self.op.secondary_ip
5122     if not netutils.IP4Address.IsValid(secondary_ip):
5123       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5124                                  " address" % secondary_ip, errors.ECODE_INVAL)
5125
5126     node_list = cfg.GetNodeList()
5127     if not self.op.readd and node in node_list:
5128       raise errors.OpPrereqError("Node %s is already in the configuration" %
5129                                  node, errors.ECODE_EXISTS)
5130     elif self.op.readd and node not in node_list:
5131       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5132                                  errors.ECODE_NOENT)
5133
5134     self.changed_primary_ip = False
5135
5136     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5137       if self.op.readd and node == existing_node_name:
5138         if existing_node.secondary_ip != secondary_ip:
5139           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5140                                      " address configuration as before",
5141                                      errors.ECODE_INVAL)
5142         if existing_node.primary_ip != primary_ip:
5143           self.changed_primary_ip = True
5144
5145         continue
5146
5147       if (existing_node.primary_ip == primary_ip or
5148           existing_node.secondary_ip == primary_ip or
5149           existing_node.primary_ip == secondary_ip or
5150           existing_node.secondary_ip == secondary_ip):
5151         raise errors.OpPrereqError("New node ip address(es) conflict with"
5152                                    " existing node %s" % existing_node.name,
5153                                    errors.ECODE_NOTUNIQUE)
5154
5155     # After this 'if' block, None is no longer a valid value for the
5156     # _capable op attributes
5157     if self.op.readd:
5158       old_node = self.cfg.GetNodeInfo(node)
5159       assert old_node is not None, "Can't retrieve locked node %s" % node
5160       for attr in self._NFLAGS:
5161         if getattr(self.op, attr) is None:
5162           setattr(self.op, attr, getattr(old_node, attr))
5163     else:
5164       for attr in self._NFLAGS:
5165         if getattr(self.op, attr) is None:
5166           setattr(self.op, attr, True)
5167
5168     if self.op.readd and not self.op.vm_capable:
5169       pri, sec = cfg.GetNodeInstances(node)
5170       if pri or sec:
5171         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5172                                    " flag set to false, but it already holds"
5173                                    " instances" % node,
5174                                    errors.ECODE_STATE)
5175
5176     # check that the type of the node (single versus dual homed) is the
5177     # same as for the master
5178     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5179     master_singlehomed = myself.secondary_ip == myself.primary_ip
5180     newbie_singlehomed = secondary_ip == primary_ip
5181     if master_singlehomed != newbie_singlehomed:
5182       if master_singlehomed:
5183         raise errors.OpPrereqError("The master has no secondary ip but the"
5184                                    " new node has one",
5185                                    errors.ECODE_INVAL)
5186       else:
5187         raise errors.OpPrereqError("The master has a secondary ip but the"
5188                                    " new node doesn't have one",
5189                                    errors.ECODE_INVAL)
5190
5191     # checks reachability
5192     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5193       raise errors.OpPrereqError("Node not reachable by ping",
5194                                  errors.ECODE_ENVIRON)
5195
5196     if not newbie_singlehomed:
5197       # check reachability from my secondary ip to newbie's secondary ip
5198       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5199                            source=myself.secondary_ip):
5200         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5201                                    " based ping to node daemon port",
5202                                    errors.ECODE_ENVIRON)
5203
5204     if self.op.readd:
5205       exceptions = [node]
5206     else:
5207       exceptions = []
5208
5209     if self.op.master_capable:
5210       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5211     else:
5212       self.master_candidate = False
5213
5214     if self.op.readd:
5215       self.new_node = old_node
5216     else:
5217       node_group = cfg.LookupNodeGroup(self.op.group)
5218       self.new_node = objects.Node(name=node,
5219                                    primary_ip=primary_ip,
5220                                    secondary_ip=secondary_ip,
5221                                    master_candidate=self.master_candidate,
5222                                    offline=False, drained=False,
5223                                    group=node_group)
5224
5225     if self.op.ndparams:
5226       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5227
5228   def Exec(self, feedback_fn):
5229     """Adds the new node to the cluster.
5230
5231     """
5232     new_node = self.new_node
5233     node = new_node.name
5234
5235     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5236       "Not owning BGL"
5237
5238     # We adding a new node so we assume it's powered
5239     new_node.powered = True
5240
5241     # for re-adds, reset the offline/drained/master-candidate flags;
5242     # we need to reset here, otherwise offline would prevent RPC calls
5243     # later in the procedure; this also means that if the re-add
5244     # fails, we are left with a non-offlined, broken node
5245     if self.op.readd:
5246       new_node.drained = new_node.offline = False # pylint: disable=W0201
5247       self.LogInfo("Readding a node, the offline/drained flags were reset")
5248       # if we demote the node, we do cleanup later in the procedure
5249       new_node.master_candidate = self.master_candidate
5250       if self.changed_primary_ip:
5251         new_node.primary_ip = self.op.primary_ip
5252
5253     # copy the master/vm_capable flags
5254     for attr in self._NFLAGS:
5255       setattr(new_node, attr, getattr(self.op, attr))
5256
5257     # notify the user about any possible mc promotion
5258     if new_node.master_candidate:
5259       self.LogInfo("Node will be a master candidate")
5260
5261     if self.op.ndparams:
5262       new_node.ndparams = self.op.ndparams
5263     else:
5264       new_node.ndparams = {}
5265
5266     # check connectivity
5267     result = self.rpc.call_version([node])[node]
5268     result.Raise("Can't get version information from node %s" % node)
5269     if constants.PROTOCOL_VERSION == result.payload:
5270       logging.info("Communication to node %s fine, sw version %s match",
5271                    node, result.payload)
5272     else:
5273       raise errors.OpExecError("Version mismatch master version %s,"
5274                                " node version %s" %
5275                                (constants.PROTOCOL_VERSION, result.payload))
5276
5277     # Add node to our /etc/hosts, and add key to known_hosts
5278     if self.cfg.GetClusterInfo().modify_etc_hosts:
5279       master_node = self.cfg.GetMasterNode()
5280       result = self.rpc.call_etc_hosts_modify(master_node,
5281                                               constants.ETC_HOSTS_ADD,
5282                                               self.hostname.name,
5283                                               self.hostname.ip)
5284       result.Raise("Can't update hosts file with new host data")
5285
5286     if new_node.secondary_ip != new_node.primary_ip:
5287       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5288                                False)
5289
5290     node_verify_list = [self.cfg.GetMasterNode()]
5291     node_verify_param = {
5292       constants.NV_NODELIST: ([node], {}),
5293       # TODO: do a node-net-test as well?
5294     }
5295
5296     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5297                                        self.cfg.GetClusterName())
5298     for verifier in node_verify_list:
5299       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5300       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5301       if nl_payload:
5302         for failed in nl_payload:
5303           feedback_fn("ssh/hostname verification failed"
5304                       " (checking from %s): %s" %
5305                       (verifier, nl_payload[failed]))
5306         raise errors.OpExecError("ssh/hostname verification failed")
5307
5308     if self.op.readd:
5309       _RedistributeAncillaryFiles(self)
5310       self.context.ReaddNode(new_node)
5311       # make sure we redistribute the config
5312       self.cfg.Update(new_node, feedback_fn)
5313       # and make sure the new node will not have old files around
5314       if not new_node.master_candidate:
5315         result = self.rpc.call_node_demote_from_mc(new_node.name)
5316         msg = result.fail_msg
5317         if msg:
5318           self.LogWarning("Node failed to demote itself from master"
5319                           " candidate status: %s" % msg)
5320     else:
5321       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5322                                   additional_vm=self.op.vm_capable)
5323       self.context.AddNode(new_node, self.proc.GetECId())
5324
5325
5326 class LUNodeSetParams(LogicalUnit):
5327   """Modifies the parameters of a node.
5328
5329   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5330       to the node role (as _ROLE_*)
5331   @cvar _R2F: a dictionary from node role to tuples of flags
5332   @cvar _FLAGS: a list of attribute names corresponding to the flags
5333
5334   """
5335   HPATH = "node-modify"
5336   HTYPE = constants.HTYPE_NODE
5337   REQ_BGL = False
5338   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5339   _F2R = {
5340     (True, False, False): _ROLE_CANDIDATE,
5341     (False, True, False): _ROLE_DRAINED,
5342     (False, False, True): _ROLE_OFFLINE,
5343     (False, False, False): _ROLE_REGULAR,
5344     }
5345   _R2F = dict((v, k) for k, v in _F2R.items())
5346   _FLAGS = ["master_candidate", "drained", "offline"]
5347
5348   def CheckArguments(self):
5349     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5350     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5351                 self.op.master_capable, self.op.vm_capable,
5352                 self.op.secondary_ip, self.op.ndparams]
5353     if all_mods.count(None) == len(all_mods):
5354       raise errors.OpPrereqError("Please pass at least one modification",
5355                                  errors.ECODE_INVAL)
5356     if all_mods.count(True) > 1:
5357       raise errors.OpPrereqError("Can't set the node into more than one"
5358                                  " state at the same time",
5359                                  errors.ECODE_INVAL)
5360
5361     # Boolean value that tells us whether we might be demoting from MC
5362     self.might_demote = (self.op.master_candidate == False or
5363                          self.op.offline == True or
5364                          self.op.drained == True or
5365                          self.op.master_capable == False)
5366
5367     if self.op.secondary_ip:
5368       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5369         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5370                                    " address" % self.op.secondary_ip,
5371                                    errors.ECODE_INVAL)
5372
5373     self.lock_all = self.op.auto_promote and self.might_demote
5374     self.lock_instances = self.op.secondary_ip is not None
5375
5376   def _InstanceFilter(self, instance):
5377     """Filter for getting affected instances.
5378
5379     """
5380     return (instance.disk_template in constants.DTS_INT_MIRROR and
5381             self.op.node_name in instance.all_nodes)
5382
5383   def ExpandNames(self):
5384     if self.lock_all:
5385       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5386     else:
5387       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5388
5389     # Since modifying a node can have severe effects on currently running
5390     # operations the resource lock is at least acquired in shared mode
5391     self.needed_locks[locking.LEVEL_NODE_RES] = \
5392       self.needed_locks[locking.LEVEL_NODE]
5393
5394     # Get node resource and instance locks in shared mode; they are not used
5395     # for anything but read-only access
5396     self.share_locks[locking.LEVEL_NODE_RES] = 1
5397     self.share_locks[locking.LEVEL_INSTANCE] = 1
5398
5399     if self.lock_instances:
5400       self.needed_locks[locking.LEVEL_INSTANCE] = \
5401         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5402
5403   def BuildHooksEnv(self):
5404     """Build hooks env.
5405
5406     This runs on the master node.
5407
5408     """
5409     return {
5410       "OP_TARGET": self.op.node_name,
5411       "MASTER_CANDIDATE": str(self.op.master_candidate),
5412       "OFFLINE": str(self.op.offline),
5413       "DRAINED": str(self.op.drained),
5414       "MASTER_CAPABLE": str(self.op.master_capable),
5415       "VM_CAPABLE": str(self.op.vm_capable),
5416       }
5417
5418   def BuildHooksNodes(self):
5419     """Build hooks nodes.
5420
5421     """
5422     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5423     return (nl, nl)
5424
5425   def CheckPrereq(self):
5426     """Check prerequisites.
5427
5428     This only checks the instance list against the existing names.
5429
5430     """
5431     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5432
5433     if self.lock_instances:
5434       affected_instances = \
5435         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5436
5437       # Verify instance locks
5438       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5439       wanted_instances = frozenset(affected_instances.keys())
5440       if wanted_instances - owned_instances:
5441         raise errors.OpPrereqError("Instances affected by changing node %s's"
5442                                    " secondary IP address have changed since"
5443                                    " locks were acquired, wanted '%s', have"
5444                                    " '%s'; retry the operation" %
5445                                    (self.op.node_name,
5446                                     utils.CommaJoin(wanted_instances),
5447                                     utils.CommaJoin(owned_instances)),
5448                                    errors.ECODE_STATE)
5449     else:
5450       affected_instances = None
5451
5452     if (self.op.master_candidate is not None or
5453         self.op.drained is not None or
5454         self.op.offline is not None):
5455       # we can't change the master's node flags
5456       if self.op.node_name == self.cfg.GetMasterNode():
5457         raise errors.OpPrereqError("The master role can be changed"
5458                                    " only via master-failover",
5459                                    errors.ECODE_INVAL)
5460
5461     if self.op.master_candidate and not node.master_capable:
5462       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5463                                  " it a master candidate" % node.name,
5464                                  errors.ECODE_STATE)
5465
5466     if self.op.vm_capable == False:
5467       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5468       if ipri or isec:
5469         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5470                                    " the vm_capable flag" % node.name,
5471                                    errors.ECODE_STATE)
5472
5473     if node.master_candidate and self.might_demote and not self.lock_all:
5474       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5475       # check if after removing the current node, we're missing master
5476       # candidates
5477       (mc_remaining, mc_should, _) = \
5478           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5479       if mc_remaining < mc_should:
5480         raise errors.OpPrereqError("Not enough master candidates, please"
5481                                    " pass auto promote option to allow"
5482                                    " promotion", errors.ECODE_STATE)
5483
5484     self.old_flags = old_flags = (node.master_candidate,
5485                                   node.drained, node.offline)
5486     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5487     self.old_role = old_role = self._F2R[old_flags]
5488
5489     # Check for ineffective changes
5490     for attr in self._FLAGS:
5491       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5492         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5493         setattr(self.op, attr, None)
5494
5495     # Past this point, any flag change to False means a transition
5496     # away from the respective state, as only real changes are kept
5497
5498     # TODO: We might query the real power state if it supports OOB
5499     if _SupportsOob(self.cfg, node):
5500       if self.op.offline is False and not (node.powered or
5501                                            self.op.powered == True):
5502         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5503                                     " offline status can be reset") %
5504                                    self.op.node_name)
5505     elif self.op.powered is not None:
5506       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5507                                   " as it does not support out-of-band"
5508                                   " handling") % self.op.node_name)
5509
5510     # If we're being deofflined/drained, we'll MC ourself if needed
5511     if (self.op.drained == False or self.op.offline == False or
5512         (self.op.master_capable and not node.master_capable)):
5513       if _DecideSelfPromotion(self):
5514         self.op.master_candidate = True
5515         self.LogInfo("Auto-promoting node to master candidate")
5516
5517     # If we're no longer master capable, we'll demote ourselves from MC
5518     if self.op.master_capable == False and node.master_candidate:
5519       self.LogInfo("Demoting from master candidate")
5520       self.op.master_candidate = False
5521
5522     # Compute new role
5523     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5524     if self.op.master_candidate:
5525       new_role = self._ROLE_CANDIDATE
5526     elif self.op.drained:
5527       new_role = self._ROLE_DRAINED
5528     elif self.op.offline:
5529       new_role = self._ROLE_OFFLINE
5530     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5531       # False is still in new flags, which means we're un-setting (the
5532       # only) True flag
5533       new_role = self._ROLE_REGULAR
5534     else: # no new flags, nothing, keep old role
5535       new_role = old_role
5536
5537     self.new_role = new_role
5538
5539     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5540       # Trying to transition out of offline status
5541       # TODO: Use standard RPC runner, but make sure it works when the node is
5542       # still marked offline
5543       result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5544       if result.fail_msg:
5545         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5546                                    " to report its version: %s" %
5547                                    (node.name, result.fail_msg),
5548                                    errors.ECODE_STATE)
5549       else:
5550         self.LogWarning("Transitioning node from offline to online state"
5551                         " without using re-add. Please make sure the node"
5552                         " is healthy!")
5553
5554     if self.op.secondary_ip:
5555       # Ok even without locking, because this can't be changed by any LU
5556       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5557       master_singlehomed = master.secondary_ip == master.primary_ip
5558       if master_singlehomed and self.op.secondary_ip:
5559         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5560                                    " homed cluster", errors.ECODE_INVAL)
5561
5562       assert not (frozenset(affected_instances) -
5563                   self.owned_locks(locking.LEVEL_INSTANCE))
5564
5565       if node.offline:
5566         if affected_instances:
5567           raise errors.OpPrereqError("Cannot change secondary IP address:"
5568                                      " offline node has instances (%s)"
5569                                      " configured to use it" %
5570                                      utils.CommaJoin(affected_instances.keys()))
5571       else:
5572         # On online nodes, check that no instances are running, and that
5573         # the node has the new ip and we can reach it.
5574         for instance in affected_instances.values():
5575           _CheckInstanceState(self, instance, INSTANCE_DOWN,
5576                               msg="cannot change secondary ip")
5577
5578         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5579         if master.name != node.name:
5580           # check reachability from master secondary ip to new secondary ip
5581           if not netutils.TcpPing(self.op.secondary_ip,
5582                                   constants.DEFAULT_NODED_PORT,
5583                                   source=master.secondary_ip):
5584             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5585                                        " based ping to node daemon port",
5586                                        errors.ECODE_ENVIRON)
5587
5588     if self.op.ndparams:
5589       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5590       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5591       self.new_ndparams = new_ndparams
5592
5593   def Exec(self, feedback_fn):
5594     """Modifies a node.
5595
5596     """
5597     node = self.node
5598     old_role = self.old_role
5599     new_role = self.new_role
5600
5601     result = []
5602
5603     if self.op.ndparams:
5604       node.ndparams = self.new_ndparams
5605
5606     if self.op.powered is not None:
5607       node.powered = self.op.powered
5608
5609     for attr in ["master_capable", "vm_capable"]:
5610       val = getattr(self.op, attr)
5611       if val is not None:
5612         setattr(node, attr, val)
5613         result.append((attr, str(val)))
5614
5615     if new_role != old_role:
5616       # Tell the node to demote itself, if no longer MC and not offline
5617       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5618         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5619         if msg:
5620           self.LogWarning("Node failed to demote itself: %s", msg)
5621
5622       new_flags = self._R2F[new_role]
5623       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5624         if of != nf:
5625           result.append((desc, str(nf)))
5626       (node.master_candidate, node.drained, node.offline) = new_flags
5627
5628       # we locked all nodes, we adjust the CP before updating this node
5629       if self.lock_all:
5630         _AdjustCandidatePool(self, [node.name])
5631
5632     if self.op.secondary_ip:
5633       node.secondary_ip = self.op.secondary_ip
5634       result.append(("secondary_ip", self.op.secondary_ip))
5635
5636     # this will trigger configuration file update, if needed
5637     self.cfg.Update(node, feedback_fn)
5638
5639     # this will trigger job queue propagation or cleanup if the mc
5640     # flag changed
5641     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5642       self.context.ReaddNode(node)
5643
5644     return result
5645
5646
5647 class LUNodePowercycle(NoHooksLU):
5648   """Powercycles a node.
5649
5650   """
5651   REQ_BGL = False
5652
5653   def CheckArguments(self):
5654     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5655     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5656       raise errors.OpPrereqError("The node is the master and the force"
5657                                  " parameter was not set",
5658                                  errors.ECODE_INVAL)
5659
5660   def ExpandNames(self):
5661     """Locking for PowercycleNode.
5662
5663     This is a last-resort option and shouldn't block on other
5664     jobs. Therefore, we grab no locks.
5665
5666     """
5667     self.needed_locks = {}
5668
5669   def Exec(self, feedback_fn):
5670     """Reboots a node.
5671
5672     """
5673     result = self.rpc.call_node_powercycle(self.op.node_name,
5674                                            self.cfg.GetHypervisorType())
5675     result.Raise("Failed to schedule the reboot")
5676     return result.payload
5677
5678
5679 class LUClusterQuery(NoHooksLU):
5680   """Query cluster configuration.
5681
5682   """
5683   REQ_BGL = False
5684
5685   def ExpandNames(self):
5686     self.needed_locks = {}
5687
5688   def Exec(self, feedback_fn):
5689     """Return cluster config.
5690
5691     """
5692     cluster = self.cfg.GetClusterInfo()
5693     os_hvp = {}
5694
5695     # Filter just for enabled hypervisors
5696     for os_name, hv_dict in cluster.os_hvp.items():
5697       os_hvp[os_name] = {}
5698       for hv_name, hv_params in hv_dict.items():
5699         if hv_name in cluster.enabled_hypervisors:
5700           os_hvp[os_name][hv_name] = hv_params
5701
5702     # Convert ip_family to ip_version
5703     primary_ip_version = constants.IP4_VERSION
5704     if cluster.primary_ip_family == netutils.IP6Address.family:
5705       primary_ip_version = constants.IP6_VERSION
5706
5707     result = {
5708       "software_version": constants.RELEASE_VERSION,
5709       "protocol_version": constants.PROTOCOL_VERSION,
5710       "config_version": constants.CONFIG_VERSION,
5711       "os_api_version": max(constants.OS_API_VERSIONS),
5712       "export_version": constants.EXPORT_VERSION,
5713       "architecture": (platform.architecture()[0], platform.machine()),
5714       "name": cluster.cluster_name,
5715       "master": cluster.master_node,
5716       "default_hypervisor": cluster.enabled_hypervisors[0],
5717       "enabled_hypervisors": cluster.enabled_hypervisors,
5718       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5719                         for hypervisor_name in cluster.enabled_hypervisors]),
5720       "os_hvp": os_hvp,
5721       "beparams": cluster.beparams,
5722       "osparams": cluster.osparams,
5723       "nicparams": cluster.nicparams,
5724       "ndparams": cluster.ndparams,
5725       "candidate_pool_size": cluster.candidate_pool_size,
5726       "master_netdev": cluster.master_netdev,
5727       "master_netmask": cluster.master_netmask,
5728       "use_external_mip_script": cluster.use_external_mip_script,
5729       "volume_group_name": cluster.volume_group_name,
5730       "drbd_usermode_helper": cluster.drbd_usermode_helper,
5731       "file_storage_dir": cluster.file_storage_dir,
5732       "shared_file_storage_dir": cluster.shared_file_storage_dir,
5733       "maintain_node_health": cluster.maintain_node_health,
5734       "ctime": cluster.ctime,
5735       "mtime": cluster.mtime,
5736       "uuid": cluster.uuid,
5737       "tags": list(cluster.GetTags()),
5738       "uid_pool": cluster.uid_pool,
5739       "default_iallocator": cluster.default_iallocator,
5740       "reserved_lvs": cluster.reserved_lvs,
5741       "primary_ip_version": primary_ip_version,
5742       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5743       "hidden_os": cluster.hidden_os,
5744       "blacklisted_os": cluster.blacklisted_os,
5745       }
5746
5747     return result
5748
5749
5750 class LUClusterConfigQuery(NoHooksLU):
5751   """Return configuration values.
5752
5753   """
5754   REQ_BGL = False
5755   _FIELDS_DYNAMIC = utils.FieldSet()
5756   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5757                                   "watcher_pause", "volume_group_name")
5758
5759   def CheckArguments(self):
5760     _CheckOutputFields(static=self._FIELDS_STATIC,
5761                        dynamic=self._FIELDS_DYNAMIC,
5762                        selected=self.op.output_fields)
5763
5764   def ExpandNames(self):
5765     self.needed_locks = {}
5766
5767   def Exec(self, feedback_fn):
5768     """Dump a representation of the cluster config to the standard output.
5769
5770     """
5771     values = []
5772     for field in self.op.output_fields:
5773       if field == "cluster_name":
5774         entry = self.cfg.GetClusterName()
5775       elif field == "master_node":
5776         entry = self.cfg.GetMasterNode()
5777       elif field == "drain_flag":
5778         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5779       elif field == "watcher_pause":
5780         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5781       elif field == "volume_group_name":
5782         entry = self.cfg.GetVGName()
5783       else:
5784         raise errors.ParameterError(field)
5785       values.append(entry)
5786     return values
5787
5788
5789 class LUInstanceActivateDisks(NoHooksLU):
5790   """Bring up an instance's disks.
5791
5792   """
5793   REQ_BGL = False
5794
5795   def ExpandNames(self):
5796     self._ExpandAndLockInstance()
5797     self.needed_locks[locking.LEVEL_NODE] = []
5798     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5799
5800   def DeclareLocks(self, level):
5801     if level == locking.LEVEL_NODE:
5802       self._LockInstancesNodes()
5803
5804   def CheckPrereq(self):
5805     """Check prerequisites.
5806
5807     This checks that the instance is in the cluster.
5808
5809     """
5810     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5811     assert self.instance is not None, \
5812       "Cannot retrieve locked instance %s" % self.op.instance_name
5813     _CheckNodeOnline(self, self.instance.primary_node)
5814
5815   def Exec(self, feedback_fn):
5816     """Activate the disks.
5817
5818     """
5819     disks_ok, disks_info = \
5820               _AssembleInstanceDisks(self, self.instance,
5821                                      ignore_size=self.op.ignore_size)
5822     if not disks_ok:
5823       raise errors.OpExecError("Cannot activate block devices")
5824
5825     return disks_info
5826
5827
5828 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5829                            ignore_size=False):
5830   """Prepare the block devices for an instance.
5831
5832   This sets up the block devices on all nodes.
5833
5834   @type lu: L{LogicalUnit}
5835   @param lu: the logical unit on whose behalf we execute
5836   @type instance: L{objects.Instance}
5837   @param instance: the instance for whose disks we assemble
5838   @type disks: list of L{objects.Disk} or None
5839   @param disks: which disks to assemble (or all, if None)
5840   @type ignore_secondaries: boolean
5841   @param ignore_secondaries: if true, errors on secondary nodes
5842       won't result in an error return from the function
5843   @type ignore_size: boolean
5844   @param ignore_size: if true, the current known size of the disk
5845       will not be used during the disk activation, useful for cases
5846       when the size is wrong
5847   @return: False if the operation failed, otherwise a list of
5848       (host, instance_visible_name, node_visible_name)
5849       with the mapping from node devices to instance devices
5850
5851   """
5852   device_info = []
5853   disks_ok = True
5854   iname = instance.name
5855   disks = _ExpandCheckDisks(instance, disks)
5856
5857   # With the two passes mechanism we try to reduce the window of
5858   # opportunity for the race condition of switching DRBD to primary
5859   # before handshaking occured, but we do not eliminate it
5860
5861   # The proper fix would be to wait (with some limits) until the
5862   # connection has been made and drbd transitions from WFConnection
5863   # into any other network-connected state (Connected, SyncTarget,
5864   # SyncSource, etc.)
5865
5866   # 1st pass, assemble on all nodes in secondary mode
5867   for idx, inst_disk in enumerate(disks):
5868     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5869       if ignore_size:
5870         node_disk = node_disk.Copy()
5871         node_disk.UnsetSize()
5872       lu.cfg.SetDiskID(node_disk, node)
5873       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5874       msg = result.fail_msg
5875       if msg:
5876         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5877                            " (is_primary=False, pass=1): %s",
5878                            inst_disk.iv_name, node, msg)
5879         if not ignore_secondaries:
5880           disks_ok = False
5881
5882   # FIXME: race condition on drbd migration to primary
5883
5884   # 2nd pass, do only the primary node
5885   for idx, inst_disk in enumerate(disks):
5886     dev_path = None
5887
5888     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5889       if node != instance.primary_node:
5890         continue
5891       if ignore_size:
5892         node_disk = node_disk.Copy()
5893         node_disk.UnsetSize()
5894       lu.cfg.SetDiskID(node_disk, node)
5895       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5896       msg = result.fail_msg
5897       if msg:
5898         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5899                            " (is_primary=True, pass=2): %s",
5900                            inst_disk.iv_name, node, msg)
5901         disks_ok = False
5902       else:
5903         dev_path = result.payload
5904
5905     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5906
5907   # leave the disks configured for the primary node
5908   # this is a workaround that would be fixed better by
5909   # improving the logical/physical id handling
5910   for disk in disks:
5911     lu.cfg.SetDiskID(disk, instance.primary_node)
5912
5913   return disks_ok, device_info
5914
5915
5916 def _StartInstanceDisks(lu, instance, force):
5917   """Start the disks of an instance.
5918
5919   """
5920   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5921                                            ignore_secondaries=force)
5922   if not disks_ok:
5923     _ShutdownInstanceDisks(lu, instance)
5924     if force is not None and not force:
5925       lu.proc.LogWarning("", hint="If the message above refers to a"
5926                          " secondary node,"
5927                          " you can retry the operation using '--force'.")
5928     raise errors.OpExecError("Disk consistency error")
5929
5930
5931 class LUInstanceDeactivateDisks(NoHooksLU):
5932   """Shutdown an instance's disks.
5933
5934   """
5935   REQ_BGL = False
5936
5937   def ExpandNames(self):
5938     self._ExpandAndLockInstance()
5939     self.needed_locks[locking.LEVEL_NODE] = []
5940     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5941
5942   def DeclareLocks(self, level):
5943     if level == locking.LEVEL_NODE:
5944       self._LockInstancesNodes()
5945
5946   def CheckPrereq(self):
5947     """Check prerequisites.
5948
5949     This checks that the instance is in the cluster.
5950
5951     """
5952     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5953     assert self.instance is not None, \
5954       "Cannot retrieve locked instance %s" % self.op.instance_name
5955
5956   def Exec(self, feedback_fn):
5957     """Deactivate the disks
5958
5959     """
5960     instance = self.instance
5961     if self.op.force:
5962       _ShutdownInstanceDisks(self, instance)
5963     else:
5964       _SafeShutdownInstanceDisks(self, instance)
5965
5966
5967 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5968   """Shutdown block devices of an instance.
5969
5970   This function checks if an instance is running, before calling
5971   _ShutdownInstanceDisks.
5972
5973   """
5974   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
5975   _ShutdownInstanceDisks(lu, instance, disks=disks)
5976
5977
5978 def _ExpandCheckDisks(instance, disks):
5979   """Return the instance disks selected by the disks list
5980
5981   @type disks: list of L{objects.Disk} or None
5982   @param disks: selected disks
5983   @rtype: list of L{objects.Disk}
5984   @return: selected instance disks to act on
5985
5986   """
5987   if disks is None:
5988     return instance.disks
5989   else:
5990     if not set(disks).issubset(instance.disks):
5991       raise errors.ProgrammerError("Can only act on disks belonging to the"
5992                                    " target instance")
5993     return disks
5994
5995
5996 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5997   """Shutdown block devices of an instance.
5998
5999   This does the shutdown on all nodes of the instance.
6000
6001   If the ignore_primary is false, errors on the primary node are
6002   ignored.
6003
6004   """
6005   all_result = True
6006   disks = _ExpandCheckDisks(instance, disks)
6007
6008   for disk in disks:
6009     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6010       lu.cfg.SetDiskID(top_disk, node)
6011       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6012       msg = result.fail_msg
6013       if msg:
6014         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6015                       disk.iv_name, node, msg)
6016         if ((node == instance.primary_node and not ignore_primary) or
6017             (node != instance.primary_node and not result.offline)):
6018           all_result = False
6019   return all_result
6020
6021
6022 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6023   """Checks if a node has enough free memory.
6024
6025   This function check if a given node has the needed amount of free
6026   memory. In case the node has less memory or we cannot get the
6027   information from the node, this function raise an OpPrereqError
6028   exception.
6029
6030   @type lu: C{LogicalUnit}
6031   @param lu: a logical unit from which we get configuration data
6032   @type node: C{str}
6033   @param node: the node to check
6034   @type reason: C{str}
6035   @param reason: string to use in the error message
6036   @type requested: C{int}
6037   @param requested: the amount of memory in MiB to check for
6038   @type hypervisor_name: C{str}
6039   @param hypervisor_name: the hypervisor to ask for memory stats
6040   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6041       we cannot check the node
6042
6043   """
6044   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6045   nodeinfo[node].Raise("Can't get data from node %s" % node,
6046                        prereq=True, ecode=errors.ECODE_ENVIRON)
6047   (_, _, (hv_info, )) = nodeinfo[node].payload
6048
6049   free_mem = hv_info.get("memory_free", None)
6050   if not isinstance(free_mem, int):
6051     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6052                                " was '%s'" % (node, free_mem),
6053                                errors.ECODE_ENVIRON)
6054   if requested > free_mem:
6055     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6056                                " needed %s MiB, available %s MiB" %
6057                                (node, reason, requested, free_mem),
6058                                errors.ECODE_NORES)
6059
6060
6061 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6062   """Checks if nodes have enough free disk space in the all VGs.
6063
6064   This function check if all given nodes have the needed amount of
6065   free disk. In case any node has less disk or we cannot get the
6066   information from the node, this function raise an OpPrereqError
6067   exception.
6068
6069   @type lu: C{LogicalUnit}
6070   @param lu: a logical unit from which we get configuration data
6071   @type nodenames: C{list}
6072   @param nodenames: the list of node names to check
6073   @type req_sizes: C{dict}
6074   @param req_sizes: the hash of vg and corresponding amount of disk in
6075       MiB to check for
6076   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6077       or we cannot check the node
6078
6079   """
6080   for vg, req_size in req_sizes.items():
6081     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6082
6083
6084 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6085   """Checks if nodes have enough free disk space in the specified VG.
6086
6087   This function check if all given nodes have the needed amount of
6088   free disk. In case any node has less disk or we cannot get the
6089   information from the node, this function raise an OpPrereqError
6090   exception.
6091
6092   @type lu: C{LogicalUnit}
6093   @param lu: a logical unit from which we get configuration data
6094   @type nodenames: C{list}
6095   @param nodenames: the list of node names to check
6096   @type vg: C{str}
6097   @param vg: the volume group to check
6098   @type requested: C{int}
6099   @param requested: the amount of disk in MiB to check for
6100   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6101       or we cannot check the node
6102
6103   """
6104   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6105   for node in nodenames:
6106     info = nodeinfo[node]
6107     info.Raise("Cannot get current information from node %s" % node,
6108                prereq=True, ecode=errors.ECODE_ENVIRON)
6109     (_, (vg_info, ), _) = info.payload
6110     vg_free = vg_info.get("vg_free", None)
6111     if not isinstance(vg_free, int):
6112       raise errors.OpPrereqError("Can't compute free disk space on node"
6113                                  " %s for vg %s, result was '%s'" %
6114                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6115     if requested > vg_free:
6116       raise errors.OpPrereqError("Not enough disk space on target node %s"
6117                                  " vg %s: required %d MiB, available %d MiB" %
6118                                  (node, vg, requested, vg_free),
6119                                  errors.ECODE_NORES)
6120
6121
6122 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6123   """Checks if nodes have enough physical CPUs
6124
6125   This function checks if all given nodes have the needed number of
6126   physical CPUs. In case any node has less CPUs or we cannot get the
6127   information from the node, this function raises an OpPrereqError
6128   exception.
6129
6130   @type lu: C{LogicalUnit}
6131   @param lu: a logical unit from which we get configuration data
6132   @type nodenames: C{list}
6133   @param nodenames: the list of node names to check
6134   @type requested: C{int}
6135   @param requested: the minimum acceptable number of physical CPUs
6136   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6137       or we cannot check the node
6138
6139   """
6140   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6141   for node in nodenames:
6142     info = nodeinfo[node]
6143     info.Raise("Cannot get current information from node %s" % node,
6144                prereq=True, ecode=errors.ECODE_ENVIRON)
6145     (_, _, (hv_info, )) = info.payload
6146     num_cpus = hv_info.get("cpu_total", None)
6147     if not isinstance(num_cpus, int):
6148       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6149                                  " on node %s, result was '%s'" %
6150                                  (node, num_cpus), errors.ECODE_ENVIRON)
6151     if requested > num_cpus:
6152       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6153                                  "required" % (node, num_cpus, requested),
6154                                  errors.ECODE_NORES)
6155
6156
6157 class LUInstanceStartup(LogicalUnit):
6158   """Starts an instance.
6159
6160   """
6161   HPATH = "instance-start"
6162   HTYPE = constants.HTYPE_INSTANCE
6163   REQ_BGL = False
6164
6165   def CheckArguments(self):
6166     # extra beparams
6167     if self.op.beparams:
6168       # fill the beparams dict
6169       objects.UpgradeBeParams(self.op.beparams)
6170       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6171
6172   def ExpandNames(self):
6173     self._ExpandAndLockInstance()
6174
6175   def BuildHooksEnv(self):
6176     """Build hooks env.
6177
6178     This runs on master, primary and secondary nodes of the instance.
6179
6180     """
6181     env = {
6182       "FORCE": self.op.force,
6183       }
6184
6185     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6186
6187     return env
6188
6189   def BuildHooksNodes(self):
6190     """Build hooks nodes.
6191
6192     """
6193     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6194     return (nl, nl)
6195
6196   def CheckPrereq(self):
6197     """Check prerequisites.
6198
6199     This checks that the instance is in the cluster.
6200
6201     """
6202     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6203     assert self.instance is not None, \
6204       "Cannot retrieve locked instance %s" % self.op.instance_name
6205
6206     # extra hvparams
6207     if self.op.hvparams:
6208       # check hypervisor parameter syntax (locally)
6209       cluster = self.cfg.GetClusterInfo()
6210       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6211       filled_hvp = cluster.FillHV(instance)
6212       filled_hvp.update(self.op.hvparams)
6213       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6214       hv_type.CheckParameterSyntax(filled_hvp)
6215       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6216
6217     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6218
6219     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6220
6221     if self.primary_offline and self.op.ignore_offline_nodes:
6222       self.proc.LogWarning("Ignoring offline primary node")
6223
6224       if self.op.hvparams or self.op.beparams:
6225         self.proc.LogWarning("Overridden parameters are ignored")
6226     else:
6227       _CheckNodeOnline(self, instance.primary_node)
6228
6229       bep = self.cfg.GetClusterInfo().FillBE(instance)
6230
6231       # check bridges existence
6232       _CheckInstanceBridgesExist(self, instance)
6233
6234       remote_info = self.rpc.call_instance_info(instance.primary_node,
6235                                                 instance.name,
6236                                                 instance.hypervisor)
6237       remote_info.Raise("Error checking node %s" % instance.primary_node,
6238                         prereq=True, ecode=errors.ECODE_ENVIRON)
6239       if not remote_info.payload: # not running already
6240         _CheckNodeFreeMemory(self, instance.primary_node,
6241                              "starting instance %s" % instance.name,
6242                              bep[constants.BE_MAXMEM], instance.hypervisor)
6243
6244   def Exec(self, feedback_fn):
6245     """Start the instance.
6246
6247     """
6248     instance = self.instance
6249     force = self.op.force
6250
6251     if not self.op.no_remember:
6252       self.cfg.MarkInstanceUp(instance.name)
6253
6254     if self.primary_offline:
6255       assert self.op.ignore_offline_nodes
6256       self.proc.LogInfo("Primary node offline, marked instance as started")
6257     else:
6258       node_current = instance.primary_node
6259
6260       _StartInstanceDisks(self, instance, force)
6261
6262       result = \
6263         self.rpc.call_instance_start(node_current,
6264                                      (instance, self.op.hvparams,
6265                                       self.op.beparams),
6266                                      self.op.startup_paused)
6267       msg = result.fail_msg
6268       if msg:
6269         _ShutdownInstanceDisks(self, instance)
6270         raise errors.OpExecError("Could not start instance: %s" % msg)
6271
6272
6273 class LUInstanceReboot(LogicalUnit):
6274   """Reboot an instance.
6275
6276   """
6277   HPATH = "instance-reboot"
6278   HTYPE = constants.HTYPE_INSTANCE
6279   REQ_BGL = False
6280
6281   def ExpandNames(self):
6282     self._ExpandAndLockInstance()
6283
6284   def BuildHooksEnv(self):
6285     """Build hooks env.
6286
6287     This runs on master, primary and secondary nodes of the instance.
6288
6289     """
6290     env = {
6291       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6292       "REBOOT_TYPE": self.op.reboot_type,
6293       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6294       }
6295
6296     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6297
6298     return env
6299
6300   def BuildHooksNodes(self):
6301     """Build hooks nodes.
6302
6303     """
6304     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6305     return (nl, nl)
6306
6307   def CheckPrereq(self):
6308     """Check prerequisites.
6309
6310     This checks that the instance is in the cluster.
6311
6312     """
6313     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6314     assert self.instance is not None, \
6315       "Cannot retrieve locked instance %s" % self.op.instance_name
6316     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6317     _CheckNodeOnline(self, instance.primary_node)
6318
6319     # check bridges existence
6320     _CheckInstanceBridgesExist(self, instance)
6321
6322   def Exec(self, feedback_fn):
6323     """Reboot the instance.
6324
6325     """
6326     instance = self.instance
6327     ignore_secondaries = self.op.ignore_secondaries
6328     reboot_type = self.op.reboot_type
6329
6330     remote_info = self.rpc.call_instance_info(instance.primary_node,
6331                                               instance.name,
6332                                               instance.hypervisor)
6333     remote_info.Raise("Error checking node %s" % instance.primary_node)
6334     instance_running = bool(remote_info.payload)
6335
6336     node_current = instance.primary_node
6337
6338     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6339                                             constants.INSTANCE_REBOOT_HARD]:
6340       for disk in instance.disks:
6341         self.cfg.SetDiskID(disk, node_current)
6342       result = self.rpc.call_instance_reboot(node_current, instance,
6343                                              reboot_type,
6344                                              self.op.shutdown_timeout)
6345       result.Raise("Could not reboot instance")
6346     else:
6347       if instance_running:
6348         result = self.rpc.call_instance_shutdown(node_current, instance,
6349                                                  self.op.shutdown_timeout)
6350         result.Raise("Could not shutdown instance for full reboot")
6351         _ShutdownInstanceDisks(self, instance)
6352       else:
6353         self.LogInfo("Instance %s was already stopped, starting now",
6354                      instance.name)
6355       _StartInstanceDisks(self, instance, ignore_secondaries)
6356       result = self.rpc.call_instance_start(node_current,
6357                                             (instance, None, None), False)
6358       msg = result.fail_msg
6359       if msg:
6360         _ShutdownInstanceDisks(self, instance)
6361         raise errors.OpExecError("Could not start instance for"
6362                                  " full reboot: %s" % msg)
6363
6364     self.cfg.MarkInstanceUp(instance.name)
6365
6366
6367 class LUInstanceShutdown(LogicalUnit):
6368   """Shutdown an instance.
6369
6370   """
6371   HPATH = "instance-stop"
6372   HTYPE = constants.HTYPE_INSTANCE
6373   REQ_BGL = False
6374
6375   def ExpandNames(self):
6376     self._ExpandAndLockInstance()
6377
6378   def BuildHooksEnv(self):
6379     """Build hooks env.
6380
6381     This runs on master, primary and secondary nodes of the instance.
6382
6383     """
6384     env = _BuildInstanceHookEnvByObject(self, self.instance)
6385     env["TIMEOUT"] = self.op.timeout
6386     return env
6387
6388   def BuildHooksNodes(self):
6389     """Build hooks nodes.
6390
6391     """
6392     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6393     return (nl, nl)
6394
6395   def CheckPrereq(self):
6396     """Check prerequisites.
6397
6398     This checks that the instance is in the cluster.
6399
6400     """
6401     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6402     assert self.instance is not None, \
6403       "Cannot retrieve locked instance %s" % self.op.instance_name
6404
6405     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6406
6407     self.primary_offline = \
6408       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6409
6410     if self.primary_offline and self.op.ignore_offline_nodes:
6411       self.proc.LogWarning("Ignoring offline primary node")
6412     else:
6413       _CheckNodeOnline(self, self.instance.primary_node)
6414
6415   def Exec(self, feedback_fn):
6416     """Shutdown the instance.
6417
6418     """
6419     instance = self.instance
6420     node_current = instance.primary_node
6421     timeout = self.op.timeout
6422
6423     if not self.op.no_remember:
6424       self.cfg.MarkInstanceDown(instance.name)
6425
6426     if self.primary_offline:
6427       assert self.op.ignore_offline_nodes
6428       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6429     else:
6430       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6431       msg = result.fail_msg
6432       if msg:
6433         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6434
6435       _ShutdownInstanceDisks(self, instance)
6436
6437
6438 class LUInstanceReinstall(LogicalUnit):
6439   """Reinstall an instance.
6440
6441   """
6442   HPATH = "instance-reinstall"
6443   HTYPE = constants.HTYPE_INSTANCE
6444   REQ_BGL = False
6445
6446   def ExpandNames(self):
6447     self._ExpandAndLockInstance()
6448
6449   def BuildHooksEnv(self):
6450     """Build hooks env.
6451
6452     This runs on master, primary and secondary nodes of the instance.
6453
6454     """
6455     return _BuildInstanceHookEnvByObject(self, self.instance)
6456
6457   def BuildHooksNodes(self):
6458     """Build hooks nodes.
6459
6460     """
6461     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6462     return (nl, nl)
6463
6464   def CheckPrereq(self):
6465     """Check prerequisites.
6466
6467     This checks that the instance is in the cluster and is not running.
6468
6469     """
6470     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6471     assert instance is not None, \
6472       "Cannot retrieve locked instance %s" % self.op.instance_name
6473     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6474                      " offline, cannot reinstall")
6475     for node in instance.secondary_nodes:
6476       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6477                        " cannot reinstall")
6478
6479     if instance.disk_template == constants.DT_DISKLESS:
6480       raise errors.OpPrereqError("Instance '%s' has no disks" %
6481                                  self.op.instance_name,
6482                                  errors.ECODE_INVAL)
6483     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6484
6485     if self.op.os_type is not None:
6486       # OS verification
6487       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6488       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6489       instance_os = self.op.os_type
6490     else:
6491       instance_os = instance.os
6492
6493     nodelist = list(instance.all_nodes)
6494
6495     if self.op.osparams:
6496       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6497       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6498       self.os_inst = i_osdict # the new dict (without defaults)
6499     else:
6500       self.os_inst = None
6501
6502     self.instance = instance
6503
6504   def Exec(self, feedback_fn):
6505     """Reinstall the instance.
6506
6507     """
6508     inst = self.instance
6509
6510     if self.op.os_type is not None:
6511       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6512       inst.os = self.op.os_type
6513       # Write to configuration
6514       self.cfg.Update(inst, feedback_fn)
6515
6516     _StartInstanceDisks(self, inst, None)
6517     try:
6518       feedback_fn("Running the instance OS create scripts...")
6519       # FIXME: pass debug option from opcode to backend
6520       result = self.rpc.call_instance_os_add(inst.primary_node,
6521                                              (inst, self.os_inst), True,
6522                                              self.op.debug_level)
6523       result.Raise("Could not install OS for instance %s on node %s" %
6524                    (inst.name, inst.primary_node))
6525     finally:
6526       _ShutdownInstanceDisks(self, inst)
6527
6528
6529 class LUInstanceRecreateDisks(LogicalUnit):
6530   """Recreate an instance's missing disks.
6531
6532   """
6533   HPATH = "instance-recreate-disks"
6534   HTYPE = constants.HTYPE_INSTANCE
6535   REQ_BGL = False
6536
6537   def CheckArguments(self):
6538     # normalise the disk list
6539     self.op.disks = sorted(frozenset(self.op.disks))
6540
6541   def ExpandNames(self):
6542     self._ExpandAndLockInstance()
6543     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6544     if self.op.nodes:
6545       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6546       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6547     else:
6548       self.needed_locks[locking.LEVEL_NODE] = []
6549
6550   def DeclareLocks(self, level):
6551     if level == locking.LEVEL_NODE:
6552       # if we replace the nodes, we only need to lock the old primary,
6553       # otherwise we need to lock all nodes for disk re-creation
6554       primary_only = bool(self.op.nodes)
6555       self._LockInstancesNodes(primary_only=primary_only)
6556     elif level == locking.LEVEL_NODE_RES:
6557       # Copy node locks
6558       self.needed_locks[locking.LEVEL_NODE_RES] = \
6559         self.needed_locks[locking.LEVEL_NODE][:]
6560
6561   def BuildHooksEnv(self):
6562     """Build hooks env.
6563
6564     This runs on master, primary and secondary nodes of the instance.
6565
6566     """
6567     return _BuildInstanceHookEnvByObject(self, self.instance)
6568
6569   def BuildHooksNodes(self):
6570     """Build hooks nodes.
6571
6572     """
6573     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6574     return (nl, nl)
6575
6576   def CheckPrereq(self):
6577     """Check prerequisites.
6578
6579     This checks that the instance is in the cluster and is not running.
6580
6581     """
6582     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6583     assert instance is not None, \
6584       "Cannot retrieve locked instance %s" % self.op.instance_name
6585     if self.op.nodes:
6586       if len(self.op.nodes) != len(instance.all_nodes):
6587         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6588                                    " %d replacement nodes were specified" %
6589                                    (instance.name, len(instance.all_nodes),
6590                                     len(self.op.nodes)),
6591                                    errors.ECODE_INVAL)
6592       assert instance.disk_template != constants.DT_DRBD8 or \
6593           len(self.op.nodes) == 2
6594       assert instance.disk_template != constants.DT_PLAIN or \
6595           len(self.op.nodes) == 1
6596       primary_node = self.op.nodes[0]
6597     else:
6598       primary_node = instance.primary_node
6599     _CheckNodeOnline(self, primary_node)
6600
6601     if instance.disk_template == constants.DT_DISKLESS:
6602       raise errors.OpPrereqError("Instance '%s' has no disks" %
6603                                  self.op.instance_name, errors.ECODE_INVAL)
6604     # if we replace nodes *and* the old primary is offline, we don't
6605     # check
6606     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6607     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6608     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6609     if not (self.op.nodes and old_pnode.offline):
6610       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6611                           msg="cannot recreate disks")
6612
6613     if not self.op.disks:
6614       self.op.disks = range(len(instance.disks))
6615     else:
6616       for idx in self.op.disks:
6617         if idx >= len(instance.disks):
6618           raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6619                                      errors.ECODE_INVAL)
6620     if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6621       raise errors.OpPrereqError("Can't recreate disks partially and"
6622                                  " change the nodes at the same time",
6623                                  errors.ECODE_INVAL)
6624     self.instance = instance
6625
6626   def Exec(self, feedback_fn):
6627     """Recreate the disks.
6628
6629     """
6630     instance = self.instance
6631
6632     assert (self.owned_locks(locking.LEVEL_NODE) ==
6633             self.owned_locks(locking.LEVEL_NODE_RES))
6634
6635     to_skip = []
6636     mods = [] # keeps track of needed logical_id changes
6637
6638     for idx, disk in enumerate(instance.disks):
6639       if idx not in self.op.disks: # disk idx has not been passed in
6640         to_skip.append(idx)
6641         continue
6642       # update secondaries for disks, if needed
6643       if self.op.nodes:
6644         if disk.dev_type == constants.LD_DRBD8:
6645           # need to update the nodes and minors
6646           assert len(self.op.nodes) == 2
6647           assert len(disk.logical_id) == 6 # otherwise disk internals
6648                                            # have changed
6649           (_, _, old_port, _, _, old_secret) = disk.logical_id
6650           new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6651           new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6652                     new_minors[0], new_minors[1], old_secret)
6653           assert len(disk.logical_id) == len(new_id)
6654           mods.append((idx, new_id))
6655
6656     # now that we have passed all asserts above, we can apply the mods
6657     # in a single run (to avoid partial changes)
6658     for idx, new_id in mods:
6659       instance.disks[idx].logical_id = new_id
6660
6661     # change primary node, if needed
6662     if self.op.nodes:
6663       instance.primary_node = self.op.nodes[0]
6664       self.LogWarning("Changing the instance's nodes, you will have to"
6665                       " remove any disks left on the older nodes manually")
6666
6667     if self.op.nodes:
6668       self.cfg.Update(instance, feedback_fn)
6669
6670     _CreateDisks(self, instance, to_skip=to_skip)
6671
6672
6673 class LUInstanceRename(LogicalUnit):
6674   """Rename an instance.
6675
6676   """
6677   HPATH = "instance-rename"
6678   HTYPE = constants.HTYPE_INSTANCE
6679
6680   def CheckArguments(self):
6681     """Check arguments.
6682
6683     """
6684     if self.op.ip_check and not self.op.name_check:
6685       # TODO: make the ip check more flexible and not depend on the name check
6686       raise errors.OpPrereqError("IP address check requires a name check",
6687                                  errors.ECODE_INVAL)
6688
6689   def BuildHooksEnv(self):
6690     """Build hooks env.
6691
6692     This runs on master, primary and secondary nodes of the instance.
6693
6694     """
6695     env = _BuildInstanceHookEnvByObject(self, self.instance)
6696     env["INSTANCE_NEW_NAME"] = self.op.new_name
6697     return env
6698
6699   def BuildHooksNodes(self):
6700     """Build hooks nodes.
6701
6702     """
6703     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6704     return (nl, nl)
6705
6706   def CheckPrereq(self):
6707     """Check prerequisites.
6708
6709     This checks that the instance is in the cluster and is not running.
6710
6711     """
6712     self.op.instance_name = _ExpandInstanceName(self.cfg,
6713                                                 self.op.instance_name)
6714     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6715     assert instance is not None
6716     _CheckNodeOnline(self, instance.primary_node)
6717     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6718                         msg="cannot rename")
6719     self.instance = instance
6720
6721     new_name = self.op.new_name
6722     if self.op.name_check:
6723       hostname = netutils.GetHostname(name=new_name)
6724       if hostname.name != new_name:
6725         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6726                      hostname.name)
6727       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6728         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6729                                     " same as given hostname '%s'") %
6730                                     (hostname.name, self.op.new_name),
6731                                     errors.ECODE_INVAL)
6732       new_name = self.op.new_name = hostname.name
6733       if (self.op.ip_check and
6734           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6735         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6736                                    (hostname.ip, new_name),
6737                                    errors.ECODE_NOTUNIQUE)
6738
6739     instance_list = self.cfg.GetInstanceList()
6740     if new_name in instance_list and new_name != instance.name:
6741       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6742                                  new_name, errors.ECODE_EXISTS)
6743
6744   def Exec(self, feedback_fn):
6745     """Rename the instance.
6746
6747     """
6748     inst = self.instance
6749     old_name = inst.name
6750
6751     rename_file_storage = False
6752     if (inst.disk_template in constants.DTS_FILEBASED and
6753         self.op.new_name != inst.name):
6754       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6755       rename_file_storage = True
6756
6757     self.cfg.RenameInstance(inst.name, self.op.new_name)
6758     # Change the instance lock. This is definitely safe while we hold the BGL.
6759     # Otherwise the new lock would have to be added in acquired mode.
6760     assert self.REQ_BGL
6761     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6762     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6763
6764     # re-read the instance from the configuration after rename
6765     inst = self.cfg.GetInstanceInfo(self.op.new_name)
6766
6767     if rename_file_storage:
6768       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6769       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6770                                                      old_file_storage_dir,
6771                                                      new_file_storage_dir)
6772       result.Raise("Could not rename on node %s directory '%s' to '%s'"
6773                    " (but the instance has been renamed in Ganeti)" %
6774                    (inst.primary_node, old_file_storage_dir,
6775                     new_file_storage_dir))
6776
6777     _StartInstanceDisks(self, inst, None)
6778     try:
6779       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6780                                                  old_name, self.op.debug_level)
6781       msg = result.fail_msg
6782       if msg:
6783         msg = ("Could not run OS rename script for instance %s on node %s"
6784                " (but the instance has been renamed in Ganeti): %s" %
6785                (inst.name, inst.primary_node, msg))
6786         self.proc.LogWarning(msg)
6787     finally:
6788       _ShutdownInstanceDisks(self, inst)
6789
6790     return inst.name
6791
6792
6793 class LUInstanceRemove(LogicalUnit):
6794   """Remove an instance.
6795
6796   """
6797   HPATH = "instance-remove"
6798   HTYPE = constants.HTYPE_INSTANCE
6799   REQ_BGL = False
6800
6801   def ExpandNames(self):
6802     self._ExpandAndLockInstance()
6803     self.needed_locks[locking.LEVEL_NODE] = []
6804     self.needed_locks[locking.LEVEL_NODE_RES] = []
6805     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6806
6807   def DeclareLocks(self, level):
6808     if level == locking.LEVEL_NODE:
6809       self._LockInstancesNodes()
6810     elif level == locking.LEVEL_NODE_RES:
6811       # Copy node locks
6812       self.needed_locks[locking.LEVEL_NODE_RES] = \
6813         self.needed_locks[locking.LEVEL_NODE][:]
6814
6815   def BuildHooksEnv(self):
6816     """Build hooks env.
6817
6818     This runs on master, primary and secondary nodes of the instance.
6819
6820     """
6821     env = _BuildInstanceHookEnvByObject(self, self.instance)
6822     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6823     return env
6824
6825   def BuildHooksNodes(self):
6826     """Build hooks nodes.
6827
6828     """
6829     nl = [self.cfg.GetMasterNode()]
6830     nl_post = list(self.instance.all_nodes) + nl
6831     return (nl, nl_post)
6832
6833   def CheckPrereq(self):
6834     """Check prerequisites.
6835
6836     This checks that the instance is in the cluster.
6837
6838     """
6839     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6840     assert self.instance is not None, \
6841       "Cannot retrieve locked instance %s" % self.op.instance_name
6842
6843   def Exec(self, feedback_fn):
6844     """Remove the instance.
6845
6846     """
6847     instance = self.instance
6848     logging.info("Shutting down instance %s on node %s",
6849                  instance.name, instance.primary_node)
6850
6851     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6852                                              self.op.shutdown_timeout)
6853     msg = result.fail_msg
6854     if msg:
6855       if self.op.ignore_failures:
6856         feedback_fn("Warning: can't shutdown instance: %s" % msg)
6857       else:
6858         raise errors.OpExecError("Could not shutdown instance %s on"
6859                                  " node %s: %s" %
6860                                  (instance.name, instance.primary_node, msg))
6861
6862     assert (self.owned_locks(locking.LEVEL_NODE) ==
6863             self.owned_locks(locking.LEVEL_NODE_RES))
6864     assert not (set(instance.all_nodes) -
6865                 self.owned_locks(locking.LEVEL_NODE)), \
6866       "Not owning correct locks"
6867
6868     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6869
6870
6871 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6872   """Utility function to remove an instance.
6873
6874   """
6875   logging.info("Removing block devices for instance %s", instance.name)
6876
6877   if not _RemoveDisks(lu, instance):
6878     if not ignore_failures:
6879       raise errors.OpExecError("Can't remove instance's disks")
6880     feedback_fn("Warning: can't remove instance's disks")
6881
6882   logging.info("Removing instance %s out of cluster config", instance.name)
6883
6884   lu.cfg.RemoveInstance(instance.name)
6885
6886   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6887     "Instance lock removal conflict"
6888
6889   # Remove lock for the instance
6890   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6891
6892
6893 class LUInstanceQuery(NoHooksLU):
6894   """Logical unit for querying instances.
6895
6896   """
6897   # pylint: disable=W0142
6898   REQ_BGL = False
6899
6900   def CheckArguments(self):
6901     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6902                              self.op.output_fields, self.op.use_locking)
6903
6904   def ExpandNames(self):
6905     self.iq.ExpandNames(self)
6906
6907   def DeclareLocks(self, level):
6908     self.iq.DeclareLocks(self, level)
6909
6910   def Exec(self, feedback_fn):
6911     return self.iq.OldStyleQuery(self)
6912
6913
6914 class LUInstanceFailover(LogicalUnit):
6915   """Failover an instance.
6916
6917   """
6918   HPATH = "instance-failover"
6919   HTYPE = constants.HTYPE_INSTANCE
6920   REQ_BGL = False
6921
6922   def CheckArguments(self):
6923     """Check the arguments.
6924
6925     """
6926     self.iallocator = getattr(self.op, "iallocator", None)
6927     self.target_node = getattr(self.op, "target_node", None)
6928
6929   def ExpandNames(self):
6930     self._ExpandAndLockInstance()
6931
6932     if self.op.target_node is not None:
6933       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6934
6935     self.needed_locks[locking.LEVEL_NODE] = []
6936     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6937
6938     ignore_consistency = self.op.ignore_consistency
6939     shutdown_timeout = self.op.shutdown_timeout
6940     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6941                                        cleanup=False,
6942                                        failover=True,
6943                                        ignore_consistency=ignore_consistency,
6944                                        shutdown_timeout=shutdown_timeout)
6945     self.tasklets = [self._migrater]
6946
6947   def DeclareLocks(self, level):
6948     if level == locking.LEVEL_NODE:
6949       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6950       if instance.disk_template in constants.DTS_EXT_MIRROR:
6951         if self.op.target_node is None:
6952           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6953         else:
6954           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6955                                                    self.op.target_node]
6956         del self.recalculate_locks[locking.LEVEL_NODE]
6957       else:
6958         self._LockInstancesNodes()
6959
6960   def BuildHooksEnv(self):
6961     """Build hooks env.
6962
6963     This runs on master, primary and secondary nodes of the instance.
6964
6965     """
6966     instance = self._migrater.instance
6967     source_node = instance.primary_node
6968     target_node = self.op.target_node
6969     env = {
6970       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6971       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6972       "OLD_PRIMARY": source_node,
6973       "NEW_PRIMARY": target_node,
6974       }
6975
6976     if instance.disk_template in constants.DTS_INT_MIRROR:
6977       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6978       env["NEW_SECONDARY"] = source_node
6979     else:
6980       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6981
6982     env.update(_BuildInstanceHookEnvByObject(self, instance))
6983
6984     return env
6985
6986   def BuildHooksNodes(self):
6987     """Build hooks nodes.
6988
6989     """
6990     instance = self._migrater.instance
6991     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6992     return (nl, nl + [instance.primary_node])
6993
6994
6995 class LUInstanceMigrate(LogicalUnit):
6996   """Migrate an instance.
6997
6998   This is migration without shutting down, compared to the failover,
6999   which is done with shutdown.
7000
7001   """
7002   HPATH = "instance-migrate"
7003   HTYPE = constants.HTYPE_INSTANCE
7004   REQ_BGL = False
7005
7006   def ExpandNames(self):
7007     self._ExpandAndLockInstance()
7008
7009     if self.op.target_node is not None:
7010       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7011
7012     self.needed_locks[locking.LEVEL_NODE] = []
7013     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7014
7015     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7016                                        cleanup=self.op.cleanup,
7017                                        failover=False,
7018                                        fallback=self.op.allow_failover)
7019     self.tasklets = [self._migrater]
7020
7021   def DeclareLocks(self, level):
7022     if level == locking.LEVEL_NODE:
7023       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7024       if instance.disk_template in constants.DTS_EXT_MIRROR:
7025         if self.op.target_node is None:
7026           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7027         else:
7028           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7029                                                    self.op.target_node]
7030         del self.recalculate_locks[locking.LEVEL_NODE]
7031       else:
7032         self._LockInstancesNodes()
7033
7034   def BuildHooksEnv(self):
7035     """Build hooks env.
7036
7037     This runs on master, primary and secondary nodes of the instance.
7038
7039     """
7040     instance = self._migrater.instance
7041     source_node = instance.primary_node
7042     target_node = self.op.target_node
7043     env = _BuildInstanceHookEnvByObject(self, instance)
7044     env.update({
7045       "MIGRATE_LIVE": self._migrater.live,
7046       "MIGRATE_CLEANUP": self.op.cleanup,
7047       "OLD_PRIMARY": source_node,
7048       "NEW_PRIMARY": target_node,
7049       })
7050
7051     if instance.disk_template in constants.DTS_INT_MIRROR:
7052       env["OLD_SECONDARY"] = target_node
7053       env["NEW_SECONDARY"] = source_node
7054     else:
7055       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7056
7057     return env
7058
7059   def BuildHooksNodes(self):
7060     """Build hooks nodes.
7061
7062     """
7063     instance = self._migrater.instance
7064     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7065     return (nl, nl + [instance.primary_node])
7066
7067
7068 class LUInstanceMove(LogicalUnit):
7069   """Move an instance by data-copying.
7070
7071   """
7072   HPATH = "instance-move"
7073   HTYPE = constants.HTYPE_INSTANCE
7074   REQ_BGL = False
7075
7076   def ExpandNames(self):
7077     self._ExpandAndLockInstance()
7078     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7079     self.op.target_node = target_node
7080     self.needed_locks[locking.LEVEL_NODE] = [target_node]
7081     self.needed_locks[locking.LEVEL_NODE_RES] = []
7082     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7083
7084   def DeclareLocks(self, level):
7085     if level == locking.LEVEL_NODE:
7086       self._LockInstancesNodes(primary_only=True)
7087     elif level == locking.LEVEL_NODE_RES:
7088       # Copy node locks
7089       self.needed_locks[locking.LEVEL_NODE_RES] = \
7090         self.needed_locks[locking.LEVEL_NODE][:]
7091
7092   def BuildHooksEnv(self):
7093     """Build hooks env.
7094
7095     This runs on master, primary and secondary nodes of the instance.
7096
7097     """
7098     env = {
7099       "TARGET_NODE": self.op.target_node,
7100       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7101       }
7102     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7103     return env
7104
7105   def BuildHooksNodes(self):
7106     """Build hooks nodes.
7107
7108     """
7109     nl = [
7110       self.cfg.GetMasterNode(),
7111       self.instance.primary_node,
7112       self.op.target_node,
7113       ]
7114     return (nl, nl)
7115
7116   def CheckPrereq(self):
7117     """Check prerequisites.
7118
7119     This checks that the instance is in the cluster.
7120
7121     """
7122     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7123     assert self.instance is not None, \
7124       "Cannot retrieve locked instance %s" % self.op.instance_name
7125
7126     node = self.cfg.GetNodeInfo(self.op.target_node)
7127     assert node is not None, \
7128       "Cannot retrieve locked node %s" % self.op.target_node
7129
7130     self.target_node = target_node = node.name
7131
7132     if target_node == instance.primary_node:
7133       raise errors.OpPrereqError("Instance %s is already on the node %s" %
7134                                  (instance.name, target_node),
7135                                  errors.ECODE_STATE)
7136
7137     bep = self.cfg.GetClusterInfo().FillBE(instance)
7138
7139     for idx, dsk in enumerate(instance.disks):
7140       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7141         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7142                                    " cannot copy" % idx, errors.ECODE_STATE)
7143
7144     _CheckNodeOnline(self, target_node)
7145     _CheckNodeNotDrained(self, target_node)
7146     _CheckNodeVmCapable(self, target_node)
7147
7148     if instance.admin_state == constants.ADMINST_UP:
7149       # check memory requirements on the secondary node
7150       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7151                            instance.name, bep[constants.BE_MAXMEM],
7152                            instance.hypervisor)
7153     else:
7154       self.LogInfo("Not checking memory on the secondary node as"
7155                    " instance will not be started")
7156
7157     # check bridge existance
7158     _CheckInstanceBridgesExist(self, instance, node=target_node)
7159
7160   def Exec(self, feedback_fn):
7161     """Move an instance.
7162
7163     The move is done by shutting it down on its present node, copying
7164     the data over (slow) and starting it on the new node.
7165
7166     """
7167     instance = self.instance
7168
7169     source_node = instance.primary_node
7170     target_node = self.target_node
7171
7172     self.LogInfo("Shutting down instance %s on source node %s",
7173                  instance.name, source_node)
7174
7175     assert (self.owned_locks(locking.LEVEL_NODE) ==
7176             self.owned_locks(locking.LEVEL_NODE_RES))
7177
7178     result = self.rpc.call_instance_shutdown(source_node, instance,
7179                                              self.op.shutdown_timeout)
7180     msg = result.fail_msg
7181     if msg:
7182       if self.op.ignore_consistency:
7183         self.proc.LogWarning("Could not shutdown instance %s on node %s."
7184                              " Proceeding anyway. Please make sure node"
7185                              " %s is down. Error details: %s",
7186                              instance.name, source_node, source_node, msg)
7187       else:
7188         raise errors.OpExecError("Could not shutdown instance %s on"
7189                                  " node %s: %s" %
7190                                  (instance.name, source_node, msg))
7191
7192     # create the target disks
7193     try:
7194       _CreateDisks(self, instance, target_node=target_node)
7195     except errors.OpExecError:
7196       self.LogWarning("Device creation failed, reverting...")
7197       try:
7198         _RemoveDisks(self, instance, target_node=target_node)
7199       finally:
7200         self.cfg.ReleaseDRBDMinors(instance.name)
7201         raise
7202
7203     cluster_name = self.cfg.GetClusterInfo().cluster_name
7204
7205     errs = []
7206     # activate, get path, copy the data over
7207     for idx, disk in enumerate(instance.disks):
7208       self.LogInfo("Copying data for disk %d", idx)
7209       result = self.rpc.call_blockdev_assemble(target_node, disk,
7210                                                instance.name, True, idx)
7211       if result.fail_msg:
7212         self.LogWarning("Can't assemble newly created disk %d: %s",
7213                         idx, result.fail_msg)
7214         errs.append(result.fail_msg)
7215         break
7216       dev_path = result.payload
7217       result = self.rpc.call_blockdev_export(source_node, disk,
7218                                              target_node, dev_path,
7219                                              cluster_name)
7220       if result.fail_msg:
7221         self.LogWarning("Can't copy data over for disk %d: %s",
7222                         idx, result.fail_msg)
7223         errs.append(result.fail_msg)
7224         break
7225
7226     if errs:
7227       self.LogWarning("Some disks failed to copy, aborting")
7228       try:
7229         _RemoveDisks(self, instance, target_node=target_node)
7230       finally:
7231         self.cfg.ReleaseDRBDMinors(instance.name)
7232         raise errors.OpExecError("Errors during disk copy: %s" %
7233                                  (",".join(errs),))
7234
7235     instance.primary_node = target_node
7236     self.cfg.Update(instance, feedback_fn)
7237
7238     self.LogInfo("Removing the disks on the original node")
7239     _RemoveDisks(self, instance, target_node=source_node)
7240
7241     # Only start the instance if it's marked as up
7242     if instance.admin_state == constants.ADMINST_UP:
7243       self.LogInfo("Starting instance %s on node %s",
7244                    instance.name, target_node)
7245
7246       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7247                                            ignore_secondaries=True)
7248       if not disks_ok:
7249         _ShutdownInstanceDisks(self, instance)
7250         raise errors.OpExecError("Can't activate the instance's disks")
7251
7252       result = self.rpc.call_instance_start(target_node,
7253                                             (instance, None, None), False)
7254       msg = result.fail_msg
7255       if msg:
7256         _ShutdownInstanceDisks(self, instance)
7257         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7258                                  (instance.name, target_node, msg))
7259
7260
7261 class LUNodeMigrate(LogicalUnit):
7262   """Migrate all instances from a node.
7263
7264   """
7265   HPATH = "node-migrate"
7266   HTYPE = constants.HTYPE_NODE
7267   REQ_BGL = False
7268
7269   def CheckArguments(self):
7270     pass
7271
7272   def ExpandNames(self):
7273     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7274
7275     self.share_locks = _ShareAll()
7276     self.needed_locks = {
7277       locking.LEVEL_NODE: [self.op.node_name],
7278       }
7279
7280   def BuildHooksEnv(self):
7281     """Build hooks env.
7282
7283     This runs on the master, the primary and all the secondaries.
7284
7285     """
7286     return {
7287       "NODE_NAME": self.op.node_name,
7288       }
7289
7290   def BuildHooksNodes(self):
7291     """Build hooks nodes.
7292
7293     """
7294     nl = [self.cfg.GetMasterNode()]
7295     return (nl, nl)
7296
7297   def CheckPrereq(self):
7298     pass
7299
7300   def Exec(self, feedback_fn):
7301     # Prepare jobs for migration instances
7302     jobs = [
7303       [opcodes.OpInstanceMigrate(instance_name=inst.name,
7304                                  mode=self.op.mode,
7305                                  live=self.op.live,
7306                                  iallocator=self.op.iallocator,
7307                                  target_node=self.op.target_node)]
7308       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7309       ]
7310
7311     # TODO: Run iallocator in this opcode and pass correct placement options to
7312     # OpInstanceMigrate. Since other jobs can modify the cluster between
7313     # running the iallocator and the actual migration, a good consistency model
7314     # will have to be found.
7315
7316     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7317             frozenset([self.op.node_name]))
7318
7319     return ResultWithJobs(jobs)
7320
7321
7322 class TLMigrateInstance(Tasklet):
7323   """Tasklet class for instance migration.
7324
7325   @type live: boolean
7326   @ivar live: whether the migration will be done live or non-live;
7327       this variable is initalized only after CheckPrereq has run
7328   @type cleanup: boolean
7329   @ivar cleanup: Wheater we cleanup from a failed migration
7330   @type iallocator: string
7331   @ivar iallocator: The iallocator used to determine target_node
7332   @type target_node: string
7333   @ivar target_node: If given, the target_node to reallocate the instance to
7334   @type failover: boolean
7335   @ivar failover: Whether operation results in failover or migration
7336   @type fallback: boolean
7337   @ivar fallback: Whether fallback to failover is allowed if migration not
7338                   possible
7339   @type ignore_consistency: boolean
7340   @ivar ignore_consistency: Wheter we should ignore consistency between source
7341                             and target node
7342   @type shutdown_timeout: int
7343   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7344
7345   """
7346
7347   # Constants
7348   _MIGRATION_POLL_INTERVAL = 1      # seconds
7349   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7350
7351   def __init__(self, lu, instance_name, cleanup=False,
7352                failover=False, fallback=False,
7353                ignore_consistency=False,
7354                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7355     """Initializes this class.
7356
7357     """
7358     Tasklet.__init__(self, lu)
7359
7360     # Parameters
7361     self.instance_name = instance_name
7362     self.cleanup = cleanup
7363     self.live = False # will be overridden later
7364     self.failover = failover
7365     self.fallback = fallback
7366     self.ignore_consistency = ignore_consistency
7367     self.shutdown_timeout = shutdown_timeout
7368
7369   def CheckPrereq(self):
7370     """Check prerequisites.
7371
7372     This checks that the instance is in the cluster.
7373
7374     """
7375     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7376     instance = self.cfg.GetInstanceInfo(instance_name)
7377     assert instance is not None
7378     self.instance = instance
7379
7380     if (not self.cleanup and
7381         not instance.admin_state == constants.ADMINST_UP and
7382         not self.failover and self.fallback):
7383       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7384                       " switching to failover")
7385       self.failover = True
7386
7387     if instance.disk_template not in constants.DTS_MIRRORED:
7388       if self.failover:
7389         text = "failovers"
7390       else:
7391         text = "migrations"
7392       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7393                                  " %s" % (instance.disk_template, text),
7394                                  errors.ECODE_STATE)
7395
7396     if instance.disk_template in constants.DTS_EXT_MIRROR:
7397       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7398
7399       if self.lu.op.iallocator:
7400         self._RunAllocator()
7401       else:
7402         # We set set self.target_node as it is required by
7403         # BuildHooksEnv
7404         self.target_node = self.lu.op.target_node
7405
7406       # self.target_node is already populated, either directly or by the
7407       # iallocator run
7408       target_node = self.target_node
7409       if self.target_node == instance.primary_node:
7410         raise errors.OpPrereqError("Cannot migrate instance %s"
7411                                    " to its primary (%s)" %
7412                                    (instance.name, instance.primary_node))
7413
7414       if len(self.lu.tasklets) == 1:
7415         # It is safe to release locks only when we're the only tasklet
7416         # in the LU
7417         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7418                       keep=[instance.primary_node, self.target_node])
7419
7420     else:
7421       secondary_nodes = instance.secondary_nodes
7422       if not secondary_nodes:
7423         raise errors.ConfigurationError("No secondary node but using"
7424                                         " %s disk template" %
7425                                         instance.disk_template)
7426       target_node = secondary_nodes[0]
7427       if self.lu.op.iallocator or (self.lu.op.target_node and
7428                                    self.lu.op.target_node != target_node):
7429         if self.failover:
7430           text = "failed over"
7431         else:
7432           text = "migrated"
7433         raise errors.OpPrereqError("Instances with disk template %s cannot"
7434                                    " be %s to arbitrary nodes"
7435                                    " (neither an iallocator nor a target"
7436                                    " node can be passed)" %
7437                                    (instance.disk_template, text),
7438                                    errors.ECODE_INVAL)
7439
7440     i_be = self.cfg.GetClusterInfo().FillBE(instance)
7441
7442     # check memory requirements on the secondary node
7443     if not self.failover or instance.admin_state == constants.ADMINST_UP:
7444       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7445                            instance.name, i_be[constants.BE_MAXMEM],
7446                            instance.hypervisor)
7447     else:
7448       self.lu.LogInfo("Not checking memory on the secondary node as"
7449                       " instance will not be started")
7450
7451     # check bridge existance
7452     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7453
7454     if not self.cleanup:
7455       _CheckNodeNotDrained(self.lu, target_node)
7456       if not self.failover:
7457         result = self.rpc.call_instance_migratable(instance.primary_node,
7458                                                    instance)
7459         if result.fail_msg and self.fallback:
7460           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7461                           " failover")
7462           self.failover = True
7463         else:
7464           result.Raise("Can't migrate, please use failover",
7465                        prereq=True, ecode=errors.ECODE_STATE)
7466
7467     assert not (self.failover and self.cleanup)
7468
7469     if not self.failover:
7470       if self.lu.op.live is not None and self.lu.op.mode is not None:
7471         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7472                                    " parameters are accepted",
7473                                    errors.ECODE_INVAL)
7474       if self.lu.op.live is not None:
7475         if self.lu.op.live:
7476           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7477         else:
7478           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7479         # reset the 'live' parameter to None so that repeated
7480         # invocations of CheckPrereq do not raise an exception
7481         self.lu.op.live = None
7482       elif self.lu.op.mode is None:
7483         # read the default value from the hypervisor
7484         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7485                                                 skip_globals=False)
7486         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7487
7488       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7489     else:
7490       # Failover is never live
7491       self.live = False
7492
7493   def _RunAllocator(self):
7494     """Run the allocator based on input opcode.
7495
7496     """
7497     ial = IAllocator(self.cfg, self.rpc,
7498                      mode=constants.IALLOCATOR_MODE_RELOC,
7499                      name=self.instance_name,
7500                      # TODO See why hail breaks with a single node below
7501                      relocate_from=[self.instance.primary_node,
7502                                     self.instance.primary_node],
7503                      )
7504
7505     ial.Run(self.lu.op.iallocator)
7506
7507     if not ial.success:
7508       raise errors.OpPrereqError("Can't compute nodes using"
7509                                  " iallocator '%s': %s" %
7510                                  (self.lu.op.iallocator, ial.info),
7511                                  errors.ECODE_NORES)
7512     if len(ial.result) != ial.required_nodes:
7513       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7514                                  " of nodes (%s), required %s" %
7515                                  (self.lu.op.iallocator, len(ial.result),
7516                                   ial.required_nodes), errors.ECODE_FAULT)
7517     self.target_node = ial.result[0]
7518     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7519                  self.instance_name, self.lu.op.iallocator,
7520                  utils.CommaJoin(ial.result))
7521
7522   def _WaitUntilSync(self):
7523     """Poll with custom rpc for disk sync.
7524
7525     This uses our own step-based rpc call.
7526
7527     """
7528     self.feedback_fn("* wait until resync is done")
7529     all_done = False
7530     while not all_done:
7531       all_done = True
7532       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7533                                             self.nodes_ip,
7534                                             self.instance.disks)
7535       min_percent = 100
7536       for node, nres in result.items():
7537         nres.Raise("Cannot resync disks on node %s" % node)
7538         node_done, node_percent = nres.payload
7539         all_done = all_done and node_done
7540         if node_percent is not None:
7541           min_percent = min(min_percent, node_percent)
7542       if not all_done:
7543         if min_percent < 100:
7544           self.feedback_fn("   - progress: %.1f%%" % min_percent)
7545         time.sleep(2)
7546
7547   def _EnsureSecondary(self, node):
7548     """Demote a node to secondary.
7549
7550     """
7551     self.feedback_fn("* switching node %s to secondary mode" % node)
7552
7553     for dev in self.instance.disks:
7554       self.cfg.SetDiskID(dev, node)
7555
7556     result = self.rpc.call_blockdev_close(node, self.instance.name,
7557                                           self.instance.disks)
7558     result.Raise("Cannot change disk to secondary on node %s" % node)
7559
7560   def _GoStandalone(self):
7561     """Disconnect from the network.
7562
7563     """
7564     self.feedback_fn("* changing into standalone mode")
7565     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7566                                                self.instance.disks)
7567     for node, nres in result.items():
7568       nres.Raise("Cannot disconnect disks node %s" % node)
7569
7570   def _GoReconnect(self, multimaster):
7571     """Reconnect to the network.
7572
7573     """
7574     if multimaster:
7575       msg = "dual-master"
7576     else:
7577       msg = "single-master"
7578     self.feedback_fn("* changing disks into %s mode" % msg)
7579     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7580                                            self.instance.disks,
7581                                            self.instance.name, multimaster)
7582     for node, nres in result.items():
7583       nres.Raise("Cannot change disks config on node %s" % node)
7584
7585   def _ExecCleanup(self):
7586     """Try to cleanup after a failed migration.
7587
7588     The cleanup is done by:
7589       - check that the instance is running only on one node
7590         (and update the config if needed)
7591       - change disks on its secondary node to secondary
7592       - wait until disks are fully synchronized
7593       - disconnect from the network
7594       - change disks into single-master mode
7595       - wait again until disks are fully synchronized
7596
7597     """
7598     instance = self.instance
7599     target_node = self.target_node
7600     source_node = self.source_node
7601
7602     # check running on only one node
7603     self.feedback_fn("* checking where the instance actually runs"
7604                      " (if this hangs, the hypervisor might be in"
7605                      " a bad state)")
7606     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7607     for node, result in ins_l.items():
7608       result.Raise("Can't contact node %s" % node)
7609
7610     runningon_source = instance.name in ins_l[source_node].payload
7611     runningon_target = instance.name in ins_l[target_node].payload
7612
7613     if runningon_source and runningon_target:
7614       raise errors.OpExecError("Instance seems to be running on two nodes,"
7615                                " or the hypervisor is confused; you will have"
7616                                " to ensure manually that it runs only on one"
7617                                " and restart this operation")
7618
7619     if not (runningon_source or runningon_target):
7620       raise errors.OpExecError("Instance does not seem to be running at all;"
7621                                " in this case it's safer to repair by"
7622                                " running 'gnt-instance stop' to ensure disk"
7623                                " shutdown, and then restarting it")
7624
7625     if runningon_target:
7626       # the migration has actually succeeded, we need to update the config
7627       self.feedback_fn("* instance running on secondary node (%s),"
7628                        " updating config" % target_node)
7629       instance.primary_node = target_node
7630       self.cfg.Update(instance, self.feedback_fn)
7631       demoted_node = source_node
7632     else:
7633       self.feedback_fn("* instance confirmed to be running on its"
7634                        " primary node (%s)" % source_node)
7635       demoted_node = target_node
7636
7637     if instance.disk_template in constants.DTS_INT_MIRROR:
7638       self._EnsureSecondary(demoted_node)
7639       try:
7640         self._WaitUntilSync()
7641       except errors.OpExecError:
7642         # we ignore here errors, since if the device is standalone, it
7643         # won't be able to sync
7644         pass
7645       self._GoStandalone()
7646       self._GoReconnect(False)
7647       self._WaitUntilSync()
7648
7649     self.feedback_fn("* done")
7650
7651   def _RevertDiskStatus(self):
7652     """Try to revert the disk status after a failed migration.
7653
7654     """
7655     target_node = self.target_node
7656     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7657       return
7658
7659     try:
7660       self._EnsureSecondary(target_node)
7661       self._GoStandalone()
7662       self._GoReconnect(False)
7663       self._WaitUntilSync()
7664     except errors.OpExecError, err:
7665       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7666                          " please try to recover the instance manually;"
7667                          " error '%s'" % str(err))
7668
7669   def _AbortMigration(self):
7670     """Call the hypervisor code to abort a started migration.
7671
7672     """
7673     instance = self.instance
7674     target_node = self.target_node
7675     source_node = self.source_node
7676     migration_info = self.migration_info
7677
7678     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7679                                                                  instance,
7680                                                                  migration_info,
7681                                                                  False)
7682     abort_msg = abort_result.fail_msg
7683     if abort_msg:
7684       logging.error("Aborting migration failed on target node %s: %s",
7685                     target_node, abort_msg)
7686       # Don't raise an exception here, as we stil have to try to revert the
7687       # disk status, even if this step failed.
7688
7689     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7690         instance, False, self.live)
7691     abort_msg = abort_result.fail_msg
7692     if abort_msg:
7693       logging.error("Aborting migration failed on source node %s: %s",
7694                     source_node, abort_msg)
7695
7696   def _ExecMigration(self):
7697     """Migrate an instance.
7698
7699     The migrate is done by:
7700       - change the disks into dual-master mode
7701       - wait until disks are fully synchronized again
7702       - migrate the instance
7703       - change disks on the new secondary node (the old primary) to secondary
7704       - wait until disks are fully synchronized
7705       - change disks into single-master mode
7706
7707     """
7708     instance = self.instance
7709     target_node = self.target_node
7710     source_node = self.source_node
7711
7712     # Check for hypervisor version mismatch and warn the user.
7713     nodeinfo = self.rpc.call_node_info([source_node, target_node],
7714                                        None, [self.instance.hypervisor])
7715     for ninfo in nodeinfo.values():
7716       ninfo.Raise("Unable to retrieve node information from node '%s'" %
7717                   ninfo.node)
7718     (_, _, (src_info, )) = nodeinfo[source_node].payload
7719     (_, _, (dst_info, )) = nodeinfo[target_node].payload
7720
7721     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7722         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7723       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7724       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7725       if src_version != dst_version:
7726         self.feedback_fn("* warning: hypervisor version mismatch between"
7727                          " source (%s) and target (%s) node" %
7728                          (src_version, dst_version))
7729
7730     self.feedback_fn("* checking disk consistency between source and target")
7731     for dev in instance.disks:
7732       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7733         raise errors.OpExecError("Disk %s is degraded or not fully"
7734                                  " synchronized on target node,"
7735                                  " aborting migration" % dev.iv_name)
7736
7737     # First get the migration information from the remote node
7738     result = self.rpc.call_migration_info(source_node, instance)
7739     msg = result.fail_msg
7740     if msg:
7741       log_err = ("Failed fetching source migration information from %s: %s" %
7742                  (source_node, msg))
7743       logging.error(log_err)
7744       raise errors.OpExecError(log_err)
7745
7746     self.migration_info = migration_info = result.payload
7747
7748     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7749       # Then switch the disks to master/master mode
7750       self._EnsureSecondary(target_node)
7751       self._GoStandalone()
7752       self._GoReconnect(True)
7753       self._WaitUntilSync()
7754
7755     self.feedback_fn("* preparing %s to accept the instance" % target_node)
7756     result = self.rpc.call_accept_instance(target_node,
7757                                            instance,
7758                                            migration_info,
7759                                            self.nodes_ip[target_node])
7760
7761     msg = result.fail_msg
7762     if msg:
7763       logging.error("Instance pre-migration failed, trying to revert"
7764                     " disk status: %s", msg)
7765       self.feedback_fn("Pre-migration failed, aborting")
7766       self._AbortMigration()
7767       self._RevertDiskStatus()
7768       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7769                                (instance.name, msg))
7770
7771     self.feedback_fn("* migrating instance to %s" % target_node)
7772     result = self.rpc.call_instance_migrate(source_node, instance,
7773                                             self.nodes_ip[target_node],
7774                                             self.live)
7775     msg = result.fail_msg
7776     if msg:
7777       logging.error("Instance migration failed, trying to revert"
7778                     " disk status: %s", msg)
7779       self.feedback_fn("Migration failed, aborting")
7780       self._AbortMigration()
7781       self._RevertDiskStatus()
7782       raise errors.OpExecError("Could not migrate instance %s: %s" %
7783                                (instance.name, msg))
7784
7785     self.feedback_fn("* starting memory transfer")
7786     last_feedback = time.time()
7787     while True:
7788       result = self.rpc.call_instance_get_migration_status(source_node,
7789                                                            instance)
7790       msg = result.fail_msg
7791       ms = result.payload   # MigrationStatus instance
7792       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7793         logging.error("Instance migration failed, trying to revert"
7794                       " disk status: %s", msg)
7795         self.feedback_fn("Migration failed, aborting")
7796         self._AbortMigration()
7797         self._RevertDiskStatus()
7798         raise errors.OpExecError("Could not migrate instance %s: %s" %
7799                                  (instance.name, msg))
7800
7801       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7802         self.feedback_fn("* memory transfer complete")
7803         break
7804
7805       if (utils.TimeoutExpired(last_feedback,
7806                                self._MIGRATION_FEEDBACK_INTERVAL) and
7807           ms.transferred_ram is not None):
7808         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7809         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7810         last_feedback = time.time()
7811
7812       time.sleep(self._MIGRATION_POLL_INTERVAL)
7813
7814     result = self.rpc.call_instance_finalize_migration_src(source_node,
7815                                                            instance,
7816                                                            True,
7817                                                            self.live)
7818     msg = result.fail_msg
7819     if msg:
7820       logging.error("Instance migration succeeded, but finalization failed"
7821                     " on the source node: %s", msg)
7822       raise errors.OpExecError("Could not finalize instance migration: %s" %
7823                                msg)
7824
7825     instance.primary_node = target_node
7826
7827     # distribute new instance config to the other nodes
7828     self.cfg.Update(instance, self.feedback_fn)
7829
7830     result = self.rpc.call_instance_finalize_migration_dst(target_node,
7831                                                            instance,
7832                                                            migration_info,
7833                                                            True)
7834     msg = result.fail_msg
7835     if msg:
7836       logging.error("Instance migration succeeded, but finalization failed"
7837                     " on the target node: %s", msg)
7838       raise errors.OpExecError("Could not finalize instance migration: %s" %
7839                                msg)
7840
7841     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7842       self._EnsureSecondary(source_node)
7843       self._WaitUntilSync()
7844       self._GoStandalone()
7845       self._GoReconnect(False)
7846       self._WaitUntilSync()
7847
7848     self.feedback_fn("* done")
7849
7850   def _ExecFailover(self):
7851     """Failover an instance.
7852
7853     The failover is done by shutting it down on its present node and
7854     starting it on the secondary.
7855
7856     """
7857     instance = self.instance
7858     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7859
7860     source_node = instance.primary_node
7861     target_node = self.target_node
7862
7863     if instance.admin_state == constants.ADMINST_UP:
7864       self.feedback_fn("* checking disk consistency between source and target")
7865       for dev in instance.disks:
7866         # for drbd, these are drbd over lvm
7867         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7868           if primary_node.offline:
7869             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7870                              " target node %s" %
7871                              (primary_node.name, dev.iv_name, target_node))
7872           elif not self.ignore_consistency:
7873             raise errors.OpExecError("Disk %s is degraded on target node,"
7874                                      " aborting failover" % dev.iv_name)
7875     else:
7876       self.feedback_fn("* not checking disk consistency as instance is not"
7877                        " running")
7878
7879     self.feedback_fn("* shutting down instance on source node")
7880     logging.info("Shutting down instance %s on node %s",
7881                  instance.name, source_node)
7882
7883     result = self.rpc.call_instance_shutdown(source_node, instance,
7884                                              self.shutdown_timeout)
7885     msg = result.fail_msg
7886     if msg:
7887       if self.ignore_consistency or primary_node.offline:
7888         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7889                            " proceeding anyway; please make sure node"
7890                            " %s is down; error details: %s",
7891                            instance.name, source_node, source_node, msg)
7892       else:
7893         raise errors.OpExecError("Could not shutdown instance %s on"
7894                                  " node %s: %s" %
7895                                  (instance.name, source_node, msg))
7896
7897     self.feedback_fn("* deactivating the instance's disks on source node")
7898     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7899       raise errors.OpExecError("Can't shut down the instance's disks")
7900
7901     instance.primary_node = target_node
7902     # distribute new instance config to the other nodes
7903     self.cfg.Update(instance, self.feedback_fn)
7904
7905     # Only start the instance if it's marked as up
7906     if instance.admin_state == constants.ADMINST_UP:
7907       self.feedback_fn("* activating the instance's disks on target node %s" %
7908                        target_node)
7909       logging.info("Starting instance %s on node %s",
7910                    instance.name, target_node)
7911
7912       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7913                                            ignore_secondaries=True)
7914       if not disks_ok:
7915         _ShutdownInstanceDisks(self.lu, instance)
7916         raise errors.OpExecError("Can't activate the instance's disks")
7917
7918       self.feedback_fn("* starting the instance on the target node %s" %
7919                        target_node)
7920       result = self.rpc.call_instance_start(target_node, (instance, None, None),
7921                                             False)
7922       msg = result.fail_msg
7923       if msg:
7924         _ShutdownInstanceDisks(self.lu, instance)
7925         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7926                                  (instance.name, target_node, msg))
7927
7928   def Exec(self, feedback_fn):
7929     """Perform the migration.
7930
7931     """
7932     self.feedback_fn = feedback_fn
7933     self.source_node = self.instance.primary_node
7934
7935     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7936     if self.instance.disk_template in constants.DTS_INT_MIRROR:
7937       self.target_node = self.instance.secondary_nodes[0]
7938       # Otherwise self.target_node has been populated either
7939       # directly, or through an iallocator.
7940
7941     self.all_nodes = [self.source_node, self.target_node]
7942     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7943                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
7944
7945     if self.failover:
7946       feedback_fn("Failover instance %s" % self.instance.name)
7947       self._ExecFailover()
7948     else:
7949       feedback_fn("Migrating instance %s" % self.instance.name)
7950
7951       if self.cleanup:
7952         return self._ExecCleanup()
7953       else:
7954         return self._ExecMigration()
7955
7956
7957 def _CreateBlockDev(lu, node, instance, device, force_create,
7958                     info, force_open):
7959   """Create a tree of block devices on a given node.
7960
7961   If this device type has to be created on secondaries, create it and
7962   all its children.
7963
7964   If not, just recurse to children keeping the same 'force' value.
7965
7966   @param lu: the lu on whose behalf we execute
7967   @param node: the node on which to create the device
7968   @type instance: L{objects.Instance}
7969   @param instance: the instance which owns the device
7970   @type device: L{objects.Disk}
7971   @param device: the device to create
7972   @type force_create: boolean
7973   @param force_create: whether to force creation of this device; this
7974       will be change to True whenever we find a device which has
7975       CreateOnSecondary() attribute
7976   @param info: the extra 'metadata' we should attach to the device
7977       (this will be represented as a LVM tag)
7978   @type force_open: boolean
7979   @param force_open: this parameter will be passes to the
7980       L{backend.BlockdevCreate} function where it specifies
7981       whether we run on primary or not, and it affects both
7982       the child assembly and the device own Open() execution
7983
7984   """
7985   if device.CreateOnSecondary():
7986     force_create = True
7987
7988   if device.children:
7989     for child in device.children:
7990       _CreateBlockDev(lu, node, instance, child, force_create,
7991                       info, force_open)
7992
7993   if not force_create:
7994     return
7995
7996   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7997
7998
7999 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8000   """Create a single block device on a given node.
8001
8002   This will not recurse over children of the device, so they must be
8003   created in advance.
8004
8005   @param lu: the lu on whose behalf we execute
8006   @param node: the node on which to create the device
8007   @type instance: L{objects.Instance}
8008   @param instance: the instance which owns the device
8009   @type device: L{objects.Disk}
8010   @param device: the device to create
8011   @param info: the extra 'metadata' we should attach to the device
8012       (this will be represented as a LVM tag)
8013   @type force_open: boolean
8014   @param force_open: this parameter will be passes to the
8015       L{backend.BlockdevCreate} function where it specifies
8016       whether we run on primary or not, and it affects both
8017       the child assembly and the device own Open() execution
8018
8019   """
8020   lu.cfg.SetDiskID(device, node)
8021   result = lu.rpc.call_blockdev_create(node, device, device.size,
8022                                        instance.name, force_open, info)
8023   result.Raise("Can't create block device %s on"
8024                " node %s for instance %s" % (device, node, instance.name))
8025   if device.physical_id is None:
8026     device.physical_id = result.payload
8027
8028
8029 def _GenerateUniqueNames(lu, exts):
8030   """Generate a suitable LV name.
8031
8032   This will generate a logical volume name for the given instance.
8033
8034   """
8035   results = []
8036   for val in exts:
8037     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8038     results.append("%s%s" % (new_id, val))
8039   return results
8040
8041
8042 def _ComputeLDParams(disk_template, disk_params):
8043   """Computes Logical Disk parameters from Disk Template parameters.
8044
8045   @type disk_template: string
8046   @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8047   @type disk_params: dict
8048   @param disk_params: disk template parameters; dict(template_name -> parameters
8049   @rtype: list(dict)
8050   @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8051     contains the LD parameters of the node. The tree is flattened in-order.
8052
8053   """
8054   if disk_template not in constants.DISK_TEMPLATES:
8055     raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8056
8057   result = list()
8058   dt_params = disk_params[disk_template]
8059   if disk_template == constants.DT_DRBD8:
8060     drbd_params = {
8061       constants.RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE]
8062       }
8063
8064     drbd_params = \
8065       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8066                        drbd_params)
8067
8068     result.append(drbd_params)
8069
8070     # data LV
8071     data_params = {
8072       constants.STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8073       }
8074     data_params = \
8075       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8076                        data_params)
8077     result.append(data_params)
8078
8079     # metadata LV
8080     meta_params = {
8081       constants.STRIPES: dt_params[constants.DRBD_META_STRIPES],
8082       }
8083     meta_params = \
8084       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8085                        meta_params)
8086     result.append(meta_params)
8087
8088   elif (disk_template == constants.DT_FILE or
8089         disk_template == constants.DT_SHARED_FILE):
8090     result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8091
8092   elif disk_template == constants.DT_PLAIN:
8093     params = {
8094       constants.STRIPES: dt_params[constants.LV_STRIPES],
8095       }
8096     params = \
8097       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8098                        params)
8099     result.append(params)
8100
8101   elif disk_template == constants.DT_BLOCK:
8102     result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8103
8104   return result
8105
8106
8107 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8108                          iv_name, p_minor, s_minor, drbd_params, data_params,
8109                          meta_params):
8110   """Generate a drbd8 device complete with its children.
8111
8112   """
8113   assert len(vgnames) == len(names) == 2
8114   port = lu.cfg.AllocatePort()
8115   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8116
8117   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8118                           logical_id=(vgnames[0], names[0]),
8119                           params=data_params)
8120   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8121                           logical_id=(vgnames[1], names[1]),
8122                           params=meta_params)
8123   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8124                           logical_id=(primary, secondary, port,
8125                                       p_minor, s_minor,
8126                                       shared_secret),
8127                           children=[dev_data, dev_meta],
8128                           iv_name=iv_name, params=drbd_params)
8129   return drbd_dev
8130
8131
8132 def _GenerateDiskTemplate(lu, template_name,
8133                           instance_name, primary_node,
8134                           secondary_nodes, disk_info,
8135                           file_storage_dir, file_driver,
8136                           base_index, feedback_fn, disk_params):
8137   """Generate the entire disk layout for a given template type.
8138
8139   """
8140   #TODO: compute space requirements
8141
8142   vgname = lu.cfg.GetVGName()
8143   disk_count = len(disk_info)
8144   disks = []
8145   ld_params = _ComputeLDParams(template_name, disk_params)
8146   if template_name == constants.DT_DISKLESS:
8147     pass
8148   elif template_name == constants.DT_PLAIN:
8149     if len(secondary_nodes) != 0:
8150       raise errors.ProgrammerError("Wrong template configuration")
8151
8152     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8153                                       for i in range(disk_count)])
8154     for idx, disk in enumerate(disk_info):
8155       disk_index = idx + base_index
8156       vg = disk.get(constants.IDISK_VG, vgname)
8157       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8158       disk_dev = objects.Disk(dev_type=constants.LD_LV,
8159                               size=disk[constants.IDISK_SIZE],
8160                               logical_id=(vg, names[idx]),
8161                               iv_name="disk/%d" % disk_index,
8162                               mode=disk[constants.IDISK_MODE],
8163                               params=ld_params[0])
8164       disks.append(disk_dev)
8165   elif template_name == constants.DT_DRBD8:
8166     drbd_params, data_params, meta_params = ld_params
8167     if len(secondary_nodes) != 1:
8168       raise errors.ProgrammerError("Wrong template configuration")
8169     remote_node = secondary_nodes[0]
8170     minors = lu.cfg.AllocateDRBDMinor(
8171       [primary_node, remote_node] * len(disk_info), instance_name)
8172
8173     names = []
8174     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8175                                                for i in range(disk_count)]):
8176       names.append(lv_prefix + "_data")
8177       names.append(lv_prefix + "_meta")
8178     for idx, disk in enumerate(disk_info):
8179       disk_index = idx + base_index
8180       data_vg = disk.get(constants.IDISK_VG, vgname)
8181       meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
8182       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8183                                       disk[constants.IDISK_SIZE],
8184                                       [data_vg, meta_vg],
8185                                       names[idx * 2:idx * 2 + 2],
8186                                       "disk/%d" % disk_index,
8187                                       minors[idx * 2], minors[idx * 2 + 1],
8188                                       drbd_params, data_params, meta_params)
8189       disk_dev.mode = disk[constants.IDISK_MODE]
8190       disks.append(disk_dev)
8191   elif template_name == constants.DT_FILE:
8192     if len(secondary_nodes) != 0:
8193       raise errors.ProgrammerError("Wrong template configuration")
8194
8195     opcodes.RequireFileStorage()
8196
8197     for idx, disk in enumerate(disk_info):
8198       disk_index = idx + base_index
8199       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8200                               size=disk[constants.IDISK_SIZE],
8201                               iv_name="disk/%d" % disk_index,
8202                               logical_id=(file_driver,
8203                                           "%s/disk%d" % (file_storage_dir,
8204                                                          disk_index)),
8205                               mode=disk[constants.IDISK_MODE],
8206                               params=ld_params[0])
8207       disks.append(disk_dev)
8208   elif template_name == constants.DT_SHARED_FILE:
8209     if len(secondary_nodes) != 0:
8210       raise errors.ProgrammerError("Wrong template configuration")
8211
8212     opcodes.RequireSharedFileStorage()
8213
8214     for idx, disk in enumerate(disk_info):
8215       disk_index = idx + base_index
8216       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8217                               size=disk[constants.IDISK_SIZE],
8218                               iv_name="disk/%d" % disk_index,
8219                               logical_id=(file_driver,
8220                                           "%s/disk%d" % (file_storage_dir,
8221                                                          disk_index)),
8222                               mode=disk[constants.IDISK_MODE],
8223                               params=ld_params[0])
8224       disks.append(disk_dev)
8225   elif template_name == constants.DT_BLOCK:
8226     if len(secondary_nodes) != 0:
8227       raise errors.ProgrammerError("Wrong template configuration")
8228
8229     for idx, disk in enumerate(disk_info):
8230       disk_index = idx + base_index
8231       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8232                               size=disk[constants.IDISK_SIZE],
8233                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8234                                           disk[constants.IDISK_ADOPT]),
8235                               iv_name="disk/%d" % disk_index,
8236                               mode=disk[constants.IDISK_MODE],
8237                               params=ld_params[0])
8238       disks.append(disk_dev)
8239
8240   else:
8241     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8242   return disks
8243
8244
8245 def _GetInstanceInfoText(instance):
8246   """Compute that text that should be added to the disk's metadata.
8247
8248   """
8249   return "originstname+%s" % instance.name
8250
8251
8252 def _CalcEta(time_taken, written, total_size):
8253   """Calculates the ETA based on size written and total size.
8254
8255   @param time_taken: The time taken so far
8256   @param written: amount written so far
8257   @param total_size: The total size of data to be written
8258   @return: The remaining time in seconds
8259
8260   """
8261   avg_time = time_taken / float(written)
8262   return (total_size - written) * avg_time
8263
8264
8265 def _WipeDisks(lu, instance):
8266   """Wipes instance disks.
8267
8268   @type lu: L{LogicalUnit}
8269   @param lu: the logical unit on whose behalf we execute
8270   @type instance: L{objects.Instance}
8271   @param instance: the instance whose disks we should create
8272   @return: the success of the wipe
8273
8274   """
8275   node = instance.primary_node
8276
8277   for device in instance.disks:
8278     lu.cfg.SetDiskID(device, node)
8279
8280   logging.info("Pause sync of instance %s disks", instance.name)
8281   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8282
8283   for idx, success in enumerate(result.payload):
8284     if not success:
8285       logging.warn("pause-sync of instance %s for disks %d failed",
8286                    instance.name, idx)
8287
8288   try:
8289     for idx, device in enumerate(instance.disks):
8290       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8291       # MAX_WIPE_CHUNK at max
8292       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8293                             constants.MIN_WIPE_CHUNK_PERCENT)
8294       # we _must_ make this an int, otherwise rounding errors will
8295       # occur
8296       wipe_chunk_size = int(wipe_chunk_size)
8297
8298       lu.LogInfo("* Wiping disk %d", idx)
8299       logging.info("Wiping disk %d for instance %s, node %s using"
8300                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8301
8302       offset = 0
8303       size = device.size
8304       last_output = 0
8305       start_time = time.time()
8306
8307       while offset < size:
8308         wipe_size = min(wipe_chunk_size, size - offset)
8309         logging.debug("Wiping disk %d, offset %s, chunk %s",
8310                       idx, offset, wipe_size)
8311         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8312         result.Raise("Could not wipe disk %d at offset %d for size %d" %
8313                      (idx, offset, wipe_size))
8314         now = time.time()
8315         offset += wipe_size
8316         if now - last_output >= 60:
8317           eta = _CalcEta(now - start_time, offset, size)
8318           lu.LogInfo(" - done: %.1f%% ETA: %s" %
8319                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
8320           last_output = now
8321   finally:
8322     logging.info("Resume sync of instance %s disks", instance.name)
8323
8324     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8325
8326     for idx, success in enumerate(result.payload):
8327       if not success:
8328         lu.LogWarning("Resume sync of disk %d failed, please have a"
8329                       " look at the status and troubleshoot the issue", idx)
8330         logging.warn("resume-sync of instance %s for disks %d failed",
8331                      instance.name, idx)
8332
8333
8334 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8335   """Create all disks for an instance.
8336
8337   This abstracts away some work from AddInstance.
8338
8339   @type lu: L{LogicalUnit}
8340   @param lu: the logical unit on whose behalf we execute
8341   @type instance: L{objects.Instance}
8342   @param instance: the instance whose disks we should create
8343   @type to_skip: list
8344   @param to_skip: list of indices to skip
8345   @type target_node: string
8346   @param target_node: if passed, overrides the target node for creation
8347   @rtype: boolean
8348   @return: the success of the creation
8349
8350   """
8351   info = _GetInstanceInfoText(instance)
8352   if target_node is None:
8353     pnode = instance.primary_node
8354     all_nodes = instance.all_nodes
8355   else:
8356     pnode = target_node
8357     all_nodes = [pnode]
8358
8359   if instance.disk_template in constants.DTS_FILEBASED:
8360     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8361     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8362
8363     result.Raise("Failed to create directory '%s' on"
8364                  " node %s" % (file_storage_dir, pnode))
8365
8366   # Note: this needs to be kept in sync with adding of disks in
8367   # LUInstanceSetParams
8368   for idx, device in enumerate(instance.disks):
8369     if to_skip and idx in to_skip:
8370       continue
8371     logging.info("Creating volume %s for instance %s",
8372                  device.iv_name, instance.name)
8373     #HARDCODE
8374     for node in all_nodes:
8375       f_create = node == pnode
8376       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8377
8378
8379 def _RemoveDisks(lu, instance, target_node=None):
8380   """Remove all disks for an instance.
8381
8382   This abstracts away some work from `AddInstance()` and
8383   `RemoveInstance()`. Note that in case some of the devices couldn't
8384   be removed, the removal will continue with the other ones (compare
8385   with `_CreateDisks()`).
8386
8387   @type lu: L{LogicalUnit}
8388   @param lu: the logical unit on whose behalf we execute
8389   @type instance: L{objects.Instance}
8390   @param instance: the instance whose disks we should remove
8391   @type target_node: string
8392   @param target_node: used to override the node on which to remove the disks
8393   @rtype: boolean
8394   @return: the success of the removal
8395
8396   """
8397   logging.info("Removing block devices for instance %s", instance.name)
8398
8399   all_result = True
8400   for device in instance.disks:
8401     if target_node:
8402       edata = [(target_node, device)]
8403     else:
8404       edata = device.ComputeNodeTree(instance.primary_node)
8405     for node, disk in edata:
8406       lu.cfg.SetDiskID(disk, node)
8407       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8408       if msg:
8409         lu.LogWarning("Could not remove block device %s on node %s,"
8410                       " continuing anyway: %s", device.iv_name, node, msg)
8411         all_result = False
8412
8413     # if this is a DRBD disk, return its port to the pool
8414     if device.dev_type in constants.LDS_DRBD:
8415       tcp_port = device.logical_id[2]
8416       lu.cfg.AddTcpUdpPort(tcp_port)
8417
8418   if instance.disk_template == constants.DT_FILE:
8419     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8420     if target_node:
8421       tgt = target_node
8422     else:
8423       tgt = instance.primary_node
8424     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8425     if result.fail_msg:
8426       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8427                     file_storage_dir, instance.primary_node, result.fail_msg)
8428       all_result = False
8429
8430   return all_result
8431
8432
8433 def _ComputeDiskSizePerVG(disk_template, disks):
8434   """Compute disk size requirements in the volume group
8435
8436   """
8437   def _compute(disks, payload):
8438     """Universal algorithm.
8439
8440     """
8441     vgs = {}
8442     for disk in disks:
8443       vgs[disk[constants.IDISK_VG]] = \
8444         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
8445
8446     return vgs
8447
8448   # Required free disk space as a function of disk and swap space
8449   req_size_dict = {
8450     constants.DT_DISKLESS: {},
8451     constants.DT_PLAIN: _compute(disks, 0),
8452     # 128 MB are added for drbd metadata for each disk
8453     constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8454     constants.DT_FILE: {},
8455     constants.DT_SHARED_FILE: {},
8456   }
8457
8458   if disk_template not in req_size_dict:
8459     raise errors.ProgrammerError("Disk template '%s' size requirement"
8460                                  " is unknown" % disk_template)
8461
8462   return req_size_dict[disk_template]
8463
8464
8465 def _ComputeDiskSize(disk_template, disks):
8466   """Compute disk size requirements in the volume group
8467
8468   """
8469   # Required free disk space as a function of disk and swap space
8470   req_size_dict = {
8471     constants.DT_DISKLESS: None,
8472     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8473     # 128 MB are added for drbd metadata for each disk
8474     constants.DT_DRBD8:
8475       sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8476     constants.DT_FILE: None,
8477     constants.DT_SHARED_FILE: 0,
8478     constants.DT_BLOCK: 0,
8479   }
8480
8481   if disk_template not in req_size_dict:
8482     raise errors.ProgrammerError("Disk template '%s' size requirement"
8483                                  " is unknown" % disk_template)
8484
8485   return req_size_dict[disk_template]
8486
8487
8488 def _FilterVmNodes(lu, nodenames):
8489   """Filters out non-vm_capable nodes from a list.
8490
8491   @type lu: L{LogicalUnit}
8492   @param lu: the logical unit for which we check
8493   @type nodenames: list
8494   @param nodenames: the list of nodes on which we should check
8495   @rtype: list
8496   @return: the list of vm-capable nodes
8497
8498   """
8499   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8500   return [name for name in nodenames if name not in vm_nodes]
8501
8502
8503 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8504   """Hypervisor parameter validation.
8505
8506   This function abstract the hypervisor parameter validation to be
8507   used in both instance create and instance modify.
8508
8509   @type lu: L{LogicalUnit}
8510   @param lu: the logical unit for which we check
8511   @type nodenames: list
8512   @param nodenames: the list of nodes on which we should check
8513   @type hvname: string
8514   @param hvname: the name of the hypervisor we should use
8515   @type hvparams: dict
8516   @param hvparams: the parameters which we need to check
8517   @raise errors.OpPrereqError: if the parameters are not valid
8518
8519   """
8520   nodenames = _FilterVmNodes(lu, nodenames)
8521
8522   cluster = lu.cfg.GetClusterInfo()
8523   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8524
8525   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8526   for node in nodenames:
8527     info = hvinfo[node]
8528     if info.offline:
8529       continue
8530     info.Raise("Hypervisor parameter validation failed on node %s" % node)
8531
8532
8533 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8534   """OS parameters validation.
8535
8536   @type lu: L{LogicalUnit}
8537   @param lu: the logical unit for which we check
8538   @type required: boolean
8539   @param required: whether the validation should fail if the OS is not
8540       found
8541   @type nodenames: list
8542   @param nodenames: the list of nodes on which we should check
8543   @type osname: string
8544   @param osname: the name of the hypervisor we should use
8545   @type osparams: dict
8546   @param osparams: the parameters which we need to check
8547   @raise errors.OpPrereqError: if the parameters are not valid
8548
8549   """
8550   nodenames = _FilterVmNodes(lu, nodenames)
8551   result = lu.rpc.call_os_validate(nodenames, required, osname,
8552                                    [constants.OS_VALIDATE_PARAMETERS],
8553                                    osparams)
8554   for node, nres in result.items():
8555     # we don't check for offline cases since this should be run only
8556     # against the master node and/or an instance's nodes
8557     nres.Raise("OS Parameters validation failed on node %s" % node)
8558     if not nres.payload:
8559       lu.LogInfo("OS %s not found on node %s, validation skipped",
8560                  osname, node)
8561
8562
8563 class LUInstanceCreate(LogicalUnit):
8564   """Create an instance.
8565
8566   """
8567   HPATH = "instance-add"
8568   HTYPE = constants.HTYPE_INSTANCE
8569   REQ_BGL = False
8570
8571   def CheckArguments(self):
8572     """Check arguments.
8573
8574     """
8575     # do not require name_check to ease forward/backward compatibility
8576     # for tools
8577     if self.op.no_install and self.op.start:
8578       self.LogInfo("No-installation mode selected, disabling startup")
8579       self.op.start = False
8580     # validate/normalize the instance name
8581     self.op.instance_name = \
8582       netutils.Hostname.GetNormalizedName(self.op.instance_name)
8583
8584     if self.op.ip_check and not self.op.name_check:
8585       # TODO: make the ip check more flexible and not depend on the name check
8586       raise errors.OpPrereqError("Cannot do IP address check without a name"
8587                                  " check", errors.ECODE_INVAL)
8588
8589     # check nics' parameter names
8590     for nic in self.op.nics:
8591       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8592
8593     # check disks. parameter names and consistent adopt/no-adopt strategy
8594     has_adopt = has_no_adopt = False
8595     for disk in self.op.disks:
8596       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8597       if constants.IDISK_ADOPT in disk:
8598         has_adopt = True
8599       else:
8600         has_no_adopt = True
8601     if has_adopt and has_no_adopt:
8602       raise errors.OpPrereqError("Either all disks are adopted or none is",
8603                                  errors.ECODE_INVAL)
8604     if has_adopt:
8605       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8606         raise errors.OpPrereqError("Disk adoption is not supported for the"
8607                                    " '%s' disk template" %
8608                                    self.op.disk_template,
8609                                    errors.ECODE_INVAL)
8610       if self.op.iallocator is not None:
8611         raise errors.OpPrereqError("Disk adoption not allowed with an"
8612                                    " iallocator script", errors.ECODE_INVAL)
8613       if self.op.mode == constants.INSTANCE_IMPORT:
8614         raise errors.OpPrereqError("Disk adoption not allowed for"
8615                                    " instance import", errors.ECODE_INVAL)
8616     else:
8617       if self.op.disk_template in constants.DTS_MUST_ADOPT:
8618         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8619                                    " but no 'adopt' parameter given" %
8620                                    self.op.disk_template,
8621                                    errors.ECODE_INVAL)
8622
8623     self.adopt_disks = has_adopt
8624
8625     # instance name verification
8626     if self.op.name_check:
8627       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8628       self.op.instance_name = self.hostname1.name
8629       # used in CheckPrereq for ip ping check
8630       self.check_ip = self.hostname1.ip
8631     else:
8632       self.check_ip = None
8633
8634     # file storage checks
8635     if (self.op.file_driver and
8636         not self.op.file_driver in constants.FILE_DRIVER):
8637       raise errors.OpPrereqError("Invalid file driver name '%s'" %
8638                                  self.op.file_driver, errors.ECODE_INVAL)
8639
8640     if self.op.disk_template == constants.DT_FILE:
8641       opcodes.RequireFileStorage()
8642     elif self.op.disk_template == constants.DT_SHARED_FILE:
8643       opcodes.RequireSharedFileStorage()
8644
8645     ### Node/iallocator related checks
8646     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8647
8648     if self.op.pnode is not None:
8649       if self.op.disk_template in constants.DTS_INT_MIRROR:
8650         if self.op.snode is None:
8651           raise errors.OpPrereqError("The networked disk templates need"
8652                                      " a mirror node", errors.ECODE_INVAL)
8653       elif self.op.snode:
8654         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8655                         " template")
8656         self.op.snode = None
8657
8658     self._cds = _GetClusterDomainSecret()
8659
8660     if self.op.mode == constants.INSTANCE_IMPORT:
8661       # On import force_variant must be True, because if we forced it at
8662       # initial install, our only chance when importing it back is that it
8663       # works again!
8664       self.op.force_variant = True
8665
8666       if self.op.no_install:
8667         self.LogInfo("No-installation mode has no effect during import")
8668
8669     elif self.op.mode == constants.INSTANCE_CREATE:
8670       if self.op.os_type is None:
8671         raise errors.OpPrereqError("No guest OS specified",
8672                                    errors.ECODE_INVAL)
8673       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8674         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8675                                    " installation" % self.op.os_type,
8676                                    errors.ECODE_STATE)
8677       if self.op.disk_template is None:
8678         raise errors.OpPrereqError("No disk template specified",
8679                                    errors.ECODE_INVAL)
8680
8681     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8682       # Check handshake to ensure both clusters have the same domain secret
8683       src_handshake = self.op.source_handshake
8684       if not src_handshake:
8685         raise errors.OpPrereqError("Missing source handshake",
8686                                    errors.ECODE_INVAL)
8687
8688       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8689                                                            src_handshake)
8690       if errmsg:
8691         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8692                                    errors.ECODE_INVAL)
8693
8694       # Load and check source CA
8695       self.source_x509_ca_pem = self.op.source_x509_ca
8696       if not self.source_x509_ca_pem:
8697         raise errors.OpPrereqError("Missing source X509 CA",
8698                                    errors.ECODE_INVAL)
8699
8700       try:
8701         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8702                                                     self._cds)
8703       except OpenSSL.crypto.Error, err:
8704         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8705                                    (err, ), errors.ECODE_INVAL)
8706
8707       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8708       if errcode is not None:
8709         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8710                                    errors.ECODE_INVAL)
8711
8712       self.source_x509_ca = cert
8713
8714       src_instance_name = self.op.source_instance_name
8715       if not src_instance_name:
8716         raise errors.OpPrereqError("Missing source instance name",
8717                                    errors.ECODE_INVAL)
8718
8719       self.source_instance_name = \
8720           netutils.GetHostname(name=src_instance_name).name
8721
8722     else:
8723       raise errors.OpPrereqError("Invalid instance creation mode %r" %
8724                                  self.op.mode, errors.ECODE_INVAL)
8725
8726   def ExpandNames(self):
8727     """ExpandNames for CreateInstance.
8728
8729     Figure out the right locks for instance creation.
8730
8731     """
8732     self.needed_locks = {}
8733
8734     instance_name = self.op.instance_name
8735     # this is just a preventive check, but someone might still add this
8736     # instance in the meantime, and creation will fail at lock-add time
8737     if instance_name in self.cfg.GetInstanceList():
8738       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8739                                  instance_name, errors.ECODE_EXISTS)
8740
8741     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8742
8743     if self.op.iallocator:
8744       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8745       # specifying a group on instance creation and then selecting nodes from
8746       # that group
8747       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8748       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8749     else:
8750       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8751       nodelist = [self.op.pnode]
8752       if self.op.snode is not None:
8753         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8754         nodelist.append(self.op.snode)
8755       self.needed_locks[locking.LEVEL_NODE] = nodelist
8756       # Lock resources of instance's primary and secondary nodes (copy to
8757       # prevent accidential modification)
8758       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8759
8760     # in case of import lock the source node too
8761     if self.op.mode == constants.INSTANCE_IMPORT:
8762       src_node = self.op.src_node
8763       src_path = self.op.src_path
8764
8765       if src_path is None:
8766         self.op.src_path = src_path = self.op.instance_name
8767
8768       if src_node is None:
8769         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8770         self.op.src_node = None
8771         if os.path.isabs(src_path):
8772           raise errors.OpPrereqError("Importing an instance from a path"
8773                                      " requires a source node option",
8774                                      errors.ECODE_INVAL)
8775       else:
8776         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8777         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8778           self.needed_locks[locking.LEVEL_NODE].append(src_node)
8779         if not os.path.isabs(src_path):
8780           self.op.src_path = src_path = \
8781             utils.PathJoin(constants.EXPORT_DIR, src_path)
8782
8783   def _RunAllocator(self):
8784     """Run the allocator based on input opcode.
8785
8786     """
8787     nics = [n.ToDict() for n in self.nics]
8788     ial = IAllocator(self.cfg, self.rpc,
8789                      mode=constants.IALLOCATOR_MODE_ALLOC,
8790                      name=self.op.instance_name,
8791                      disk_template=self.op.disk_template,
8792                      tags=self.op.tags,
8793                      os=self.op.os_type,
8794                      vcpus=self.be_full[constants.BE_VCPUS],
8795                      memory=self.be_full[constants.BE_MAXMEM],
8796                      disks=self.disks,
8797                      nics=nics,
8798                      hypervisor=self.op.hypervisor,
8799                      )
8800
8801     ial.Run(self.op.iallocator)
8802
8803     if not ial.success:
8804       raise errors.OpPrereqError("Can't compute nodes using"
8805                                  " iallocator '%s': %s" %
8806                                  (self.op.iallocator, ial.info),
8807                                  errors.ECODE_NORES)
8808     if len(ial.result) != ial.required_nodes:
8809       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8810                                  " of nodes (%s), required %s" %
8811                                  (self.op.iallocator, len(ial.result),
8812                                   ial.required_nodes), errors.ECODE_FAULT)
8813     self.op.pnode = ial.result[0]
8814     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8815                  self.op.instance_name, self.op.iallocator,
8816                  utils.CommaJoin(ial.result))
8817     if ial.required_nodes == 2:
8818       self.op.snode = ial.result[1]
8819
8820   def BuildHooksEnv(self):
8821     """Build hooks env.
8822
8823     This runs on master, primary and secondary nodes of the instance.
8824
8825     """
8826     env = {
8827       "ADD_MODE": self.op.mode,
8828       }
8829     if self.op.mode == constants.INSTANCE_IMPORT:
8830       env["SRC_NODE"] = self.op.src_node
8831       env["SRC_PATH"] = self.op.src_path
8832       env["SRC_IMAGES"] = self.src_images
8833
8834     env.update(_BuildInstanceHookEnv(
8835       name=self.op.instance_name,
8836       primary_node=self.op.pnode,
8837       secondary_nodes=self.secondaries,
8838       status=self.op.start,
8839       os_type=self.op.os_type,
8840       minmem=self.be_full[constants.BE_MINMEM],
8841       maxmem=self.be_full[constants.BE_MAXMEM],
8842       vcpus=self.be_full[constants.BE_VCPUS],
8843       nics=_NICListToTuple(self, self.nics),
8844       disk_template=self.op.disk_template,
8845       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8846              for d in self.disks],
8847       bep=self.be_full,
8848       hvp=self.hv_full,
8849       hypervisor_name=self.op.hypervisor,
8850       tags=self.op.tags,
8851     ))
8852
8853     return env
8854
8855   def BuildHooksNodes(self):
8856     """Build hooks nodes.
8857
8858     """
8859     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8860     return nl, nl
8861
8862   def _ReadExportInfo(self):
8863     """Reads the export information from disk.
8864
8865     It will override the opcode source node and path with the actual
8866     information, if these two were not specified before.
8867
8868     @return: the export information
8869
8870     """
8871     assert self.op.mode == constants.INSTANCE_IMPORT
8872
8873     src_node = self.op.src_node
8874     src_path = self.op.src_path
8875
8876     if src_node is None:
8877       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8878       exp_list = self.rpc.call_export_list(locked_nodes)
8879       found = False
8880       for node in exp_list:
8881         if exp_list[node].fail_msg:
8882           continue
8883         if src_path in exp_list[node].payload:
8884           found = True
8885           self.op.src_node = src_node = node
8886           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8887                                                        src_path)
8888           break
8889       if not found:
8890         raise errors.OpPrereqError("No export found for relative path %s" %
8891                                     src_path, errors.ECODE_INVAL)
8892
8893     _CheckNodeOnline(self, src_node)
8894     result = self.rpc.call_export_info(src_node, src_path)
8895     result.Raise("No export or invalid export found in dir %s" % src_path)
8896
8897     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8898     if not export_info.has_section(constants.INISECT_EXP):
8899       raise errors.ProgrammerError("Corrupted export config",
8900                                    errors.ECODE_ENVIRON)
8901
8902     ei_version = export_info.get(constants.INISECT_EXP, "version")
8903     if (int(ei_version) != constants.EXPORT_VERSION):
8904       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8905                                  (ei_version, constants.EXPORT_VERSION),
8906                                  errors.ECODE_ENVIRON)
8907     return export_info
8908
8909   def _ReadExportParams(self, einfo):
8910     """Use export parameters as defaults.
8911
8912     In case the opcode doesn't specify (as in override) some instance
8913     parameters, then try to use them from the export information, if
8914     that declares them.
8915
8916     """
8917     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8918
8919     if self.op.disk_template is None:
8920       if einfo.has_option(constants.INISECT_INS, "disk_template"):
8921         self.op.disk_template = einfo.get(constants.INISECT_INS,
8922                                           "disk_template")
8923         if self.op.disk_template not in constants.DISK_TEMPLATES:
8924           raise errors.OpPrereqError("Disk template specified in configuration"
8925                                      " file is not one of the allowed values:"
8926                                      " %s" % " ".join(constants.DISK_TEMPLATES))
8927       else:
8928         raise errors.OpPrereqError("No disk template specified and the export"
8929                                    " is missing the disk_template information",
8930                                    errors.ECODE_INVAL)
8931
8932     if not self.op.disks:
8933       disks = []
8934       # TODO: import the disk iv_name too
8935       for idx in range(constants.MAX_DISKS):
8936         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8937           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8938           disks.append({constants.IDISK_SIZE: disk_sz})
8939       self.op.disks = disks
8940       if not disks and self.op.disk_template != constants.DT_DISKLESS:
8941         raise errors.OpPrereqError("No disk info specified and the export"
8942                                    " is missing the disk information",
8943                                    errors.ECODE_INVAL)
8944
8945     if not self.op.nics:
8946       nics = []
8947       for idx in range(constants.MAX_NICS):
8948         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8949           ndict = {}
8950           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8951             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8952             ndict[name] = v
8953           nics.append(ndict)
8954         else:
8955           break
8956       self.op.nics = nics
8957
8958     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8959       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8960
8961     if (self.op.hypervisor is None and
8962         einfo.has_option(constants.INISECT_INS, "hypervisor")):
8963       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8964
8965     if einfo.has_section(constants.INISECT_HYP):
8966       # use the export parameters but do not override the ones
8967       # specified by the user
8968       for name, value in einfo.items(constants.INISECT_HYP):
8969         if name not in self.op.hvparams:
8970           self.op.hvparams[name] = value
8971
8972     if einfo.has_section(constants.INISECT_BEP):
8973       # use the parameters, without overriding
8974       for name, value in einfo.items(constants.INISECT_BEP):
8975         if name not in self.op.beparams:
8976           self.op.beparams[name] = value
8977         # Compatibility for the old "memory" be param
8978         if name == constants.BE_MEMORY:
8979           if constants.BE_MAXMEM not in self.op.beparams:
8980             self.op.beparams[constants.BE_MAXMEM] = value
8981           if constants.BE_MINMEM not in self.op.beparams:
8982             self.op.beparams[constants.BE_MINMEM] = value
8983     else:
8984       # try to read the parameters old style, from the main section
8985       for name in constants.BES_PARAMETERS:
8986         if (name not in self.op.beparams and
8987             einfo.has_option(constants.INISECT_INS, name)):
8988           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8989
8990     if einfo.has_section(constants.INISECT_OSP):
8991       # use the parameters, without overriding
8992       for name, value in einfo.items(constants.INISECT_OSP):
8993         if name not in self.op.osparams:
8994           self.op.osparams[name] = value
8995
8996   def _RevertToDefaults(self, cluster):
8997     """Revert the instance parameters to the default values.
8998
8999     """
9000     # hvparams
9001     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9002     for name in self.op.hvparams.keys():
9003       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9004         del self.op.hvparams[name]
9005     # beparams
9006     be_defs = cluster.SimpleFillBE({})
9007     for name in self.op.beparams.keys():
9008       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9009         del self.op.beparams[name]
9010     # nic params
9011     nic_defs = cluster.SimpleFillNIC({})
9012     for nic in self.op.nics:
9013       for name in constants.NICS_PARAMETERS:
9014         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9015           del nic[name]
9016     # osparams
9017     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9018     for name in self.op.osparams.keys():
9019       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9020         del self.op.osparams[name]
9021
9022   def _CalculateFileStorageDir(self):
9023     """Calculate final instance file storage dir.
9024
9025     """
9026     # file storage dir calculation/check
9027     self.instance_file_storage_dir = None
9028     if self.op.disk_template in constants.DTS_FILEBASED:
9029       # build the full file storage dir path
9030       joinargs = []
9031
9032       if self.op.disk_template == constants.DT_SHARED_FILE:
9033         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9034       else:
9035         get_fsd_fn = self.cfg.GetFileStorageDir
9036
9037       cfg_storagedir = get_fsd_fn()
9038       if not cfg_storagedir:
9039         raise errors.OpPrereqError("Cluster file storage dir not defined")
9040       joinargs.append(cfg_storagedir)
9041
9042       if self.op.file_storage_dir is not None:
9043         joinargs.append(self.op.file_storage_dir)
9044
9045       joinargs.append(self.op.instance_name)
9046
9047       # pylint: disable=W0142
9048       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9049
9050   def CheckPrereq(self):
9051     """Check prerequisites.
9052
9053     """
9054     self._CalculateFileStorageDir()
9055
9056     if self.op.mode == constants.INSTANCE_IMPORT:
9057       export_info = self._ReadExportInfo()
9058       self._ReadExportParams(export_info)
9059
9060     if (not self.cfg.GetVGName() and
9061         self.op.disk_template not in constants.DTS_NOT_LVM):
9062       raise errors.OpPrereqError("Cluster does not support lvm-based"
9063                                  " instances", errors.ECODE_STATE)
9064
9065     if (self.op.hypervisor is None or
9066         self.op.hypervisor == constants.VALUE_AUTO):
9067       self.op.hypervisor = self.cfg.GetHypervisorType()
9068
9069     cluster = self.cfg.GetClusterInfo()
9070     enabled_hvs = cluster.enabled_hypervisors
9071     if self.op.hypervisor not in enabled_hvs:
9072       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9073                                  " cluster (%s)" % (self.op.hypervisor,
9074                                   ",".join(enabled_hvs)),
9075                                  errors.ECODE_STATE)
9076
9077     # Check tag validity
9078     for tag in self.op.tags:
9079       objects.TaggableObject.ValidateTag(tag)
9080
9081     # check hypervisor parameter syntax (locally)
9082     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9083     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9084                                       self.op.hvparams)
9085     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9086     hv_type.CheckParameterSyntax(filled_hvp)
9087     self.hv_full = filled_hvp
9088     # check that we don't specify global parameters on an instance
9089     _CheckGlobalHvParams(self.op.hvparams)
9090
9091     # fill and remember the beparams dict
9092     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9093     for param, value in self.op.beparams.iteritems():
9094       if value == constants.VALUE_AUTO:
9095         self.op.beparams[param] = default_beparams[param]
9096     objects.UpgradeBeParams(self.op.beparams)
9097     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9098     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9099
9100     # build os parameters
9101     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9102
9103     # now that hvp/bep are in final format, let's reset to defaults,
9104     # if told to do so
9105     if self.op.identify_defaults:
9106       self._RevertToDefaults(cluster)
9107
9108     # NIC buildup
9109     self.nics = []
9110     for idx, nic in enumerate(self.op.nics):
9111       nic_mode_req = nic.get(constants.INIC_MODE, None)
9112       nic_mode = nic_mode_req
9113       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9114         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9115
9116       # in routed mode, for the first nic, the default ip is 'auto'
9117       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9118         default_ip_mode = constants.VALUE_AUTO
9119       else:
9120         default_ip_mode = constants.VALUE_NONE
9121
9122       # ip validity checks
9123       ip = nic.get(constants.INIC_IP, default_ip_mode)
9124       if ip is None or ip.lower() == constants.VALUE_NONE:
9125         nic_ip = None
9126       elif ip.lower() == constants.VALUE_AUTO:
9127         if not self.op.name_check:
9128           raise errors.OpPrereqError("IP address set to auto but name checks"
9129                                      " have been skipped",
9130                                      errors.ECODE_INVAL)
9131         nic_ip = self.hostname1.ip
9132       else:
9133         if not netutils.IPAddress.IsValid(ip):
9134           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9135                                      errors.ECODE_INVAL)
9136         nic_ip = ip
9137
9138       # TODO: check the ip address for uniqueness
9139       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9140         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9141                                    errors.ECODE_INVAL)
9142
9143       # MAC address verification
9144       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9145       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9146         mac = utils.NormalizeAndValidateMac(mac)
9147
9148         try:
9149           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9150         except errors.ReservationError:
9151           raise errors.OpPrereqError("MAC address %s already in use"
9152                                      " in cluster" % mac,
9153                                      errors.ECODE_NOTUNIQUE)
9154
9155       #  Build nic parameters
9156       link = nic.get(constants.INIC_LINK, None)
9157       if link == constants.VALUE_AUTO:
9158         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9159       nicparams = {}
9160       if nic_mode_req:
9161         nicparams[constants.NIC_MODE] = nic_mode
9162       if link:
9163         nicparams[constants.NIC_LINK] = link
9164
9165       check_params = cluster.SimpleFillNIC(nicparams)
9166       objects.NIC.CheckParameterSyntax(check_params)
9167       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9168
9169     # disk checks/pre-build
9170     default_vg = self.cfg.GetVGName()
9171     self.disks = []
9172     for disk in self.op.disks:
9173       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9174       if mode not in constants.DISK_ACCESS_SET:
9175         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9176                                    mode, errors.ECODE_INVAL)
9177       size = disk.get(constants.IDISK_SIZE, None)
9178       if size is None:
9179         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9180       try:
9181         size = int(size)
9182       except (TypeError, ValueError):
9183         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9184                                    errors.ECODE_INVAL)
9185
9186       data_vg = disk.get(constants.IDISK_VG, default_vg)
9187       new_disk = {
9188         constants.IDISK_SIZE: size,
9189         constants.IDISK_MODE: mode,
9190         constants.IDISK_VG: data_vg,
9191         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
9192         }
9193       if constants.IDISK_ADOPT in disk:
9194         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9195       self.disks.append(new_disk)
9196
9197     if self.op.mode == constants.INSTANCE_IMPORT:
9198       disk_images = []
9199       for idx in range(len(self.disks)):
9200         option = "disk%d_dump" % idx
9201         if export_info.has_option(constants.INISECT_INS, option):
9202           # FIXME: are the old os-es, disk sizes, etc. useful?
9203           export_name = export_info.get(constants.INISECT_INS, option)
9204           image = utils.PathJoin(self.op.src_path, export_name)
9205           disk_images.append(image)
9206         else:
9207           disk_images.append(False)
9208
9209       self.src_images = disk_images
9210
9211       old_name = export_info.get(constants.INISECT_INS, "name")
9212       if self.op.instance_name == old_name:
9213         for idx, nic in enumerate(self.nics):
9214           if nic.mac == constants.VALUE_AUTO:
9215             nic_mac_ini = "nic%d_mac" % idx
9216             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9217
9218     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9219
9220     # ip ping checks (we use the same ip that was resolved in ExpandNames)
9221     if self.op.ip_check:
9222       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9223         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9224                                    (self.check_ip, self.op.instance_name),
9225                                    errors.ECODE_NOTUNIQUE)
9226
9227     #### mac address generation
9228     # By generating here the mac address both the allocator and the hooks get
9229     # the real final mac address rather than the 'auto' or 'generate' value.
9230     # There is a race condition between the generation and the instance object
9231     # creation, which means that we know the mac is valid now, but we're not
9232     # sure it will be when we actually add the instance. If things go bad
9233     # adding the instance will abort because of a duplicate mac, and the
9234     # creation job will fail.
9235     for nic in self.nics:
9236       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9237         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9238
9239     #### allocator run
9240
9241     if self.op.iallocator is not None:
9242       self._RunAllocator()
9243
9244     # Release all unneeded node locks
9245     _ReleaseLocks(self, locking.LEVEL_NODE,
9246                   keep=filter(None, [self.op.pnode, self.op.snode,
9247                                      self.op.src_node]))
9248
9249     #### node related checks
9250
9251     # check primary node
9252     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9253     assert self.pnode is not None, \
9254       "Cannot retrieve locked node %s" % self.op.pnode
9255     if pnode.offline:
9256       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9257                                  pnode.name, errors.ECODE_STATE)
9258     if pnode.drained:
9259       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9260                                  pnode.name, errors.ECODE_STATE)
9261     if not pnode.vm_capable:
9262       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9263                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9264
9265     self.secondaries = []
9266
9267     # mirror node verification
9268     if self.op.disk_template in constants.DTS_INT_MIRROR:
9269       if self.op.snode == pnode.name:
9270         raise errors.OpPrereqError("The secondary node cannot be the"
9271                                    " primary node", errors.ECODE_INVAL)
9272       _CheckNodeOnline(self, self.op.snode)
9273       _CheckNodeNotDrained(self, self.op.snode)
9274       _CheckNodeVmCapable(self, self.op.snode)
9275       self.secondaries.append(self.op.snode)
9276
9277       snode = self.cfg.GetNodeInfo(self.op.snode)
9278       if pnode.group != snode.group:
9279         self.LogWarning("The primary and secondary nodes are in two"
9280                         " different node groups; the disk parameters"
9281                         " from the first disk's node group will be"
9282                         " used")
9283
9284     nodenames = [pnode.name] + self.secondaries
9285
9286     # disk parameters (not customizable at instance or node level)
9287     # just use the primary node parameters, ignoring the secondary.
9288     self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9289
9290     if not self.adopt_disks:
9291       # Check lv size requirements, if not adopting
9292       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9293       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9294
9295     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9296       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9297                                 disk[constants.IDISK_ADOPT])
9298                      for disk in self.disks])
9299       if len(all_lvs) != len(self.disks):
9300         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9301                                    errors.ECODE_INVAL)
9302       for lv_name in all_lvs:
9303         try:
9304           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9305           # to ReserveLV uses the same syntax
9306           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9307         except errors.ReservationError:
9308           raise errors.OpPrereqError("LV named %s used by another instance" %
9309                                      lv_name, errors.ECODE_NOTUNIQUE)
9310
9311       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9312       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9313
9314       node_lvs = self.rpc.call_lv_list([pnode.name],
9315                                        vg_names.payload.keys())[pnode.name]
9316       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9317       node_lvs = node_lvs.payload
9318
9319       delta = all_lvs.difference(node_lvs.keys())
9320       if delta:
9321         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9322                                    utils.CommaJoin(delta),
9323                                    errors.ECODE_INVAL)
9324       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9325       if online_lvs:
9326         raise errors.OpPrereqError("Online logical volumes found, cannot"
9327                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9328                                    errors.ECODE_STATE)
9329       # update the size of disk based on what is found
9330       for dsk in self.disks:
9331         dsk[constants.IDISK_SIZE] = \
9332           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9333                                         dsk[constants.IDISK_ADOPT])][0]))
9334
9335     elif self.op.disk_template == constants.DT_BLOCK:
9336       # Normalize and de-duplicate device paths
9337       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9338                        for disk in self.disks])
9339       if len(all_disks) != len(self.disks):
9340         raise errors.OpPrereqError("Duplicate disk names given for adoption",
9341                                    errors.ECODE_INVAL)
9342       baddisks = [d for d in all_disks
9343                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9344       if baddisks:
9345         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9346                                    " cannot be adopted" %
9347                                    (", ".join(baddisks),
9348                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
9349                                    errors.ECODE_INVAL)
9350
9351       node_disks = self.rpc.call_bdev_sizes([pnode.name],
9352                                             list(all_disks))[pnode.name]
9353       node_disks.Raise("Cannot get block device information from node %s" %
9354                        pnode.name)
9355       node_disks = node_disks.payload
9356       delta = all_disks.difference(node_disks.keys())
9357       if delta:
9358         raise errors.OpPrereqError("Missing block device(s): %s" %
9359                                    utils.CommaJoin(delta),
9360                                    errors.ECODE_INVAL)
9361       for dsk in self.disks:
9362         dsk[constants.IDISK_SIZE] = \
9363           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9364
9365     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9366
9367     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9368     # check OS parameters (remotely)
9369     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9370
9371     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9372
9373     # memory check on primary node
9374     #TODO(dynmem): use MINMEM for checking
9375     if self.op.start:
9376       _CheckNodeFreeMemory(self, self.pnode.name,
9377                            "creating instance %s" % self.op.instance_name,
9378                            self.be_full[constants.BE_MAXMEM],
9379                            self.op.hypervisor)
9380
9381     self.dry_run_result = list(nodenames)
9382
9383   def Exec(self, feedback_fn):
9384     """Create and add the instance to the cluster.
9385
9386     """
9387     instance = self.op.instance_name
9388     pnode_name = self.pnode.name
9389
9390     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9391                 self.owned_locks(locking.LEVEL_NODE)), \
9392       "Node locks differ from node resource locks"
9393
9394     ht_kind = self.op.hypervisor
9395     if ht_kind in constants.HTS_REQ_PORT:
9396       network_port = self.cfg.AllocatePort()
9397     else:
9398       network_port = None
9399
9400     disks = _GenerateDiskTemplate(self,
9401                                   self.op.disk_template,
9402                                   instance, pnode_name,
9403                                   self.secondaries,
9404                                   self.disks,
9405                                   self.instance_file_storage_dir,
9406                                   self.op.file_driver,
9407                                   0,
9408                                   feedback_fn,
9409                                   self.diskparams)
9410
9411     iobj = objects.Instance(name=instance, os=self.op.os_type,
9412                             primary_node=pnode_name,
9413                             nics=self.nics, disks=disks,
9414                             disk_template=self.op.disk_template,
9415                             admin_state=constants.ADMINST_DOWN,
9416                             network_port=network_port,
9417                             beparams=self.op.beparams,
9418                             hvparams=self.op.hvparams,
9419                             hypervisor=self.op.hypervisor,
9420                             osparams=self.op.osparams,
9421                             )
9422
9423     if self.op.tags:
9424       for tag in self.op.tags:
9425         iobj.AddTag(tag)
9426
9427     if self.adopt_disks:
9428       if self.op.disk_template == constants.DT_PLAIN:
9429         # rename LVs to the newly-generated names; we need to construct
9430         # 'fake' LV disks with the old data, plus the new unique_id
9431         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9432         rename_to = []
9433         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9434           rename_to.append(t_dsk.logical_id)
9435           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9436           self.cfg.SetDiskID(t_dsk, pnode_name)
9437         result = self.rpc.call_blockdev_rename(pnode_name,
9438                                                zip(tmp_disks, rename_to))
9439         result.Raise("Failed to rename adoped LVs")
9440     else:
9441       feedback_fn("* creating instance disks...")
9442       try:
9443         _CreateDisks(self, iobj)
9444       except errors.OpExecError:
9445         self.LogWarning("Device creation failed, reverting...")
9446         try:
9447           _RemoveDisks(self, iobj)
9448         finally:
9449           self.cfg.ReleaseDRBDMinors(instance)
9450           raise
9451
9452     feedback_fn("adding instance %s to cluster config" % instance)
9453
9454     self.cfg.AddInstance(iobj, self.proc.GetECId())
9455
9456     # Declare that we don't want to remove the instance lock anymore, as we've
9457     # added the instance to the config
9458     del self.remove_locks[locking.LEVEL_INSTANCE]
9459
9460     if self.op.mode == constants.INSTANCE_IMPORT:
9461       # Release unused nodes
9462       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9463     else:
9464       # Release all nodes
9465       _ReleaseLocks(self, locking.LEVEL_NODE)
9466
9467     disk_abort = False
9468     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9469       feedback_fn("* wiping instance disks...")
9470       try:
9471         _WipeDisks(self, iobj)
9472       except errors.OpExecError, err:
9473         logging.exception("Wiping disks failed")
9474         self.LogWarning("Wiping instance disks failed (%s)", err)
9475         disk_abort = True
9476
9477     if disk_abort:
9478       # Something is already wrong with the disks, don't do anything else
9479       pass
9480     elif self.op.wait_for_sync:
9481       disk_abort = not _WaitForSync(self, iobj)
9482     elif iobj.disk_template in constants.DTS_INT_MIRROR:
9483       # make sure the disks are not degraded (still sync-ing is ok)
9484       feedback_fn("* checking mirrors status")
9485       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9486     else:
9487       disk_abort = False
9488
9489     if disk_abort:
9490       _RemoveDisks(self, iobj)
9491       self.cfg.RemoveInstance(iobj.name)
9492       # Make sure the instance lock gets removed
9493       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9494       raise errors.OpExecError("There are some degraded disks for"
9495                                " this instance")
9496
9497     # Release all node resource locks
9498     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9499
9500     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9501       if self.op.mode == constants.INSTANCE_CREATE:
9502         if not self.op.no_install:
9503           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9504                         not self.op.wait_for_sync)
9505           if pause_sync:
9506             feedback_fn("* pausing disk sync to install instance OS")
9507             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9508                                                               iobj.disks, True)
9509             for idx, success in enumerate(result.payload):
9510               if not success:
9511                 logging.warn("pause-sync of instance %s for disk %d failed",
9512                              instance, idx)
9513
9514           feedback_fn("* running the instance OS create scripts...")
9515           # FIXME: pass debug option from opcode to backend
9516           os_add_result = \
9517             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9518                                           self.op.debug_level)
9519           if pause_sync:
9520             feedback_fn("* resuming disk sync")
9521             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9522                                                               iobj.disks, False)
9523             for idx, success in enumerate(result.payload):
9524               if not success:
9525                 logging.warn("resume-sync of instance %s for disk %d failed",
9526                              instance, idx)
9527
9528           os_add_result.Raise("Could not add os for instance %s"
9529                               " on node %s" % (instance, pnode_name))
9530
9531       elif self.op.mode == constants.INSTANCE_IMPORT:
9532         feedback_fn("* running the instance OS import scripts...")
9533
9534         transfers = []
9535
9536         for idx, image in enumerate(self.src_images):
9537           if not image:
9538             continue
9539
9540           # FIXME: pass debug option from opcode to backend
9541           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9542                                              constants.IEIO_FILE, (image, ),
9543                                              constants.IEIO_SCRIPT,
9544                                              (iobj.disks[idx], idx),
9545                                              None)
9546           transfers.append(dt)
9547
9548         import_result = \
9549           masterd.instance.TransferInstanceData(self, feedback_fn,
9550                                                 self.op.src_node, pnode_name,
9551                                                 self.pnode.secondary_ip,
9552                                                 iobj, transfers)
9553         if not compat.all(import_result):
9554           self.LogWarning("Some disks for instance %s on node %s were not"
9555                           " imported successfully" % (instance, pnode_name))
9556
9557       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9558         feedback_fn("* preparing remote import...")
9559         # The source cluster will stop the instance before attempting to make a
9560         # connection. In some cases stopping an instance can take a long time,
9561         # hence the shutdown timeout is added to the connection timeout.
9562         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9563                            self.op.source_shutdown_timeout)
9564         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9565
9566         assert iobj.primary_node == self.pnode.name
9567         disk_results = \
9568           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9569                                         self.source_x509_ca,
9570                                         self._cds, timeouts)
9571         if not compat.all(disk_results):
9572           # TODO: Should the instance still be started, even if some disks
9573           # failed to import (valid for local imports, too)?
9574           self.LogWarning("Some disks for instance %s on node %s were not"
9575                           " imported successfully" % (instance, pnode_name))
9576
9577         # Run rename script on newly imported instance
9578         assert iobj.name == instance
9579         feedback_fn("Running rename script for %s" % instance)
9580         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9581                                                    self.source_instance_name,
9582                                                    self.op.debug_level)
9583         if result.fail_msg:
9584           self.LogWarning("Failed to run rename script for %s on node"
9585                           " %s: %s" % (instance, pnode_name, result.fail_msg))
9586
9587       else:
9588         # also checked in the prereq part
9589         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9590                                      % self.op.mode)
9591
9592     assert not self.owned_locks(locking.LEVEL_NODE_RES)
9593
9594     if self.op.start:
9595       iobj.admin_state = constants.ADMINST_UP
9596       self.cfg.Update(iobj, feedback_fn)
9597       logging.info("Starting instance %s on node %s", instance, pnode_name)
9598       feedback_fn("* starting instance...")
9599       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9600                                             False)
9601       result.Raise("Could not start instance")
9602
9603     return list(iobj.all_nodes)
9604
9605
9606 class LUInstanceConsole(NoHooksLU):
9607   """Connect to an instance's console.
9608
9609   This is somewhat special in that it returns the command line that
9610   you need to run on the master node in order to connect to the
9611   console.
9612
9613   """
9614   REQ_BGL = False
9615
9616   def ExpandNames(self):
9617     self.share_locks = _ShareAll()
9618     self._ExpandAndLockInstance()
9619
9620   def CheckPrereq(self):
9621     """Check prerequisites.
9622
9623     This checks that the instance is in the cluster.
9624
9625     """
9626     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9627     assert self.instance is not None, \
9628       "Cannot retrieve locked instance %s" % self.op.instance_name
9629     _CheckNodeOnline(self, self.instance.primary_node)
9630
9631   def Exec(self, feedback_fn):
9632     """Connect to the console of an instance
9633
9634     """
9635     instance = self.instance
9636     node = instance.primary_node
9637
9638     node_insts = self.rpc.call_instance_list([node],
9639                                              [instance.hypervisor])[node]
9640     node_insts.Raise("Can't get node information from %s" % node)
9641
9642     if instance.name not in node_insts.payload:
9643       if instance.admin_state == constants.ADMINST_UP:
9644         state = constants.INSTST_ERRORDOWN
9645       elif instance.admin_state == constants.ADMINST_DOWN:
9646         state = constants.INSTST_ADMINDOWN
9647       else:
9648         state = constants.INSTST_ADMINOFFLINE
9649       raise errors.OpExecError("Instance %s is not running (state %s)" %
9650                                (instance.name, state))
9651
9652     logging.debug("Connecting to console of %s on %s", instance.name, node)
9653
9654     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9655
9656
9657 def _GetInstanceConsole(cluster, instance):
9658   """Returns console information for an instance.
9659
9660   @type cluster: L{objects.Cluster}
9661   @type instance: L{objects.Instance}
9662   @rtype: dict
9663
9664   """
9665   hyper = hypervisor.GetHypervisor(instance.hypervisor)
9666   # beparams and hvparams are passed separately, to avoid editing the
9667   # instance and then saving the defaults in the instance itself.
9668   hvparams = cluster.FillHV(instance)
9669   beparams = cluster.FillBE(instance)
9670   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9671
9672   assert console.instance == instance.name
9673   assert console.Validate()
9674
9675   return console.ToDict()
9676
9677
9678 class LUInstanceReplaceDisks(LogicalUnit):
9679   """Replace the disks of an instance.
9680
9681   """
9682   HPATH = "mirrors-replace"
9683   HTYPE = constants.HTYPE_INSTANCE
9684   REQ_BGL = False
9685
9686   def CheckArguments(self):
9687     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9688                                   self.op.iallocator)
9689
9690   def ExpandNames(self):
9691     self._ExpandAndLockInstance()
9692
9693     assert locking.LEVEL_NODE not in self.needed_locks
9694     assert locking.LEVEL_NODE_RES not in self.needed_locks
9695     assert locking.LEVEL_NODEGROUP not in self.needed_locks
9696
9697     assert self.op.iallocator is None or self.op.remote_node is None, \
9698       "Conflicting options"
9699
9700     if self.op.remote_node is not None:
9701       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9702
9703       # Warning: do not remove the locking of the new secondary here
9704       # unless DRBD8.AddChildren is changed to work in parallel;
9705       # currently it doesn't since parallel invocations of
9706       # FindUnusedMinor will conflict
9707       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9708       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9709     else:
9710       self.needed_locks[locking.LEVEL_NODE] = []
9711       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9712
9713       if self.op.iallocator is not None:
9714         # iallocator will select a new node in the same group
9715         self.needed_locks[locking.LEVEL_NODEGROUP] = []
9716
9717     self.needed_locks[locking.LEVEL_NODE_RES] = []
9718
9719     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9720                                    self.op.iallocator, self.op.remote_node,
9721                                    self.op.disks, False, self.op.early_release)
9722
9723     self.tasklets = [self.replacer]
9724
9725   def DeclareLocks(self, level):
9726     if level == locking.LEVEL_NODEGROUP:
9727       assert self.op.remote_node is None
9728       assert self.op.iallocator is not None
9729       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9730
9731       self.share_locks[locking.LEVEL_NODEGROUP] = 1
9732       # Lock all groups used by instance optimistically; this requires going
9733       # via the node before it's locked, requiring verification later on
9734       self.needed_locks[locking.LEVEL_NODEGROUP] = \
9735         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9736
9737     elif level == locking.LEVEL_NODE:
9738       if self.op.iallocator is not None:
9739         assert self.op.remote_node is None
9740         assert not self.needed_locks[locking.LEVEL_NODE]
9741
9742         # Lock member nodes of all locked groups
9743         self.needed_locks[locking.LEVEL_NODE] = [node_name
9744           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9745           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9746       else:
9747         self._LockInstancesNodes()
9748     elif level == locking.LEVEL_NODE_RES:
9749       # Reuse node locks
9750       self.needed_locks[locking.LEVEL_NODE_RES] = \
9751         self.needed_locks[locking.LEVEL_NODE]
9752
9753   def BuildHooksEnv(self):
9754     """Build hooks env.
9755
9756     This runs on the master, the primary and all the secondaries.
9757
9758     """
9759     instance = self.replacer.instance
9760     env = {
9761       "MODE": self.op.mode,
9762       "NEW_SECONDARY": self.op.remote_node,
9763       "OLD_SECONDARY": instance.secondary_nodes[0],
9764       }
9765     env.update(_BuildInstanceHookEnvByObject(self, instance))
9766     return env
9767
9768   def BuildHooksNodes(self):
9769     """Build hooks nodes.
9770
9771     """
9772     instance = self.replacer.instance
9773     nl = [
9774       self.cfg.GetMasterNode(),
9775       instance.primary_node,
9776       ]
9777     if self.op.remote_node is not None:
9778       nl.append(self.op.remote_node)
9779     return nl, nl
9780
9781   def CheckPrereq(self):
9782     """Check prerequisites.
9783
9784     """
9785     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9786             self.op.iallocator is None)
9787
9788     # Verify if node group locks are still correct
9789     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9790     if owned_groups:
9791       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9792
9793     return LogicalUnit.CheckPrereq(self)
9794
9795
9796 class TLReplaceDisks(Tasklet):
9797   """Replaces disks for an instance.
9798
9799   Note: Locking is not within the scope of this class.
9800
9801   """
9802   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9803                disks, delay_iallocator, early_release):
9804     """Initializes this class.
9805
9806     """
9807     Tasklet.__init__(self, lu)
9808
9809     # Parameters
9810     self.instance_name = instance_name
9811     self.mode = mode
9812     self.iallocator_name = iallocator_name
9813     self.remote_node = remote_node
9814     self.disks = disks
9815     self.delay_iallocator = delay_iallocator
9816     self.early_release = early_release
9817
9818     # Runtime data
9819     self.instance = None
9820     self.new_node = None
9821     self.target_node = None
9822     self.other_node = None
9823     self.remote_node_info = None
9824     self.node_secondary_ip = None
9825
9826   @staticmethod
9827   def CheckArguments(mode, remote_node, iallocator):
9828     """Helper function for users of this class.
9829
9830     """
9831     # check for valid parameter combination
9832     if mode == constants.REPLACE_DISK_CHG:
9833       if remote_node is None and iallocator is None:
9834         raise errors.OpPrereqError("When changing the secondary either an"
9835                                    " iallocator script must be used or the"
9836                                    " new node given", errors.ECODE_INVAL)
9837
9838       if remote_node is not None and iallocator is not None:
9839         raise errors.OpPrereqError("Give either the iallocator or the new"
9840                                    " secondary, not both", errors.ECODE_INVAL)
9841
9842     elif remote_node is not None or iallocator is not None:
9843       # Not replacing the secondary
9844       raise errors.OpPrereqError("The iallocator and new node options can"
9845                                  " only be used when changing the"
9846                                  " secondary node", errors.ECODE_INVAL)
9847
9848   @staticmethod
9849   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9850     """Compute a new secondary node using an IAllocator.
9851
9852     """
9853     ial = IAllocator(lu.cfg, lu.rpc,
9854                      mode=constants.IALLOCATOR_MODE_RELOC,
9855                      name=instance_name,
9856                      relocate_from=list(relocate_from))
9857
9858     ial.Run(iallocator_name)
9859
9860     if not ial.success:
9861       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9862                                  " %s" % (iallocator_name, ial.info),
9863                                  errors.ECODE_NORES)
9864
9865     if len(ial.result) != ial.required_nodes:
9866       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9867                                  " of nodes (%s), required %s" %
9868                                  (iallocator_name,
9869                                   len(ial.result), ial.required_nodes),
9870                                  errors.ECODE_FAULT)
9871
9872     remote_node_name = ial.result[0]
9873
9874     lu.LogInfo("Selected new secondary for instance '%s': %s",
9875                instance_name, remote_node_name)
9876
9877     return remote_node_name
9878
9879   def _FindFaultyDisks(self, node_name):
9880     """Wrapper for L{_FindFaultyInstanceDisks}.
9881
9882     """
9883     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9884                                     node_name, True)
9885
9886   def _CheckDisksActivated(self, instance):
9887     """Checks if the instance disks are activated.
9888
9889     @param instance: The instance to check disks
9890     @return: True if they are activated, False otherwise
9891
9892     """
9893     nodes = instance.all_nodes
9894
9895     for idx, dev in enumerate(instance.disks):
9896       for node in nodes:
9897         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9898         self.cfg.SetDiskID(dev, node)
9899
9900         result = self.rpc.call_blockdev_find(node, dev)
9901
9902         if result.offline:
9903           continue
9904         elif result.fail_msg or not result.payload:
9905           return False
9906
9907     return True
9908
9909   def CheckPrereq(self):
9910     """Check prerequisites.
9911
9912     This checks that the instance is in the cluster.
9913
9914     """
9915     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9916     assert instance is not None, \
9917       "Cannot retrieve locked instance %s" % self.instance_name
9918
9919     if instance.disk_template != constants.DT_DRBD8:
9920       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9921                                  " instances", errors.ECODE_INVAL)
9922
9923     if len(instance.secondary_nodes) != 1:
9924       raise errors.OpPrereqError("The instance has a strange layout,"
9925                                  " expected one secondary but found %d" %
9926                                  len(instance.secondary_nodes),
9927                                  errors.ECODE_FAULT)
9928
9929     if not self.delay_iallocator:
9930       self._CheckPrereq2()
9931
9932   def _CheckPrereq2(self):
9933     """Check prerequisites, second part.
9934
9935     This function should always be part of CheckPrereq. It was separated and is
9936     now called from Exec because during node evacuation iallocator was only
9937     called with an unmodified cluster model, not taking planned changes into
9938     account.
9939
9940     """
9941     instance = self.instance
9942     secondary_node = instance.secondary_nodes[0]
9943
9944     if self.iallocator_name is None:
9945       remote_node = self.remote_node
9946     else:
9947       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9948                                        instance.name, instance.secondary_nodes)
9949
9950     if remote_node is None:
9951       self.remote_node_info = None
9952     else:
9953       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9954              "Remote node '%s' is not locked" % remote_node
9955
9956       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9957       assert self.remote_node_info is not None, \
9958         "Cannot retrieve locked node %s" % remote_node
9959
9960     if remote_node == self.instance.primary_node:
9961       raise errors.OpPrereqError("The specified node is the primary node of"
9962                                  " the instance", errors.ECODE_INVAL)
9963
9964     if remote_node == secondary_node:
9965       raise errors.OpPrereqError("The specified node is already the"
9966                                  " secondary node of the instance",
9967                                  errors.ECODE_INVAL)
9968
9969     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9970                                     constants.REPLACE_DISK_CHG):
9971       raise errors.OpPrereqError("Cannot specify disks to be replaced",
9972                                  errors.ECODE_INVAL)
9973
9974     if self.mode == constants.REPLACE_DISK_AUTO:
9975       if not self._CheckDisksActivated(instance):
9976         raise errors.OpPrereqError("Please run activate-disks on instance %s"
9977                                    " first" % self.instance_name,
9978                                    errors.ECODE_STATE)
9979       faulty_primary = self._FindFaultyDisks(instance.primary_node)
9980       faulty_secondary = self._FindFaultyDisks(secondary_node)
9981
9982       if faulty_primary and faulty_secondary:
9983         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9984                                    " one node and can not be repaired"
9985                                    " automatically" % self.instance_name,
9986                                    errors.ECODE_STATE)
9987
9988       if faulty_primary:
9989         self.disks = faulty_primary
9990         self.target_node = instance.primary_node
9991         self.other_node = secondary_node
9992         check_nodes = [self.target_node, self.other_node]
9993       elif faulty_secondary:
9994         self.disks = faulty_secondary
9995         self.target_node = secondary_node
9996         self.other_node = instance.primary_node
9997         check_nodes = [self.target_node, self.other_node]
9998       else:
9999         self.disks = []
10000         check_nodes = []
10001
10002     else:
10003       # Non-automatic modes
10004       if self.mode == constants.REPLACE_DISK_PRI:
10005         self.target_node = instance.primary_node
10006         self.other_node = secondary_node
10007         check_nodes = [self.target_node, self.other_node]
10008
10009       elif self.mode == constants.REPLACE_DISK_SEC:
10010         self.target_node = secondary_node
10011         self.other_node = instance.primary_node
10012         check_nodes = [self.target_node, self.other_node]
10013
10014       elif self.mode == constants.REPLACE_DISK_CHG:
10015         self.new_node = remote_node
10016         self.other_node = instance.primary_node
10017         self.target_node = secondary_node
10018         check_nodes = [self.new_node, self.other_node]
10019
10020         _CheckNodeNotDrained(self.lu, remote_node)
10021         _CheckNodeVmCapable(self.lu, remote_node)
10022
10023         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10024         assert old_node_info is not None
10025         if old_node_info.offline and not self.early_release:
10026           # doesn't make sense to delay the release
10027           self.early_release = True
10028           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10029                           " early-release mode", secondary_node)
10030
10031       else:
10032         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10033                                      self.mode)
10034
10035       # If not specified all disks should be replaced
10036       if not self.disks:
10037         self.disks = range(len(self.instance.disks))
10038
10039     # TODO: compute disk parameters
10040     primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10041     secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10042     if primary_node_info.group != secondary_node_info.group:
10043       self.lu.LogInfo("The instance primary and secondary nodes are in two"
10044                       " different node groups; the disk parameters of the"
10045                       " primary node's group will be applied.")
10046
10047     self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10048
10049     for node in check_nodes:
10050       _CheckNodeOnline(self.lu, node)
10051
10052     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10053                                                           self.other_node,
10054                                                           self.target_node]
10055                               if node_name is not None)
10056
10057     # Release unneeded node and node resource locks
10058     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10059     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10060
10061     # Release any owned node group
10062     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10063       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10064
10065     # Check whether disks are valid
10066     for disk_idx in self.disks:
10067       instance.FindDisk(disk_idx)
10068
10069     # Get secondary node IP addresses
10070     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10071                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10072
10073   def Exec(self, feedback_fn):
10074     """Execute disk replacement.
10075
10076     This dispatches the disk replacement to the appropriate handler.
10077
10078     """
10079     if self.delay_iallocator:
10080       self._CheckPrereq2()
10081
10082     if __debug__:
10083       # Verify owned locks before starting operation
10084       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10085       assert set(owned_nodes) == set(self.node_secondary_ip), \
10086           ("Incorrect node locks, owning %s, expected %s" %
10087            (owned_nodes, self.node_secondary_ip.keys()))
10088       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10089               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10090
10091       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10092       assert list(owned_instances) == [self.instance_name], \
10093           "Instance '%s' not locked" % self.instance_name
10094
10095       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10096           "Should not own any node group lock at this point"
10097
10098     if not self.disks:
10099       feedback_fn("No disks need replacement")
10100       return
10101
10102     feedback_fn("Replacing disk(s) %s for %s" %
10103                 (utils.CommaJoin(self.disks), self.instance.name))
10104
10105     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10106
10107     # Activate the instance disks if we're replacing them on a down instance
10108     if activate_disks:
10109       _StartInstanceDisks(self.lu, self.instance, True)
10110
10111     try:
10112       # Should we replace the secondary node?
10113       if self.new_node is not None:
10114         fn = self._ExecDrbd8Secondary
10115       else:
10116         fn = self._ExecDrbd8DiskOnly
10117
10118       result = fn(feedback_fn)
10119     finally:
10120       # Deactivate the instance disks if we're replacing them on a
10121       # down instance
10122       if activate_disks:
10123         _SafeShutdownInstanceDisks(self.lu, self.instance)
10124
10125     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10126
10127     if __debug__:
10128       # Verify owned locks
10129       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10130       nodes = frozenset(self.node_secondary_ip)
10131       assert ((self.early_release and not owned_nodes) or
10132               (not self.early_release and not (set(owned_nodes) - nodes))), \
10133         ("Not owning the correct locks, early_release=%s, owned=%r,"
10134          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10135
10136     return result
10137
10138   def _CheckVolumeGroup(self, nodes):
10139     self.lu.LogInfo("Checking volume groups")
10140
10141     vgname = self.cfg.GetVGName()
10142
10143     # Make sure volume group exists on all involved nodes
10144     results = self.rpc.call_vg_list(nodes)
10145     if not results:
10146       raise errors.OpExecError("Can't list volume groups on the nodes")
10147
10148     for node in nodes:
10149       res = results[node]
10150       res.Raise("Error checking node %s" % node)
10151       if vgname not in res.payload:
10152         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10153                                  (vgname, node))
10154
10155   def _CheckDisksExistence(self, nodes):
10156     # Check disk existence
10157     for idx, dev in enumerate(self.instance.disks):
10158       if idx not in self.disks:
10159         continue
10160
10161       for node in nodes:
10162         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10163         self.cfg.SetDiskID(dev, node)
10164
10165         result = self.rpc.call_blockdev_find(node, dev)
10166
10167         msg = result.fail_msg
10168         if msg or not result.payload:
10169           if not msg:
10170             msg = "disk not found"
10171           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10172                                    (idx, node, msg))
10173
10174   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10175     for idx, dev in enumerate(self.instance.disks):
10176       if idx not in self.disks:
10177         continue
10178
10179       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10180                       (idx, node_name))
10181
10182       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10183                                    ldisk=ldisk):
10184         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10185                                  " replace disks for instance %s" %
10186                                  (node_name, self.instance.name))
10187
10188   def _CreateNewStorage(self, node_name):
10189     """Create new storage on the primary or secondary node.
10190
10191     This is only used for same-node replaces, not for changing the
10192     secondary node, hence we don't want to modify the existing disk.
10193
10194     """
10195     iv_names = {}
10196
10197     for idx, dev in enumerate(self.instance.disks):
10198       if idx not in self.disks:
10199         continue
10200
10201       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10202
10203       self.cfg.SetDiskID(dev, node_name)
10204
10205       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10206       names = _GenerateUniqueNames(self.lu, lv_names)
10207
10208       _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10209
10210       vg_data = dev.children[0].logical_id[0]
10211       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10212                              logical_id=(vg_data, names[0]), params=data_p)
10213       vg_meta = dev.children[1].logical_id[0]
10214       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10215                              logical_id=(vg_meta, names[1]), params=meta_p)
10216
10217       new_lvs = [lv_data, lv_meta]
10218       old_lvs = [child.Copy() for child in dev.children]
10219       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10220
10221       # we pass force_create=True to force the LVM creation
10222       for new_lv in new_lvs:
10223         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10224                         _GetInstanceInfoText(self.instance), False)
10225
10226     return iv_names
10227
10228   def _CheckDevices(self, node_name, iv_names):
10229     for name, (dev, _, _) in iv_names.iteritems():
10230       self.cfg.SetDiskID(dev, node_name)
10231
10232       result = self.rpc.call_blockdev_find(node_name, dev)
10233
10234       msg = result.fail_msg
10235       if msg or not result.payload:
10236         if not msg:
10237           msg = "disk not found"
10238         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10239                                  (name, msg))
10240
10241       if result.payload.is_degraded:
10242         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10243
10244   def _RemoveOldStorage(self, node_name, iv_names):
10245     for name, (_, old_lvs, _) in iv_names.iteritems():
10246       self.lu.LogInfo("Remove logical volumes for %s" % name)
10247
10248       for lv in old_lvs:
10249         self.cfg.SetDiskID(lv, node_name)
10250
10251         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10252         if msg:
10253           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10254                              hint="remove unused LVs manually")
10255
10256   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10257     """Replace a disk on the primary or secondary for DRBD 8.
10258
10259     The algorithm for replace is quite complicated:
10260
10261       1. for each disk to be replaced:
10262
10263         1. create new LVs on the target node with unique names
10264         1. detach old LVs from the drbd device
10265         1. rename old LVs to name_replaced.<time_t>
10266         1. rename new LVs to old LVs
10267         1. attach the new LVs (with the old names now) to the drbd device
10268
10269       1. wait for sync across all devices
10270
10271       1. for each modified disk:
10272
10273         1. remove old LVs (which have the name name_replaces.<time_t>)
10274
10275     Failures are not very well handled.
10276
10277     """
10278     steps_total = 6
10279
10280     # Step: check device activation
10281     self.lu.LogStep(1, steps_total, "Check device existence")
10282     self._CheckDisksExistence([self.other_node, self.target_node])
10283     self._CheckVolumeGroup([self.target_node, self.other_node])
10284
10285     # Step: check other node consistency
10286     self.lu.LogStep(2, steps_total, "Check peer consistency")
10287     self._CheckDisksConsistency(self.other_node,
10288                                 self.other_node == self.instance.primary_node,
10289                                 False)
10290
10291     # Step: create new storage
10292     self.lu.LogStep(3, steps_total, "Allocate new storage")
10293     iv_names = self._CreateNewStorage(self.target_node)
10294
10295     # Step: for each lv, detach+rename*2+attach
10296     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10297     for dev, old_lvs, new_lvs in iv_names.itervalues():
10298       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10299
10300       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10301                                                      old_lvs)
10302       result.Raise("Can't detach drbd from local storage on node"
10303                    " %s for device %s" % (self.target_node, dev.iv_name))
10304       #dev.children = []
10305       #cfg.Update(instance)
10306
10307       # ok, we created the new LVs, so now we know we have the needed
10308       # storage; as such, we proceed on the target node to rename
10309       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10310       # using the assumption that logical_id == physical_id (which in
10311       # turn is the unique_id on that node)
10312
10313       # FIXME(iustin): use a better name for the replaced LVs
10314       temp_suffix = int(time.time())
10315       ren_fn = lambda d, suff: (d.physical_id[0],
10316                                 d.physical_id[1] + "_replaced-%s" % suff)
10317
10318       # Build the rename list based on what LVs exist on the node
10319       rename_old_to_new = []
10320       for to_ren in old_lvs:
10321         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10322         if not result.fail_msg and result.payload:
10323           # device exists
10324           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10325
10326       self.lu.LogInfo("Renaming the old LVs on the target node")
10327       result = self.rpc.call_blockdev_rename(self.target_node,
10328                                              rename_old_to_new)
10329       result.Raise("Can't rename old LVs on node %s" % self.target_node)
10330
10331       # Now we rename the new LVs to the old LVs
10332       self.lu.LogInfo("Renaming the new LVs on the target node")
10333       rename_new_to_old = [(new, old.physical_id)
10334                            for old, new in zip(old_lvs, new_lvs)]
10335       result = self.rpc.call_blockdev_rename(self.target_node,
10336                                              rename_new_to_old)
10337       result.Raise("Can't rename new LVs on node %s" % self.target_node)
10338
10339       # Intermediate steps of in memory modifications
10340       for old, new in zip(old_lvs, new_lvs):
10341         new.logical_id = old.logical_id
10342         self.cfg.SetDiskID(new, self.target_node)
10343
10344       # We need to modify old_lvs so that removal later removes the
10345       # right LVs, not the newly added ones; note that old_lvs is a
10346       # copy here
10347       for disk in old_lvs:
10348         disk.logical_id = ren_fn(disk, temp_suffix)
10349         self.cfg.SetDiskID(disk, self.target_node)
10350
10351       # Now that the new lvs have the old name, we can add them to the device
10352       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10353       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10354                                                   new_lvs)
10355       msg = result.fail_msg
10356       if msg:
10357         for new_lv in new_lvs:
10358           msg2 = self.rpc.call_blockdev_remove(self.target_node,
10359                                                new_lv).fail_msg
10360           if msg2:
10361             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10362                                hint=("cleanup manually the unused logical"
10363                                      "volumes"))
10364         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10365
10366     cstep = itertools.count(5)
10367
10368     if self.early_release:
10369       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10370       self._RemoveOldStorage(self.target_node, iv_names)
10371       # TODO: Check if releasing locks early still makes sense
10372       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10373     else:
10374       # Release all resource locks except those used by the instance
10375       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10376                     keep=self.node_secondary_ip.keys())
10377
10378     # Release all node locks while waiting for sync
10379     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10380
10381     # TODO: Can the instance lock be downgraded here? Take the optional disk
10382     # shutdown in the caller into consideration.
10383
10384     # Wait for sync
10385     # This can fail as the old devices are degraded and _WaitForSync
10386     # does a combined result over all disks, so we don't check its return value
10387     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10388     _WaitForSync(self.lu, self.instance)
10389
10390     # Check all devices manually
10391     self._CheckDevices(self.instance.primary_node, iv_names)
10392
10393     # Step: remove old storage
10394     if not self.early_release:
10395       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10396       self._RemoveOldStorage(self.target_node, iv_names)
10397
10398   def _ExecDrbd8Secondary(self, feedback_fn):
10399     """Replace the secondary node for DRBD 8.
10400
10401     The algorithm for replace is quite complicated:
10402       - for all disks of the instance:
10403         - create new LVs on the new node with same names
10404         - shutdown the drbd device on the old secondary
10405         - disconnect the drbd network on the primary
10406         - create the drbd device on the new secondary
10407         - network attach the drbd on the primary, using an artifice:
10408           the drbd code for Attach() will connect to the network if it
10409           finds a device which is connected to the good local disks but
10410           not network enabled
10411       - wait for sync across all devices
10412       - remove all disks from the old secondary
10413
10414     Failures are not very well handled.
10415
10416     """
10417     steps_total = 6
10418
10419     pnode = self.instance.primary_node
10420
10421     # Step: check device activation
10422     self.lu.LogStep(1, steps_total, "Check device existence")
10423     self._CheckDisksExistence([self.instance.primary_node])
10424     self._CheckVolumeGroup([self.instance.primary_node])
10425
10426     # Step: check other node consistency
10427     self.lu.LogStep(2, steps_total, "Check peer consistency")
10428     self._CheckDisksConsistency(self.instance.primary_node, True, True)
10429
10430     # Step: create new storage
10431     self.lu.LogStep(3, steps_total, "Allocate new storage")
10432     for idx, dev in enumerate(self.instance.disks):
10433       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10434                       (self.new_node, idx))
10435       # we pass force_create=True to force LVM creation
10436       for new_lv in dev.children:
10437         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10438                         _GetInstanceInfoText(self.instance), False)
10439
10440     # Step 4: dbrd minors and drbd setups changes
10441     # after this, we must manually remove the drbd minors on both the
10442     # error and the success paths
10443     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10444     minors = self.cfg.AllocateDRBDMinor([self.new_node
10445                                          for dev in self.instance.disks],
10446                                         self.instance.name)
10447     logging.debug("Allocated minors %r", minors)
10448
10449     iv_names = {}
10450     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10451       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10452                       (self.new_node, idx))
10453       # create new devices on new_node; note that we create two IDs:
10454       # one without port, so the drbd will be activated without
10455       # networking information on the new node at this stage, and one
10456       # with network, for the latter activation in step 4
10457       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10458       if self.instance.primary_node == o_node1:
10459         p_minor = o_minor1
10460       else:
10461         assert self.instance.primary_node == o_node2, "Three-node instance?"
10462         p_minor = o_minor2
10463
10464       new_alone_id = (self.instance.primary_node, self.new_node, None,
10465                       p_minor, new_minor, o_secret)
10466       new_net_id = (self.instance.primary_node, self.new_node, o_port,
10467                     p_minor, new_minor, o_secret)
10468
10469       iv_names[idx] = (dev, dev.children, new_net_id)
10470       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10471                     new_net_id)
10472       drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10473       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10474                               logical_id=new_alone_id,
10475                               children=dev.children,
10476                               size=dev.size,
10477                               params=drbd_params)
10478       try:
10479         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10480                               _GetInstanceInfoText(self.instance), False)
10481       except errors.GenericError:
10482         self.cfg.ReleaseDRBDMinors(self.instance.name)
10483         raise
10484
10485     # We have new devices, shutdown the drbd on the old secondary
10486     for idx, dev in enumerate(self.instance.disks):
10487       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10488       self.cfg.SetDiskID(dev, self.target_node)
10489       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10490       if msg:
10491         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10492                            "node: %s" % (idx, msg),
10493                            hint=("Please cleanup this device manually as"
10494                                  " soon as possible"))
10495
10496     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10497     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10498                                                self.instance.disks)[pnode]
10499
10500     msg = result.fail_msg
10501     if msg:
10502       # detaches didn't succeed (unlikely)
10503       self.cfg.ReleaseDRBDMinors(self.instance.name)
10504       raise errors.OpExecError("Can't detach the disks from the network on"
10505                                " old node: %s" % (msg,))
10506
10507     # if we managed to detach at least one, we update all the disks of
10508     # the instance to point to the new secondary
10509     self.lu.LogInfo("Updating instance configuration")
10510     for dev, _, new_logical_id in iv_names.itervalues():
10511       dev.logical_id = new_logical_id
10512       self.cfg.SetDiskID(dev, self.instance.primary_node)
10513
10514     self.cfg.Update(self.instance, feedback_fn)
10515
10516     # Release all node locks (the configuration has been updated)
10517     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10518
10519     # and now perform the drbd attach
10520     self.lu.LogInfo("Attaching primary drbds to new secondary"
10521                     " (standalone => connected)")
10522     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10523                                             self.new_node],
10524                                            self.node_secondary_ip,
10525                                            self.instance.disks,
10526                                            self.instance.name,
10527                                            False)
10528     for to_node, to_result in result.items():
10529       msg = to_result.fail_msg
10530       if msg:
10531         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10532                            to_node, msg,
10533                            hint=("please do a gnt-instance info to see the"
10534                                  " status of disks"))
10535
10536     cstep = itertools.count(5)
10537
10538     if self.early_release:
10539       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10540       self._RemoveOldStorage(self.target_node, iv_names)
10541       # TODO: Check if releasing locks early still makes sense
10542       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10543     else:
10544       # Release all resource locks except those used by the instance
10545       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10546                     keep=self.node_secondary_ip.keys())
10547
10548     # TODO: Can the instance lock be downgraded here? Take the optional disk
10549     # shutdown in the caller into consideration.
10550
10551     # Wait for sync
10552     # This can fail as the old devices are degraded and _WaitForSync
10553     # does a combined result over all disks, so we don't check its return value
10554     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10555     _WaitForSync(self.lu, self.instance)
10556
10557     # Check all devices manually
10558     self._CheckDevices(self.instance.primary_node, iv_names)
10559
10560     # Step: remove old storage
10561     if not self.early_release:
10562       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10563       self._RemoveOldStorage(self.target_node, iv_names)
10564
10565
10566 class LURepairNodeStorage(NoHooksLU):
10567   """Repairs the volume group on a node.
10568
10569   """
10570   REQ_BGL = False
10571
10572   def CheckArguments(self):
10573     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10574
10575     storage_type = self.op.storage_type
10576
10577     if (constants.SO_FIX_CONSISTENCY not in
10578         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10579       raise errors.OpPrereqError("Storage units of type '%s' can not be"
10580                                  " repaired" % storage_type,
10581                                  errors.ECODE_INVAL)
10582
10583   def ExpandNames(self):
10584     self.needed_locks = {
10585       locking.LEVEL_NODE: [self.op.node_name],
10586       }
10587
10588   def _CheckFaultyDisks(self, instance, node_name):
10589     """Ensure faulty disks abort the opcode or at least warn."""
10590     try:
10591       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10592                                   node_name, True):
10593         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10594                                    " node '%s'" % (instance.name, node_name),
10595                                    errors.ECODE_STATE)
10596     except errors.OpPrereqError, err:
10597       if self.op.ignore_consistency:
10598         self.proc.LogWarning(str(err.args[0]))
10599       else:
10600         raise
10601
10602   def CheckPrereq(self):
10603     """Check prerequisites.
10604
10605     """
10606     # Check whether any instance on this node has faulty disks
10607     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10608       if inst.admin_state != constants.ADMINST_UP:
10609         continue
10610       check_nodes = set(inst.all_nodes)
10611       check_nodes.discard(self.op.node_name)
10612       for inst_node_name in check_nodes:
10613         self._CheckFaultyDisks(inst, inst_node_name)
10614
10615   def Exec(self, feedback_fn):
10616     feedback_fn("Repairing storage unit '%s' on %s ..." %
10617                 (self.op.name, self.op.node_name))
10618
10619     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10620     result = self.rpc.call_storage_execute(self.op.node_name,
10621                                            self.op.storage_type, st_args,
10622                                            self.op.name,
10623                                            constants.SO_FIX_CONSISTENCY)
10624     result.Raise("Failed to repair storage unit '%s' on %s" %
10625                  (self.op.name, self.op.node_name))
10626
10627
10628 class LUNodeEvacuate(NoHooksLU):
10629   """Evacuates instances off a list of nodes.
10630
10631   """
10632   REQ_BGL = False
10633
10634   _MODE2IALLOCATOR = {
10635     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10636     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10637     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10638     }
10639   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10640   assert (frozenset(_MODE2IALLOCATOR.values()) ==
10641           constants.IALLOCATOR_NEVAC_MODES)
10642
10643   def CheckArguments(self):
10644     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10645
10646   def ExpandNames(self):
10647     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10648
10649     if self.op.remote_node is not None:
10650       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10651       assert self.op.remote_node
10652
10653       if self.op.remote_node == self.op.node_name:
10654         raise errors.OpPrereqError("Can not use evacuated node as a new"
10655                                    " secondary node", errors.ECODE_INVAL)
10656
10657       if self.op.mode != constants.NODE_EVAC_SEC:
10658         raise errors.OpPrereqError("Without the use of an iallocator only"
10659                                    " secondary instances can be evacuated",
10660                                    errors.ECODE_INVAL)
10661
10662     # Declare locks
10663     self.share_locks = _ShareAll()
10664     self.needed_locks = {
10665       locking.LEVEL_INSTANCE: [],
10666       locking.LEVEL_NODEGROUP: [],
10667       locking.LEVEL_NODE: [],
10668       }
10669
10670     # Determine nodes (via group) optimistically, needs verification once locks
10671     # have been acquired
10672     self.lock_nodes = self._DetermineNodes()
10673
10674   def _DetermineNodes(self):
10675     """Gets the list of nodes to operate on.
10676
10677     """
10678     if self.op.remote_node is None:
10679       # Iallocator will choose any node(s) in the same group
10680       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10681     else:
10682       group_nodes = frozenset([self.op.remote_node])
10683
10684     # Determine nodes to be locked
10685     return set([self.op.node_name]) | group_nodes
10686
10687   def _DetermineInstances(self):
10688     """Builds list of instances to operate on.
10689
10690     """
10691     assert self.op.mode in constants.NODE_EVAC_MODES
10692
10693     if self.op.mode == constants.NODE_EVAC_PRI:
10694       # Primary instances only
10695       inst_fn = _GetNodePrimaryInstances
10696       assert self.op.remote_node is None, \
10697         "Evacuating primary instances requires iallocator"
10698     elif self.op.mode == constants.NODE_EVAC_SEC:
10699       # Secondary instances only
10700       inst_fn = _GetNodeSecondaryInstances
10701     else:
10702       # All instances
10703       assert self.op.mode == constants.NODE_EVAC_ALL
10704       inst_fn = _GetNodeInstances
10705       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10706       # per instance
10707       raise errors.OpPrereqError("Due to an issue with the iallocator"
10708                                  " interface it is not possible to evacuate"
10709                                  " all instances at once; specify explicitly"
10710                                  " whether to evacuate primary or secondary"
10711                                  " instances",
10712                                  errors.ECODE_INVAL)
10713
10714     return inst_fn(self.cfg, self.op.node_name)
10715
10716   def DeclareLocks(self, level):
10717     if level == locking.LEVEL_INSTANCE:
10718       # Lock instances optimistically, needs verification once node and group
10719       # locks have been acquired
10720       self.needed_locks[locking.LEVEL_INSTANCE] = \
10721         set(i.name for i in self._DetermineInstances())
10722
10723     elif level == locking.LEVEL_NODEGROUP:
10724       # Lock node groups for all potential target nodes optimistically, needs
10725       # verification once nodes have been acquired
10726       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10727         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10728
10729     elif level == locking.LEVEL_NODE:
10730       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10731
10732   def CheckPrereq(self):
10733     # Verify locks
10734     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10735     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10736     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10737
10738     need_nodes = self._DetermineNodes()
10739
10740     if not owned_nodes.issuperset(need_nodes):
10741       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10742                                  " locks were acquired, current nodes are"
10743                                  " are '%s', used to be '%s'; retry the"
10744                                  " operation" %
10745                                  (self.op.node_name,
10746                                   utils.CommaJoin(need_nodes),
10747                                   utils.CommaJoin(owned_nodes)),
10748                                  errors.ECODE_STATE)
10749
10750     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10751     if owned_groups != wanted_groups:
10752       raise errors.OpExecError("Node groups changed since locks were acquired,"
10753                                " current groups are '%s', used to be '%s';"
10754                                " retry the operation" %
10755                                (utils.CommaJoin(wanted_groups),
10756                                 utils.CommaJoin(owned_groups)))
10757
10758     # Determine affected instances
10759     self.instances = self._DetermineInstances()
10760     self.instance_names = [i.name for i in self.instances]
10761
10762     if set(self.instance_names) != owned_instances:
10763       raise errors.OpExecError("Instances on node '%s' changed since locks"
10764                                " were acquired, current instances are '%s',"
10765                                " used to be '%s'; retry the operation" %
10766                                (self.op.node_name,
10767                                 utils.CommaJoin(self.instance_names),
10768                                 utils.CommaJoin(owned_instances)))
10769
10770     if self.instance_names:
10771       self.LogInfo("Evacuating instances from node '%s': %s",
10772                    self.op.node_name,
10773                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
10774     else:
10775       self.LogInfo("No instances to evacuate from node '%s'",
10776                    self.op.node_name)
10777
10778     if self.op.remote_node is not None:
10779       for i in self.instances:
10780         if i.primary_node == self.op.remote_node:
10781           raise errors.OpPrereqError("Node %s is the primary node of"
10782                                      " instance %s, cannot use it as"
10783                                      " secondary" %
10784                                      (self.op.remote_node, i.name),
10785                                      errors.ECODE_INVAL)
10786
10787   def Exec(self, feedback_fn):
10788     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10789
10790     if not self.instance_names:
10791       # No instances to evacuate
10792       jobs = []
10793
10794     elif self.op.iallocator is not None:
10795       # TODO: Implement relocation to other group
10796       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10797                        evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10798                        instances=list(self.instance_names))
10799
10800       ial.Run(self.op.iallocator)
10801
10802       if not ial.success:
10803         raise errors.OpPrereqError("Can't compute node evacuation using"
10804                                    " iallocator '%s': %s" %
10805                                    (self.op.iallocator, ial.info),
10806                                    errors.ECODE_NORES)
10807
10808       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10809
10810     elif self.op.remote_node is not None:
10811       assert self.op.mode == constants.NODE_EVAC_SEC
10812       jobs = [
10813         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10814                                         remote_node=self.op.remote_node,
10815                                         disks=[],
10816                                         mode=constants.REPLACE_DISK_CHG,
10817                                         early_release=self.op.early_release)]
10818         for instance_name in self.instance_names
10819         ]
10820
10821     else:
10822       raise errors.ProgrammerError("No iallocator or remote node")
10823
10824     return ResultWithJobs(jobs)
10825
10826
10827 def _SetOpEarlyRelease(early_release, op):
10828   """Sets C{early_release} flag on opcodes if available.
10829
10830   """
10831   try:
10832     op.early_release = early_release
10833   except AttributeError:
10834     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10835
10836   return op
10837
10838
10839 def _NodeEvacDest(use_nodes, group, nodes):
10840   """Returns group or nodes depending on caller's choice.
10841
10842   """
10843   if use_nodes:
10844     return utils.CommaJoin(nodes)
10845   else:
10846     return group
10847
10848
10849 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10850   """Unpacks the result of change-group and node-evacuate iallocator requests.
10851
10852   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10853   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10854
10855   @type lu: L{LogicalUnit}
10856   @param lu: Logical unit instance
10857   @type alloc_result: tuple/list
10858   @param alloc_result: Result from iallocator
10859   @type early_release: bool
10860   @param early_release: Whether to release locks early if possible
10861   @type use_nodes: bool
10862   @param use_nodes: Whether to display node names instead of groups
10863
10864   """
10865   (moved, failed, jobs) = alloc_result
10866
10867   if failed:
10868     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10869                                  for (name, reason) in failed)
10870     lu.LogWarning("Unable to evacuate instances %s", failreason)
10871     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10872
10873   if moved:
10874     lu.LogInfo("Instances to be moved: %s",
10875                utils.CommaJoin("%s (to %s)" %
10876                                (name, _NodeEvacDest(use_nodes, group, nodes))
10877                                for (name, group, nodes) in moved))
10878
10879   return [map(compat.partial(_SetOpEarlyRelease, early_release),
10880               map(opcodes.OpCode.LoadOpCode, ops))
10881           for ops in jobs]
10882
10883
10884 class LUInstanceGrowDisk(LogicalUnit):
10885   """Grow a disk of an instance.
10886
10887   """
10888   HPATH = "disk-grow"
10889   HTYPE = constants.HTYPE_INSTANCE
10890   REQ_BGL = False
10891
10892   def ExpandNames(self):
10893     self._ExpandAndLockInstance()
10894     self.needed_locks[locking.LEVEL_NODE] = []
10895     self.needed_locks[locking.LEVEL_NODE_RES] = []
10896     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10897
10898   def DeclareLocks(self, level):
10899     if level == locking.LEVEL_NODE:
10900       self._LockInstancesNodes()
10901     elif level == locking.LEVEL_NODE_RES:
10902       # Copy node locks
10903       self.needed_locks[locking.LEVEL_NODE_RES] = \
10904         self.needed_locks[locking.LEVEL_NODE][:]
10905
10906   def BuildHooksEnv(self):
10907     """Build hooks env.
10908
10909     This runs on the master, the primary and all the secondaries.
10910
10911     """
10912     env = {
10913       "DISK": self.op.disk,
10914       "AMOUNT": self.op.amount,
10915       }
10916     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10917     return env
10918
10919   def BuildHooksNodes(self):
10920     """Build hooks nodes.
10921
10922     """
10923     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10924     return (nl, nl)
10925
10926   def CheckPrereq(self):
10927     """Check prerequisites.
10928
10929     This checks that the instance is in the cluster.
10930
10931     """
10932     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10933     assert instance is not None, \
10934       "Cannot retrieve locked instance %s" % self.op.instance_name
10935     nodenames = list(instance.all_nodes)
10936     for node in nodenames:
10937       _CheckNodeOnline(self, node)
10938
10939     self.instance = instance
10940
10941     if instance.disk_template not in constants.DTS_GROWABLE:
10942       raise errors.OpPrereqError("Instance's disk layout does not support"
10943                                  " growing", errors.ECODE_INVAL)
10944
10945     self.disk = instance.FindDisk(self.op.disk)
10946
10947     if instance.disk_template not in (constants.DT_FILE,
10948                                       constants.DT_SHARED_FILE):
10949       # TODO: check the free disk space for file, when that feature will be
10950       # supported
10951       _CheckNodesFreeDiskPerVG(self, nodenames,
10952                                self.disk.ComputeGrowth(self.op.amount))
10953
10954   def Exec(self, feedback_fn):
10955     """Execute disk grow.
10956
10957     """
10958     instance = self.instance
10959     disk = self.disk
10960
10961     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10962     assert (self.owned_locks(locking.LEVEL_NODE) ==
10963             self.owned_locks(locking.LEVEL_NODE_RES))
10964
10965     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10966     if not disks_ok:
10967       raise errors.OpExecError("Cannot activate block device to grow")
10968
10969     feedback_fn("Growing disk %s of instance '%s' by %s" %
10970                 (self.op.disk, instance.name,
10971                  utils.FormatUnit(self.op.amount, "h")))
10972
10973     # First run all grow ops in dry-run mode
10974     for node in instance.all_nodes:
10975       self.cfg.SetDiskID(disk, node)
10976       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10977       result.Raise("Grow request failed to node %s" % node)
10978
10979     # We know that (as far as we can test) operations across different
10980     # nodes will succeed, time to run it for real
10981     for node in instance.all_nodes:
10982       self.cfg.SetDiskID(disk, node)
10983       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10984       result.Raise("Grow request failed to node %s" % node)
10985
10986       # TODO: Rewrite code to work properly
10987       # DRBD goes into sync mode for a short amount of time after executing the
10988       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10989       # calling "resize" in sync mode fails. Sleeping for a short amount of
10990       # time is a work-around.
10991       time.sleep(5)
10992
10993     disk.RecordGrow(self.op.amount)
10994     self.cfg.Update(instance, feedback_fn)
10995
10996     # Changes have been recorded, release node lock
10997     _ReleaseLocks(self, locking.LEVEL_NODE)
10998
10999     # Downgrade lock while waiting for sync
11000     self.glm.downgrade(locking.LEVEL_INSTANCE)
11001
11002     if self.op.wait_for_sync:
11003       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11004       if disk_abort:
11005         self.proc.LogWarning("Disk sync-ing has not returned a good"
11006                              " status; please check the instance")
11007       if instance.admin_state != constants.ADMINST_UP:
11008         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11009     elif instance.admin_state != constants.ADMINST_UP:
11010       self.proc.LogWarning("Not shutting down the disk even if the instance is"
11011                            " not supposed to be running because no wait for"
11012                            " sync mode was requested")
11013
11014     assert self.owned_locks(locking.LEVEL_NODE_RES)
11015     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11016
11017
11018 class LUInstanceQueryData(NoHooksLU):
11019   """Query runtime instance data.
11020
11021   """
11022   REQ_BGL = False
11023
11024   def ExpandNames(self):
11025     self.needed_locks = {}
11026
11027     # Use locking if requested or when non-static information is wanted
11028     if not (self.op.static or self.op.use_locking):
11029       self.LogWarning("Non-static data requested, locks need to be acquired")
11030       self.op.use_locking = True
11031
11032     if self.op.instances or not self.op.use_locking:
11033       # Expand instance names right here
11034       self.wanted_names = _GetWantedInstances(self, self.op.instances)
11035     else:
11036       # Will use acquired locks
11037       self.wanted_names = None
11038
11039     if self.op.use_locking:
11040       self.share_locks = _ShareAll()
11041
11042       if self.wanted_names is None:
11043         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11044       else:
11045         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11046
11047       self.needed_locks[locking.LEVEL_NODE] = []
11048       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11049
11050   def DeclareLocks(self, level):
11051     if self.op.use_locking and level == locking.LEVEL_NODE:
11052       self._LockInstancesNodes()
11053
11054   def CheckPrereq(self):
11055     """Check prerequisites.
11056
11057     This only checks the optional instance list against the existing names.
11058
11059     """
11060     if self.wanted_names is None:
11061       assert self.op.use_locking, "Locking was not used"
11062       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11063
11064     self.wanted_instances = \
11065         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11066
11067   def _ComputeBlockdevStatus(self, node, instance_name, dev):
11068     """Returns the status of a block device
11069
11070     """
11071     if self.op.static or not node:
11072       return None
11073
11074     self.cfg.SetDiskID(dev, node)
11075
11076     result = self.rpc.call_blockdev_find(node, dev)
11077     if result.offline:
11078       return None
11079
11080     result.Raise("Can't compute disk status for %s" % instance_name)
11081
11082     status = result.payload
11083     if status is None:
11084       return None
11085
11086     return (status.dev_path, status.major, status.minor,
11087             status.sync_percent, status.estimated_time,
11088             status.is_degraded, status.ldisk_status)
11089
11090   def _ComputeDiskStatus(self, instance, snode, dev):
11091     """Compute block device status.
11092
11093     """
11094     if dev.dev_type in constants.LDS_DRBD:
11095       # we change the snode then (otherwise we use the one passed in)
11096       if dev.logical_id[0] == instance.primary_node:
11097         snode = dev.logical_id[1]
11098       else:
11099         snode = dev.logical_id[0]
11100
11101     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11102                                               instance.name, dev)
11103     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11104
11105     if dev.children:
11106       dev_children = map(compat.partial(self._ComputeDiskStatus,
11107                                         instance, snode),
11108                          dev.children)
11109     else:
11110       dev_children = []
11111
11112     return {
11113       "iv_name": dev.iv_name,
11114       "dev_type": dev.dev_type,
11115       "logical_id": dev.logical_id,
11116       "physical_id": dev.physical_id,
11117       "pstatus": dev_pstatus,
11118       "sstatus": dev_sstatus,
11119       "children": dev_children,
11120       "mode": dev.mode,
11121       "size": dev.size,
11122       }
11123
11124   def Exec(self, feedback_fn):
11125     """Gather and return data"""
11126     result = {}
11127
11128     cluster = self.cfg.GetClusterInfo()
11129
11130     pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11131                                           for i in self.wanted_instances)
11132     for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11133       if self.op.static or pnode.offline:
11134         remote_state = None
11135         if pnode.offline:
11136           self.LogWarning("Primary node %s is marked offline, returning static"
11137                           " information only for instance %s" %
11138                           (pnode.name, instance.name))
11139       else:
11140         remote_info = self.rpc.call_instance_info(instance.primary_node,
11141                                                   instance.name,
11142                                                   instance.hypervisor)
11143         remote_info.Raise("Error checking node %s" % instance.primary_node)
11144         remote_info = remote_info.payload
11145         if remote_info and "state" in remote_info:
11146           remote_state = "up"
11147         else:
11148           if instance.admin_state == constants.ADMINST_UP:
11149             remote_state = "down"
11150           else:
11151             remote_state = instance.admin_state
11152
11153       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11154                   instance.disks)
11155
11156       result[instance.name] = {
11157         "name": instance.name,
11158         "config_state": instance.admin_state,
11159         "run_state": remote_state,
11160         "pnode": instance.primary_node,
11161         "snodes": instance.secondary_nodes,
11162         "os": instance.os,
11163         # this happens to be the same format used for hooks
11164         "nics": _NICListToTuple(self, instance.nics),
11165         "disk_template": instance.disk_template,
11166         "disks": disks,
11167         "hypervisor": instance.hypervisor,
11168         "network_port": instance.network_port,
11169         "hv_instance": instance.hvparams,
11170         "hv_actual": cluster.FillHV(instance, skip_globals=True),
11171         "be_instance": instance.beparams,
11172         "be_actual": cluster.FillBE(instance),
11173         "os_instance": instance.osparams,
11174         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11175         "serial_no": instance.serial_no,
11176         "mtime": instance.mtime,
11177         "ctime": instance.ctime,
11178         "uuid": instance.uuid,
11179         }
11180
11181     return result
11182
11183
11184 class LUInstanceSetParams(LogicalUnit):
11185   """Modifies an instances's parameters.
11186
11187   """
11188   HPATH = "instance-modify"
11189   HTYPE = constants.HTYPE_INSTANCE
11190   REQ_BGL = False
11191
11192   def CheckArguments(self):
11193     if not (self.op.nics or self.op.disks or self.op.disk_template or
11194             self.op.hvparams or self.op.beparams or self.op.os_name or
11195             self.op.online_inst or self.op.offline_inst):
11196       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11197
11198     if self.op.hvparams:
11199       _CheckGlobalHvParams(self.op.hvparams)
11200
11201     # Disk validation
11202     disk_addremove = 0
11203     for disk_op, disk_dict in self.op.disks:
11204       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11205       if disk_op == constants.DDM_REMOVE:
11206         disk_addremove += 1
11207         continue
11208       elif disk_op == constants.DDM_ADD:
11209         disk_addremove += 1
11210       else:
11211         if not isinstance(disk_op, int):
11212           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11213         if not isinstance(disk_dict, dict):
11214           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11215           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11216
11217       if disk_op == constants.DDM_ADD:
11218         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11219         if mode not in constants.DISK_ACCESS_SET:
11220           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11221                                      errors.ECODE_INVAL)
11222         size = disk_dict.get(constants.IDISK_SIZE, None)
11223         if size is None:
11224           raise errors.OpPrereqError("Required disk parameter size missing",
11225                                      errors.ECODE_INVAL)
11226         try:
11227           size = int(size)
11228         except (TypeError, ValueError), err:
11229           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11230                                      str(err), errors.ECODE_INVAL)
11231         disk_dict[constants.IDISK_SIZE] = size
11232       else:
11233         # modification of disk
11234         if constants.IDISK_SIZE in disk_dict:
11235           raise errors.OpPrereqError("Disk size change not possible, use"
11236                                      " grow-disk", errors.ECODE_INVAL)
11237
11238     if disk_addremove > 1:
11239       raise errors.OpPrereqError("Only one disk add or remove operation"
11240                                  " supported at a time", errors.ECODE_INVAL)
11241
11242     if self.op.disks and self.op.disk_template is not None:
11243       raise errors.OpPrereqError("Disk template conversion and other disk"
11244                                  " changes not supported at the same time",
11245                                  errors.ECODE_INVAL)
11246
11247     if (self.op.disk_template and
11248         self.op.disk_template in constants.DTS_INT_MIRROR and
11249         self.op.remote_node is None):
11250       raise errors.OpPrereqError("Changing the disk template to a mirrored"
11251                                  " one requires specifying a secondary node",
11252                                  errors.ECODE_INVAL)
11253
11254     # NIC validation
11255     nic_addremove = 0
11256     for nic_op, nic_dict in self.op.nics:
11257       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11258       if nic_op == constants.DDM_REMOVE:
11259         nic_addremove += 1
11260         continue
11261       elif nic_op == constants.DDM_ADD:
11262         nic_addremove += 1
11263       else:
11264         if not isinstance(nic_op, int):
11265           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11266         if not isinstance(nic_dict, dict):
11267           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11268           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11269
11270       # nic_dict should be a dict
11271       nic_ip = nic_dict.get(constants.INIC_IP, None)
11272       if nic_ip is not None:
11273         if nic_ip.lower() == constants.VALUE_NONE:
11274           nic_dict[constants.INIC_IP] = None
11275         else:
11276           if not netutils.IPAddress.IsValid(nic_ip):
11277             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11278                                        errors.ECODE_INVAL)
11279
11280       nic_bridge = nic_dict.get("bridge", None)
11281       nic_link = nic_dict.get(constants.INIC_LINK, None)
11282       if nic_bridge and nic_link:
11283         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11284                                    " at the same time", errors.ECODE_INVAL)
11285       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11286         nic_dict["bridge"] = None
11287       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11288         nic_dict[constants.INIC_LINK] = None
11289
11290       if nic_op == constants.DDM_ADD:
11291         nic_mac = nic_dict.get(constants.INIC_MAC, None)
11292         if nic_mac is None:
11293           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11294
11295       if constants.INIC_MAC in nic_dict:
11296         nic_mac = nic_dict[constants.INIC_MAC]
11297         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11298           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11299
11300         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11301           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11302                                      " modifying an existing nic",
11303                                      errors.ECODE_INVAL)
11304
11305     if nic_addremove > 1:
11306       raise errors.OpPrereqError("Only one NIC add or remove operation"
11307                                  " supported at a time", errors.ECODE_INVAL)
11308
11309   def ExpandNames(self):
11310     self._ExpandAndLockInstance()
11311     # Can't even acquire node locks in shared mode as upcoming changes in
11312     # Ganeti 2.6 will start to modify the node object on disk conversion
11313     self.needed_locks[locking.LEVEL_NODE] = []
11314     self.needed_locks[locking.LEVEL_NODE_RES] = []
11315     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11316
11317   def DeclareLocks(self, level):
11318     if level == locking.LEVEL_NODE:
11319       self._LockInstancesNodes()
11320       if self.op.disk_template and self.op.remote_node:
11321         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11322         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11323     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11324       # Copy node locks
11325       self.needed_locks[locking.LEVEL_NODE_RES] = \
11326         self.needed_locks[locking.LEVEL_NODE][:]
11327
11328   def BuildHooksEnv(self):
11329     """Build hooks env.
11330
11331     This runs on the master, primary and secondaries.
11332
11333     """
11334     args = dict()
11335     if constants.BE_MINMEM in self.be_new:
11336       args["minmem"] = self.be_new[constants.BE_MINMEM]
11337     if constants.BE_MAXMEM in self.be_new:
11338       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11339     if constants.BE_VCPUS in self.be_new:
11340       args["vcpus"] = self.be_new[constants.BE_VCPUS]
11341     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11342     # information at all.
11343     if self.op.nics:
11344       args["nics"] = []
11345       nic_override = dict(self.op.nics)
11346       for idx, nic in enumerate(self.instance.nics):
11347         if idx in nic_override:
11348           this_nic_override = nic_override[idx]
11349         else:
11350           this_nic_override = {}
11351         if constants.INIC_IP in this_nic_override:
11352           ip = this_nic_override[constants.INIC_IP]
11353         else:
11354           ip = nic.ip
11355         if constants.INIC_MAC in this_nic_override:
11356           mac = this_nic_override[constants.INIC_MAC]
11357         else:
11358           mac = nic.mac
11359         if idx in self.nic_pnew:
11360           nicparams = self.nic_pnew[idx]
11361         else:
11362           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11363         mode = nicparams[constants.NIC_MODE]
11364         link = nicparams[constants.NIC_LINK]
11365         args["nics"].append((ip, mac, mode, link))
11366       if constants.DDM_ADD in nic_override:
11367         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11368         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11369         nicparams = self.nic_pnew[constants.DDM_ADD]
11370         mode = nicparams[constants.NIC_MODE]
11371         link = nicparams[constants.NIC_LINK]
11372         args["nics"].append((ip, mac, mode, link))
11373       elif constants.DDM_REMOVE in nic_override:
11374         del args["nics"][-1]
11375
11376     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11377     if self.op.disk_template:
11378       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11379
11380     return env
11381
11382   def BuildHooksNodes(self):
11383     """Build hooks nodes.
11384
11385     """
11386     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11387     return (nl, nl)
11388
11389   def CheckPrereq(self):
11390     """Check prerequisites.
11391
11392     This only checks the instance list against the existing names.
11393
11394     """
11395     # checking the new params on the primary/secondary nodes
11396
11397     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11398     cluster = self.cluster = self.cfg.GetClusterInfo()
11399     assert self.instance is not None, \
11400       "Cannot retrieve locked instance %s" % self.op.instance_name
11401     pnode = instance.primary_node
11402     nodelist = list(instance.all_nodes)
11403     pnode_info = self.cfg.GetNodeInfo(pnode)
11404     self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11405
11406     # OS change
11407     if self.op.os_name and not self.op.force:
11408       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11409                       self.op.force_variant)
11410       instance_os = self.op.os_name
11411     else:
11412       instance_os = instance.os
11413
11414     if self.op.disk_template:
11415       if instance.disk_template == self.op.disk_template:
11416         raise errors.OpPrereqError("Instance already has disk template %s" %
11417                                    instance.disk_template, errors.ECODE_INVAL)
11418
11419       if (instance.disk_template,
11420           self.op.disk_template) not in self._DISK_CONVERSIONS:
11421         raise errors.OpPrereqError("Unsupported disk template conversion from"
11422                                    " %s to %s" % (instance.disk_template,
11423                                                   self.op.disk_template),
11424                                    errors.ECODE_INVAL)
11425       _CheckInstanceState(self, instance, INSTANCE_DOWN,
11426                           msg="cannot change disk template")
11427       if self.op.disk_template in constants.DTS_INT_MIRROR:
11428         if self.op.remote_node == pnode:
11429           raise errors.OpPrereqError("Given new secondary node %s is the same"
11430                                      " as the primary node of the instance" %
11431                                      self.op.remote_node, errors.ECODE_STATE)
11432         _CheckNodeOnline(self, self.op.remote_node)
11433         _CheckNodeNotDrained(self, self.op.remote_node)
11434         # FIXME: here we assume that the old instance type is DT_PLAIN
11435         assert instance.disk_template == constants.DT_PLAIN
11436         disks = [{constants.IDISK_SIZE: d.size,
11437                   constants.IDISK_VG: d.logical_id[0]}
11438                  for d in instance.disks]
11439         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11440         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11441
11442         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11443         if pnode_info.group != snode_info.group:
11444           self.LogWarning("The primary and secondary nodes are in two"
11445                           " different node groups; the disk parameters"
11446                           " from the first disk's node group will be"
11447                           " used")
11448
11449     # hvparams processing
11450     if self.op.hvparams:
11451       hv_type = instance.hypervisor
11452       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11453       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11454       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11455
11456       # local check
11457       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11458       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11459       self.hv_proposed = self.hv_new = hv_new # the new actual values
11460       self.hv_inst = i_hvdict # the new dict (without defaults)
11461     else:
11462       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11463                                               instance.hvparams)
11464       self.hv_new = self.hv_inst = {}
11465
11466     # beparams processing
11467     if self.op.beparams:
11468       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11469                                    use_none=True)
11470       objects.UpgradeBeParams(i_bedict)
11471       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11472       be_new = cluster.SimpleFillBE(i_bedict)
11473       self.be_proposed = self.be_new = be_new # the new actual values
11474       self.be_inst = i_bedict # the new dict (without defaults)
11475     else:
11476       self.be_new = self.be_inst = {}
11477       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11478     be_old = cluster.FillBE(instance)
11479
11480     # CPU param validation -- checking every time a paramtere is
11481     # changed to cover all cases where either CPU mask or vcpus have
11482     # changed
11483     if (constants.BE_VCPUS in self.be_proposed and
11484         constants.HV_CPU_MASK in self.hv_proposed):
11485       cpu_list = \
11486         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11487       # Verify mask is consistent with number of vCPUs. Can skip this
11488       # test if only 1 entry in the CPU mask, which means same mask
11489       # is applied to all vCPUs.
11490       if (len(cpu_list) > 1 and
11491           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11492         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11493                                    " CPU mask [%s]" %
11494                                    (self.be_proposed[constants.BE_VCPUS],
11495                                     self.hv_proposed[constants.HV_CPU_MASK]),
11496                                    errors.ECODE_INVAL)
11497
11498       # Only perform this test if a new CPU mask is given
11499       if constants.HV_CPU_MASK in self.hv_new:
11500         # Calculate the largest CPU number requested
11501         max_requested_cpu = max(map(max, cpu_list))
11502         # Check that all of the instance's nodes have enough physical CPUs to
11503         # satisfy the requested CPU mask
11504         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11505                                 max_requested_cpu + 1, instance.hypervisor)
11506
11507     # osparams processing
11508     if self.op.osparams:
11509       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11510       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11511       self.os_inst = i_osdict # the new dict (without defaults)
11512     else:
11513       self.os_inst = {}
11514
11515     self.warn = []
11516
11517     #TODO(dynmem): do the appropriate check involving MINMEM
11518     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11519         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11520       mem_check_list = [pnode]
11521       if be_new[constants.BE_AUTO_BALANCE]:
11522         # either we changed auto_balance to yes or it was from before
11523         mem_check_list.extend(instance.secondary_nodes)
11524       instance_info = self.rpc.call_instance_info(pnode, instance.name,
11525                                                   instance.hypervisor)
11526       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11527                                          [instance.hypervisor])
11528       pninfo = nodeinfo[pnode]
11529       msg = pninfo.fail_msg
11530       if msg:
11531         # Assume the primary node is unreachable and go ahead
11532         self.warn.append("Can't get info from primary node %s: %s" %
11533                          (pnode, msg))
11534       else:
11535         (_, _, (pnhvinfo, )) = pninfo.payload
11536         if not isinstance(pnhvinfo.get("memory_free", None), int):
11537           self.warn.append("Node data from primary node %s doesn't contain"
11538                            " free memory information" % pnode)
11539         elif instance_info.fail_msg:
11540           self.warn.append("Can't get instance runtime information: %s" %
11541                           instance_info.fail_msg)
11542         else:
11543           if instance_info.payload:
11544             current_mem = int(instance_info.payload["memory"])
11545           else:
11546             # Assume instance not running
11547             # (there is a slight race condition here, but it's not very
11548             # probable, and we have no other way to check)
11549             # TODO: Describe race condition
11550             current_mem = 0
11551           #TODO(dynmem): do the appropriate check involving MINMEM
11552           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11553                       pnhvinfo["memory_free"])
11554           if miss_mem > 0:
11555             raise errors.OpPrereqError("This change will prevent the instance"
11556                                        " from starting, due to %d MB of memory"
11557                                        " missing on its primary node" %
11558                                        miss_mem,
11559                                        errors.ECODE_NORES)
11560
11561       if be_new[constants.BE_AUTO_BALANCE]:
11562         for node, nres in nodeinfo.items():
11563           if node not in instance.secondary_nodes:
11564             continue
11565           nres.Raise("Can't get info from secondary node %s" % node,
11566                      prereq=True, ecode=errors.ECODE_STATE)
11567           (_, _, (nhvinfo, )) = nres.payload
11568           if not isinstance(nhvinfo.get("memory_free", None), int):
11569             raise errors.OpPrereqError("Secondary node %s didn't return free"
11570                                        " memory information" % node,
11571                                        errors.ECODE_STATE)
11572           #TODO(dynmem): do the appropriate check involving MINMEM
11573           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11574             raise errors.OpPrereqError("This change will prevent the instance"
11575                                        " from failover to its secondary node"
11576                                        " %s, due to not enough memory" % node,
11577                                        errors.ECODE_STATE)
11578
11579     # NIC processing
11580     self.nic_pnew = {}
11581     self.nic_pinst = {}
11582     for nic_op, nic_dict in self.op.nics:
11583       if nic_op == constants.DDM_REMOVE:
11584         if not instance.nics:
11585           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11586                                      errors.ECODE_INVAL)
11587         continue
11588       if nic_op != constants.DDM_ADD:
11589         # an existing nic
11590         if not instance.nics:
11591           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11592                                      " no NICs" % nic_op,
11593                                      errors.ECODE_INVAL)
11594         if nic_op < 0 or nic_op >= len(instance.nics):
11595           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11596                                      " are 0 to %d" %
11597                                      (nic_op, len(instance.nics) - 1),
11598                                      errors.ECODE_INVAL)
11599         old_nic_params = instance.nics[nic_op].nicparams
11600         old_nic_ip = instance.nics[nic_op].ip
11601       else:
11602         old_nic_params = {}
11603         old_nic_ip = None
11604
11605       update_params_dict = dict([(key, nic_dict[key])
11606                                  for key in constants.NICS_PARAMETERS
11607                                  if key in nic_dict])
11608
11609       if "bridge" in nic_dict:
11610         update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11611
11612       new_nic_params = _GetUpdatedParams(old_nic_params,
11613                                          update_params_dict)
11614       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11615       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11616       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11617       self.nic_pinst[nic_op] = new_nic_params
11618       self.nic_pnew[nic_op] = new_filled_nic_params
11619       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11620
11621       if new_nic_mode == constants.NIC_MODE_BRIDGED:
11622         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11623         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11624         if msg:
11625           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11626           if self.op.force:
11627             self.warn.append(msg)
11628           else:
11629             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11630       if new_nic_mode == constants.NIC_MODE_ROUTED:
11631         if constants.INIC_IP in nic_dict:
11632           nic_ip = nic_dict[constants.INIC_IP]
11633         else:
11634           nic_ip = old_nic_ip
11635         if nic_ip is None:
11636           raise errors.OpPrereqError("Cannot set the nic ip to None"
11637                                      " on a routed nic", errors.ECODE_INVAL)
11638       if constants.INIC_MAC in nic_dict:
11639         nic_mac = nic_dict[constants.INIC_MAC]
11640         if nic_mac is None:
11641           raise errors.OpPrereqError("Cannot set the nic mac to None",
11642                                      errors.ECODE_INVAL)
11643         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11644           # otherwise generate the mac
11645           nic_dict[constants.INIC_MAC] = \
11646             self.cfg.GenerateMAC(self.proc.GetECId())
11647         else:
11648           # or validate/reserve the current one
11649           try:
11650             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11651           except errors.ReservationError:
11652             raise errors.OpPrereqError("MAC address %s already in use"
11653                                        " in cluster" % nic_mac,
11654                                        errors.ECODE_NOTUNIQUE)
11655
11656     # DISK processing
11657     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11658       raise errors.OpPrereqError("Disk operations not supported for"
11659                                  " diskless instances",
11660                                  errors.ECODE_INVAL)
11661     for disk_op, _ in self.op.disks:
11662       if disk_op == constants.DDM_REMOVE:
11663         if len(instance.disks) == 1:
11664           raise errors.OpPrereqError("Cannot remove the last disk of"
11665                                      " an instance", errors.ECODE_INVAL)
11666         _CheckInstanceState(self, instance, INSTANCE_DOWN,
11667                             msg="cannot remove disks")
11668
11669       if (disk_op == constants.DDM_ADD and
11670           len(instance.disks) >= constants.MAX_DISKS):
11671         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11672                                    " add more" % constants.MAX_DISKS,
11673                                    errors.ECODE_STATE)
11674       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11675         # an existing disk
11676         if disk_op < 0 or disk_op >= len(instance.disks):
11677           raise errors.OpPrereqError("Invalid disk index %s, valid values"
11678                                      " are 0 to %d" %
11679                                      (disk_op, len(instance.disks)),
11680                                      errors.ECODE_INVAL)
11681
11682     # disabling the instance
11683     if self.op.offline_inst:
11684       _CheckInstanceState(self, instance, INSTANCE_DOWN,
11685                           msg="cannot change instance state to offline")
11686
11687     # enabling the instance
11688     if self.op.online_inst:
11689       _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11690                           msg="cannot make instance go online")
11691
11692   def _ConvertPlainToDrbd(self, feedback_fn):
11693     """Converts an instance from plain to drbd.
11694
11695     """
11696     feedback_fn("Converting template to drbd")
11697     instance = self.instance
11698     pnode = instance.primary_node
11699     snode = self.op.remote_node
11700
11701     assert instance.disk_template == constants.DT_PLAIN
11702
11703     # create a fake disk info for _GenerateDiskTemplate
11704     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11705                   constants.IDISK_VG: d.logical_id[0]}
11706                  for d in instance.disks]
11707     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11708                                       instance.name, pnode, [snode],
11709                                       disk_info, None, None, 0, feedback_fn,
11710                                       self.diskparams)
11711     info = _GetInstanceInfoText(instance)
11712     feedback_fn("Creating aditional volumes...")
11713     # first, create the missing data and meta devices
11714     for disk in new_disks:
11715       # unfortunately this is... not too nice
11716       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11717                             info, True)
11718       for child in disk.children:
11719         _CreateSingleBlockDev(self, snode, instance, child, info, True)
11720     # at this stage, all new LVs have been created, we can rename the
11721     # old ones
11722     feedback_fn("Renaming original volumes...")
11723     rename_list = [(o, n.children[0].logical_id)
11724                    for (o, n) in zip(instance.disks, new_disks)]
11725     result = self.rpc.call_blockdev_rename(pnode, rename_list)
11726     result.Raise("Failed to rename original LVs")
11727
11728     feedback_fn("Initializing DRBD devices...")
11729     # all child devices are in place, we can now create the DRBD devices
11730     for disk in new_disks:
11731       for node in [pnode, snode]:
11732         f_create = node == pnode
11733         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11734
11735     # at this point, the instance has been modified
11736     instance.disk_template = constants.DT_DRBD8
11737     instance.disks = new_disks
11738     self.cfg.Update(instance, feedback_fn)
11739
11740     # Release node locks while waiting for sync
11741     _ReleaseLocks(self, locking.LEVEL_NODE)
11742
11743     # disks are created, waiting for sync
11744     disk_abort = not _WaitForSync(self, instance,
11745                                   oneshot=not self.op.wait_for_sync)
11746     if disk_abort:
11747       raise errors.OpExecError("There are some degraded disks for"
11748                                " this instance, please cleanup manually")
11749
11750     # Node resource locks will be released by caller
11751
11752   def _ConvertDrbdToPlain(self, feedback_fn):
11753     """Converts an instance from drbd to plain.
11754
11755     """
11756     instance = self.instance
11757
11758     assert len(instance.secondary_nodes) == 1
11759     assert instance.disk_template == constants.DT_DRBD8
11760
11761     pnode = instance.primary_node
11762     snode = instance.secondary_nodes[0]
11763     feedback_fn("Converting template to plain")
11764
11765     old_disks = instance.disks
11766     new_disks = [d.children[0] for d in old_disks]
11767
11768     # copy over size and mode
11769     for parent, child in zip(old_disks, new_disks):
11770       child.size = parent.size
11771       child.mode = parent.mode
11772
11773     # update instance structure
11774     instance.disks = new_disks
11775     instance.disk_template = constants.DT_PLAIN
11776     self.cfg.Update(instance, feedback_fn)
11777
11778     # Release locks in case removing disks takes a while
11779     _ReleaseLocks(self, locking.LEVEL_NODE)
11780
11781     feedback_fn("Removing volumes on the secondary node...")
11782     for disk in old_disks:
11783       self.cfg.SetDiskID(disk, snode)
11784       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11785       if msg:
11786         self.LogWarning("Could not remove block device %s on node %s,"
11787                         " continuing anyway: %s", disk.iv_name, snode, msg)
11788
11789     feedback_fn("Removing unneeded volumes on the primary node...")
11790     for idx, disk in enumerate(old_disks):
11791       meta = disk.children[1]
11792       self.cfg.SetDiskID(meta, pnode)
11793       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11794       if msg:
11795         self.LogWarning("Could not remove metadata for disk %d on node %s,"
11796                         " continuing anyway: %s", idx, pnode, msg)
11797
11798     # this is a DRBD disk, return its port to the pool
11799     for disk in old_disks:
11800       tcp_port = disk.logical_id[2]
11801       self.cfg.AddTcpUdpPort(tcp_port)
11802
11803     # Node resource locks will be released by caller
11804
11805   def Exec(self, feedback_fn):
11806     """Modifies an instance.
11807
11808     All parameters take effect only at the next restart of the instance.
11809
11810     """
11811     # Process here the warnings from CheckPrereq, as we don't have a
11812     # feedback_fn there.
11813     for warn in self.warn:
11814       feedback_fn("WARNING: %s" % warn)
11815
11816     assert ((self.op.disk_template is None) ^
11817             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11818       "Not owning any node resource locks"
11819
11820     result = []
11821     instance = self.instance
11822     # disk changes
11823     for disk_op, disk_dict in self.op.disks:
11824       if disk_op == constants.DDM_REMOVE:
11825         # remove the last disk
11826         device = instance.disks.pop()
11827         device_idx = len(instance.disks)
11828         for node, disk in device.ComputeNodeTree(instance.primary_node):
11829           self.cfg.SetDiskID(disk, node)
11830           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11831           if msg:
11832             self.LogWarning("Could not remove disk/%d on node %s: %s,"
11833                             " continuing anyway", device_idx, node, msg)
11834         result.append(("disk/%d" % device_idx, "remove"))
11835
11836         # if this is a DRBD disk, return its port to the pool
11837         if device.dev_type in constants.LDS_DRBD:
11838           tcp_port = device.logical_id[2]
11839           self.cfg.AddTcpUdpPort(tcp_port)
11840       elif disk_op == constants.DDM_ADD:
11841         # add a new disk
11842         if instance.disk_template in (constants.DT_FILE,
11843                                         constants.DT_SHARED_FILE):
11844           file_driver, file_path = instance.disks[0].logical_id
11845           file_path = os.path.dirname(file_path)
11846         else:
11847           file_driver = file_path = None
11848         disk_idx_base = len(instance.disks)
11849         new_disk = _GenerateDiskTemplate(self,
11850                                          instance.disk_template,
11851                                          instance.name, instance.primary_node,
11852                                          instance.secondary_nodes,
11853                                          [disk_dict],
11854                                          file_path,
11855                                          file_driver,
11856                                          disk_idx_base,
11857                                          feedback_fn,
11858                                          self.diskparams)[0]
11859         instance.disks.append(new_disk)
11860         info = _GetInstanceInfoText(instance)
11861
11862         logging.info("Creating volume %s for instance %s",
11863                      new_disk.iv_name, instance.name)
11864         # Note: this needs to be kept in sync with _CreateDisks
11865         #HARDCODE
11866         for node in instance.all_nodes:
11867           f_create = node == instance.primary_node
11868           try:
11869             _CreateBlockDev(self, node, instance, new_disk,
11870                             f_create, info, f_create)
11871           except errors.OpExecError, err:
11872             self.LogWarning("Failed to create volume %s (%s) on"
11873                             " node %s: %s",
11874                             new_disk.iv_name, new_disk, node, err)
11875         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11876                        (new_disk.size, new_disk.mode)))
11877       else:
11878         # change a given disk
11879         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11880         result.append(("disk.mode/%d" % disk_op,
11881                        disk_dict[constants.IDISK_MODE]))
11882
11883     if self.op.disk_template:
11884       if __debug__:
11885         check_nodes = set(instance.all_nodes)
11886         if self.op.remote_node:
11887           check_nodes.add(self.op.remote_node)
11888         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
11889           owned = self.owned_locks(level)
11890           assert not (check_nodes - owned), \
11891             ("Not owning the correct locks, owning %r, expected at least %r" %
11892              (owned, check_nodes))
11893
11894       r_shut = _ShutdownInstanceDisks(self, instance)
11895       if not r_shut:
11896         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11897                                  " proceed with disk template conversion")
11898       mode = (instance.disk_template, self.op.disk_template)
11899       try:
11900         self._DISK_CONVERSIONS[mode](self, feedback_fn)
11901       except:
11902         self.cfg.ReleaseDRBDMinors(instance.name)
11903         raise
11904       result.append(("disk_template", self.op.disk_template))
11905
11906       assert instance.disk_template == self.op.disk_template, \
11907         ("Expected disk template '%s', found '%s'" %
11908          (self.op.disk_template, instance.disk_template))
11909
11910     # Release node and resource locks if there are any (they might already have
11911     # been released during disk conversion)
11912     _ReleaseLocks(self, locking.LEVEL_NODE)
11913     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11914
11915     # NIC changes
11916     for nic_op, nic_dict in self.op.nics:
11917       if nic_op == constants.DDM_REMOVE:
11918         # remove the last nic
11919         del instance.nics[-1]
11920         result.append(("nic.%d" % len(instance.nics), "remove"))
11921       elif nic_op == constants.DDM_ADD:
11922         # mac and bridge should be set, by now
11923         mac = nic_dict[constants.INIC_MAC]
11924         ip = nic_dict.get(constants.INIC_IP, None)
11925         nicparams = self.nic_pinst[constants.DDM_ADD]
11926         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11927         instance.nics.append(new_nic)
11928         result.append(("nic.%d" % (len(instance.nics) - 1),
11929                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
11930                        (new_nic.mac, new_nic.ip,
11931                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11932                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11933                        )))
11934       else:
11935         for key in (constants.INIC_MAC, constants.INIC_IP):
11936           if key in nic_dict:
11937             setattr(instance.nics[nic_op], key, nic_dict[key])
11938         if nic_op in self.nic_pinst:
11939           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11940         for key, val in nic_dict.iteritems():
11941           result.append(("nic.%s/%d" % (key, nic_op), val))
11942
11943     # hvparams changes
11944     if self.op.hvparams:
11945       instance.hvparams = self.hv_inst
11946       for key, val in self.op.hvparams.iteritems():
11947         result.append(("hv/%s" % key, val))
11948
11949     # beparams changes
11950     if self.op.beparams:
11951       instance.beparams = self.be_inst
11952       for key, val in self.op.beparams.iteritems():
11953         result.append(("be/%s" % key, val))
11954
11955     # OS change
11956     if self.op.os_name:
11957       instance.os = self.op.os_name
11958
11959     # osparams changes
11960     if self.op.osparams:
11961       instance.osparams = self.os_inst
11962       for key, val in self.op.osparams.iteritems():
11963         result.append(("os/%s" % key, val))
11964
11965     # online/offline instance
11966     if self.op.online_inst:
11967       self.cfg.MarkInstanceDown(instance.name)
11968       result.append(("admin_state", constants.ADMINST_DOWN))
11969     if self.op.offline_inst:
11970       self.cfg.MarkInstanceOffline(instance.name)
11971       result.append(("admin_state", constants.ADMINST_OFFLINE))
11972
11973     self.cfg.Update(instance, feedback_fn)
11974
11975     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
11976                 self.owned_locks(locking.LEVEL_NODE)), \
11977       "All node locks should have been released by now"
11978
11979     return result
11980
11981   _DISK_CONVERSIONS = {
11982     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11983     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11984     }
11985
11986
11987 class LUInstanceChangeGroup(LogicalUnit):
11988   HPATH = "instance-change-group"
11989   HTYPE = constants.HTYPE_INSTANCE
11990   REQ_BGL = False
11991
11992   def ExpandNames(self):
11993     self.share_locks = _ShareAll()
11994     self.needed_locks = {
11995       locking.LEVEL_NODEGROUP: [],
11996       locking.LEVEL_NODE: [],
11997       }
11998
11999     self._ExpandAndLockInstance()
12000
12001     if self.op.target_groups:
12002       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12003                                   self.op.target_groups)
12004     else:
12005       self.req_target_uuids = None
12006
12007     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12008
12009   def DeclareLocks(self, level):
12010     if level == locking.LEVEL_NODEGROUP:
12011       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12012
12013       if self.req_target_uuids:
12014         lock_groups = set(self.req_target_uuids)
12015
12016         # Lock all groups used by instance optimistically; this requires going
12017         # via the node before it's locked, requiring verification later on
12018         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12019         lock_groups.update(instance_groups)
12020       else:
12021         # No target groups, need to lock all of them
12022         lock_groups = locking.ALL_SET
12023
12024       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12025
12026     elif level == locking.LEVEL_NODE:
12027       if self.req_target_uuids:
12028         # Lock all nodes used by instances
12029         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12030         self._LockInstancesNodes()
12031
12032         # Lock all nodes in all potential target groups
12033         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12034                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12035         member_nodes = [node_name
12036                         for group in lock_groups
12037                         for node_name in self.cfg.GetNodeGroup(group).members]
12038         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12039       else:
12040         # Lock all nodes as all groups are potential targets
12041         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12042
12043   def CheckPrereq(self):
12044     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12045     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12046     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12047
12048     assert (self.req_target_uuids is None or
12049             owned_groups.issuperset(self.req_target_uuids))
12050     assert owned_instances == set([self.op.instance_name])
12051
12052     # Get instance information
12053     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12054
12055     # Check if node groups for locked instance are still correct
12056     assert owned_nodes.issuperset(self.instance.all_nodes), \
12057       ("Instance %s's nodes changed while we kept the lock" %
12058        self.op.instance_name)
12059
12060     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12061                                            owned_groups)
12062
12063     if self.req_target_uuids:
12064       # User requested specific target groups
12065       self.target_uuids = self.req_target_uuids
12066     else:
12067       # All groups except those used by the instance are potential targets
12068       self.target_uuids = owned_groups - inst_groups
12069
12070     conflicting_groups = self.target_uuids & inst_groups
12071     if conflicting_groups:
12072       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12073                                  " used by the instance '%s'" %
12074                                  (utils.CommaJoin(conflicting_groups),
12075                                   self.op.instance_name),
12076                                  errors.ECODE_INVAL)
12077
12078     if not self.target_uuids:
12079       raise errors.OpPrereqError("There are no possible target groups",
12080                                  errors.ECODE_INVAL)
12081
12082   def BuildHooksEnv(self):
12083     """Build hooks env.
12084
12085     """
12086     assert self.target_uuids
12087
12088     env = {
12089       "TARGET_GROUPS": " ".join(self.target_uuids),
12090       }
12091
12092     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12093
12094     return env
12095
12096   def BuildHooksNodes(self):
12097     """Build hooks nodes.
12098
12099     """
12100     mn = self.cfg.GetMasterNode()
12101     return ([mn], [mn])
12102
12103   def Exec(self, feedback_fn):
12104     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12105
12106     assert instances == [self.op.instance_name], "Instance not locked"
12107
12108     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12109                      instances=instances, target_groups=list(self.target_uuids))
12110
12111     ial.Run(self.op.iallocator)
12112
12113     if not ial.success:
12114       raise errors.OpPrereqError("Can't compute solution for changing group of"
12115                                  " instance '%s' using iallocator '%s': %s" %
12116                                  (self.op.instance_name, self.op.iallocator,
12117                                   ial.info),
12118                                  errors.ECODE_NORES)
12119
12120     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12121
12122     self.LogInfo("Iallocator returned %s job(s) for changing group of"
12123                  " instance '%s'", len(jobs), self.op.instance_name)
12124
12125     return ResultWithJobs(jobs)
12126
12127
12128 class LUBackupQuery(NoHooksLU):
12129   """Query the exports list
12130
12131   """
12132   REQ_BGL = False
12133
12134   def ExpandNames(self):
12135     self.needed_locks = {}
12136     self.share_locks[locking.LEVEL_NODE] = 1
12137     if not self.op.nodes:
12138       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12139     else:
12140       self.needed_locks[locking.LEVEL_NODE] = \
12141         _GetWantedNodes(self, self.op.nodes)
12142
12143   def Exec(self, feedback_fn):
12144     """Compute the list of all the exported system images.
12145
12146     @rtype: dict
12147     @return: a dictionary with the structure node->(export-list)
12148         where export-list is a list of the instances exported on
12149         that node.
12150
12151     """
12152     self.nodes = self.owned_locks(locking.LEVEL_NODE)
12153     rpcresult = self.rpc.call_export_list(self.nodes)
12154     result = {}
12155     for node in rpcresult:
12156       if rpcresult[node].fail_msg:
12157         result[node] = False
12158       else:
12159         result[node] = rpcresult[node].payload
12160
12161     return result
12162
12163
12164 class LUBackupPrepare(NoHooksLU):
12165   """Prepares an instance for an export and returns useful information.
12166
12167   """
12168   REQ_BGL = False
12169
12170   def ExpandNames(self):
12171     self._ExpandAndLockInstance()
12172
12173   def CheckPrereq(self):
12174     """Check prerequisites.
12175
12176     """
12177     instance_name = self.op.instance_name
12178
12179     self.instance = self.cfg.GetInstanceInfo(instance_name)
12180     assert self.instance is not None, \
12181           "Cannot retrieve locked instance %s" % self.op.instance_name
12182     _CheckNodeOnline(self, self.instance.primary_node)
12183
12184     self._cds = _GetClusterDomainSecret()
12185
12186   def Exec(self, feedback_fn):
12187     """Prepares an instance for an export.
12188
12189     """
12190     instance = self.instance
12191
12192     if self.op.mode == constants.EXPORT_MODE_REMOTE:
12193       salt = utils.GenerateSecret(8)
12194
12195       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12196       result = self.rpc.call_x509_cert_create(instance.primary_node,
12197                                               constants.RIE_CERT_VALIDITY)
12198       result.Raise("Can't create X509 key and certificate on %s" % result.node)
12199
12200       (name, cert_pem) = result.payload
12201
12202       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12203                                              cert_pem)
12204
12205       return {
12206         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12207         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12208                           salt),
12209         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12210         }
12211
12212     return None
12213
12214
12215 class LUBackupExport(LogicalUnit):
12216   """Export an instance to an image in the cluster.
12217
12218   """
12219   HPATH = "instance-export"
12220   HTYPE = constants.HTYPE_INSTANCE
12221   REQ_BGL = False
12222
12223   def CheckArguments(self):
12224     """Check the arguments.
12225
12226     """
12227     self.x509_key_name = self.op.x509_key_name
12228     self.dest_x509_ca_pem = self.op.destination_x509_ca
12229
12230     if self.op.mode == constants.EXPORT_MODE_REMOTE:
12231       if not self.x509_key_name:
12232         raise errors.OpPrereqError("Missing X509 key name for encryption",
12233                                    errors.ECODE_INVAL)
12234
12235       if not self.dest_x509_ca_pem:
12236         raise errors.OpPrereqError("Missing destination X509 CA",
12237                                    errors.ECODE_INVAL)
12238
12239   def ExpandNames(self):
12240     self._ExpandAndLockInstance()
12241
12242     # Lock all nodes for local exports
12243     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12244       # FIXME: lock only instance primary and destination node
12245       #
12246       # Sad but true, for now we have do lock all nodes, as we don't know where
12247       # the previous export might be, and in this LU we search for it and
12248       # remove it from its current node. In the future we could fix this by:
12249       #  - making a tasklet to search (share-lock all), then create the
12250       #    new one, then one to remove, after
12251       #  - removing the removal operation altogether
12252       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12253
12254   def DeclareLocks(self, level):
12255     """Last minute lock declaration."""
12256     # All nodes are locked anyway, so nothing to do here.
12257
12258   def BuildHooksEnv(self):
12259     """Build hooks env.
12260
12261     This will run on the master, primary node and target node.
12262
12263     """
12264     env = {
12265       "EXPORT_MODE": self.op.mode,
12266       "EXPORT_NODE": self.op.target_node,
12267       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12268       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12269       # TODO: Generic function for boolean env variables
12270       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12271       }
12272
12273     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12274
12275     return env
12276
12277   def BuildHooksNodes(self):
12278     """Build hooks nodes.
12279
12280     """
12281     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12282
12283     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12284       nl.append(self.op.target_node)
12285
12286     return (nl, nl)
12287
12288   def CheckPrereq(self):
12289     """Check prerequisites.
12290
12291     This checks that the instance and node names are valid.
12292
12293     """
12294     instance_name = self.op.instance_name
12295
12296     self.instance = self.cfg.GetInstanceInfo(instance_name)
12297     assert self.instance is not None, \
12298           "Cannot retrieve locked instance %s" % self.op.instance_name
12299     _CheckNodeOnline(self, self.instance.primary_node)
12300
12301     if (self.op.remove_instance and
12302         self.instance.admin_state == constants.ADMINST_UP and
12303         not self.op.shutdown):
12304       raise errors.OpPrereqError("Can not remove instance without shutting it"
12305                                  " down before")
12306
12307     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12308       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12309       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12310       assert self.dst_node is not None
12311
12312       _CheckNodeOnline(self, self.dst_node.name)
12313       _CheckNodeNotDrained(self, self.dst_node.name)
12314
12315       self._cds = None
12316       self.dest_disk_info = None
12317       self.dest_x509_ca = None
12318
12319     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12320       self.dst_node = None
12321
12322       if len(self.op.target_node) != len(self.instance.disks):
12323         raise errors.OpPrereqError(("Received destination information for %s"
12324                                     " disks, but instance %s has %s disks") %
12325                                    (len(self.op.target_node), instance_name,
12326                                     len(self.instance.disks)),
12327                                    errors.ECODE_INVAL)
12328
12329       cds = _GetClusterDomainSecret()
12330
12331       # Check X509 key name
12332       try:
12333         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12334       except (TypeError, ValueError), err:
12335         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12336
12337       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12338         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12339                                    errors.ECODE_INVAL)
12340
12341       # Load and verify CA
12342       try:
12343         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12344       except OpenSSL.crypto.Error, err:
12345         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12346                                    (err, ), errors.ECODE_INVAL)
12347
12348       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12349       if errcode is not None:
12350         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12351                                    (msg, ), errors.ECODE_INVAL)
12352
12353       self.dest_x509_ca = cert
12354
12355       # Verify target information
12356       disk_info = []
12357       for idx, disk_data in enumerate(self.op.target_node):
12358         try:
12359           (host, port, magic) = \
12360             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12361         except errors.GenericError, err:
12362           raise errors.OpPrereqError("Target info for disk %s: %s" %
12363                                      (idx, err), errors.ECODE_INVAL)
12364
12365         disk_info.append((host, port, magic))
12366
12367       assert len(disk_info) == len(self.op.target_node)
12368       self.dest_disk_info = disk_info
12369
12370     else:
12371       raise errors.ProgrammerError("Unhandled export mode %r" %
12372                                    self.op.mode)
12373
12374     # instance disk type verification
12375     # TODO: Implement export support for file-based disks
12376     for disk in self.instance.disks:
12377       if disk.dev_type == constants.LD_FILE:
12378         raise errors.OpPrereqError("Export not supported for instances with"
12379                                    " file-based disks", errors.ECODE_INVAL)
12380
12381   def _CleanupExports(self, feedback_fn):
12382     """Removes exports of current instance from all other nodes.
12383
12384     If an instance in a cluster with nodes A..D was exported to node C, its
12385     exports will be removed from the nodes A, B and D.
12386
12387     """
12388     assert self.op.mode != constants.EXPORT_MODE_REMOTE
12389
12390     nodelist = self.cfg.GetNodeList()
12391     nodelist.remove(self.dst_node.name)
12392
12393     # on one-node clusters nodelist will be empty after the removal
12394     # if we proceed the backup would be removed because OpBackupQuery
12395     # substitutes an empty list with the full cluster node list.
12396     iname = self.instance.name
12397     if nodelist:
12398       feedback_fn("Removing old exports for instance %s" % iname)
12399       exportlist = self.rpc.call_export_list(nodelist)
12400       for node in exportlist:
12401         if exportlist[node].fail_msg:
12402           continue
12403         if iname in exportlist[node].payload:
12404           msg = self.rpc.call_export_remove(node, iname).fail_msg
12405           if msg:
12406             self.LogWarning("Could not remove older export for instance %s"
12407                             " on node %s: %s", iname, node, msg)
12408
12409   def Exec(self, feedback_fn):
12410     """Export an instance to an image in the cluster.
12411
12412     """
12413     assert self.op.mode in constants.EXPORT_MODES
12414
12415     instance = self.instance
12416     src_node = instance.primary_node
12417
12418     if self.op.shutdown:
12419       # shutdown the instance, but not the disks
12420       feedback_fn("Shutting down instance %s" % instance.name)
12421       result = self.rpc.call_instance_shutdown(src_node, instance,
12422                                                self.op.shutdown_timeout)
12423       # TODO: Maybe ignore failures if ignore_remove_failures is set
12424       result.Raise("Could not shutdown instance %s on"
12425                    " node %s" % (instance.name, src_node))
12426
12427     # set the disks ID correctly since call_instance_start needs the
12428     # correct drbd minor to create the symlinks
12429     for disk in instance.disks:
12430       self.cfg.SetDiskID(disk, src_node)
12431
12432     activate_disks = (instance.admin_state != constants.ADMINST_UP)
12433
12434     if activate_disks:
12435       # Activate the instance disks if we'exporting a stopped instance
12436       feedback_fn("Activating disks for %s" % instance.name)
12437       _StartInstanceDisks(self, instance, None)
12438
12439     try:
12440       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12441                                                      instance)
12442
12443       helper.CreateSnapshots()
12444       try:
12445         if (self.op.shutdown and
12446             instance.admin_state == constants.ADMINST_UP and
12447             not self.op.remove_instance):
12448           assert not activate_disks
12449           feedback_fn("Starting instance %s" % instance.name)
12450           result = self.rpc.call_instance_start(src_node,
12451                                                 (instance, None, None), False)
12452           msg = result.fail_msg
12453           if msg:
12454             feedback_fn("Failed to start instance: %s" % msg)
12455             _ShutdownInstanceDisks(self, instance)
12456             raise errors.OpExecError("Could not start instance: %s" % msg)
12457
12458         if self.op.mode == constants.EXPORT_MODE_LOCAL:
12459           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12460         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12461           connect_timeout = constants.RIE_CONNECT_TIMEOUT
12462           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12463
12464           (key_name, _, _) = self.x509_key_name
12465
12466           dest_ca_pem = \
12467             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12468                                             self.dest_x509_ca)
12469
12470           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12471                                                      key_name, dest_ca_pem,
12472                                                      timeouts)
12473       finally:
12474         helper.Cleanup()
12475
12476       # Check for backwards compatibility
12477       assert len(dresults) == len(instance.disks)
12478       assert compat.all(isinstance(i, bool) for i in dresults), \
12479              "Not all results are boolean: %r" % dresults
12480
12481     finally:
12482       if activate_disks:
12483         feedback_fn("Deactivating disks for %s" % instance.name)
12484         _ShutdownInstanceDisks(self, instance)
12485
12486     if not (compat.all(dresults) and fin_resu):
12487       failures = []
12488       if not fin_resu:
12489         failures.append("export finalization")
12490       if not compat.all(dresults):
12491         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12492                                if not dsk)
12493         failures.append("disk export: disk(s) %s" % fdsk)
12494
12495       raise errors.OpExecError("Export failed, errors in %s" %
12496                                utils.CommaJoin(failures))
12497
12498     # At this point, the export was successful, we can cleanup/finish
12499
12500     # Remove instance if requested
12501     if self.op.remove_instance:
12502       feedback_fn("Removing instance %s" % instance.name)
12503       _RemoveInstance(self, feedback_fn, instance,
12504                       self.op.ignore_remove_failures)
12505
12506     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12507       self._CleanupExports(feedback_fn)
12508
12509     return fin_resu, dresults
12510
12511
12512 class LUBackupRemove(NoHooksLU):
12513   """Remove exports related to the named instance.
12514
12515   """
12516   REQ_BGL = False
12517
12518   def ExpandNames(self):
12519     self.needed_locks = {}
12520     # We need all nodes to be locked in order for RemoveExport to work, but we
12521     # don't need to lock the instance itself, as nothing will happen to it (and
12522     # we can remove exports also for a removed instance)
12523     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12524
12525   def Exec(self, feedback_fn):
12526     """Remove any export.
12527
12528     """
12529     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12530     # If the instance was not found we'll try with the name that was passed in.
12531     # This will only work if it was an FQDN, though.
12532     fqdn_warn = False
12533     if not instance_name:
12534       fqdn_warn = True
12535       instance_name = self.op.instance_name
12536
12537     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12538     exportlist = self.rpc.call_export_list(locked_nodes)
12539     found = False
12540     for node in exportlist:
12541       msg = exportlist[node].fail_msg
12542       if msg:
12543         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12544         continue
12545       if instance_name in exportlist[node].payload:
12546         found = True
12547         result = self.rpc.call_export_remove(node, instance_name)
12548         msg = result.fail_msg
12549         if msg:
12550           logging.error("Could not remove export for instance %s"
12551                         " on node %s: %s", instance_name, node, msg)
12552
12553     if fqdn_warn and not found:
12554       feedback_fn("Export not found. If trying to remove an export belonging"
12555                   " to a deleted instance please use its Fully Qualified"
12556                   " Domain Name.")
12557
12558
12559 class LUGroupAdd(LogicalUnit):
12560   """Logical unit for creating node groups.
12561
12562   """
12563   HPATH = "group-add"
12564   HTYPE = constants.HTYPE_GROUP
12565   REQ_BGL = False
12566
12567   def ExpandNames(self):
12568     # We need the new group's UUID here so that we can create and acquire the
12569     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12570     # that it should not check whether the UUID exists in the configuration.
12571     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12572     self.needed_locks = {}
12573     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12574
12575   def CheckPrereq(self):
12576     """Check prerequisites.
12577
12578     This checks that the given group name is not an existing node group
12579     already.
12580
12581     """
12582     try:
12583       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12584     except errors.OpPrereqError:
12585       pass
12586     else:
12587       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12588                                  " node group (UUID: %s)" %
12589                                  (self.op.group_name, existing_uuid),
12590                                  errors.ECODE_EXISTS)
12591
12592     if self.op.ndparams:
12593       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12594
12595     if self.op.diskparams:
12596       for templ in constants.DISK_TEMPLATES:
12597         if templ not in self.op.diskparams:
12598           self.op.diskparams[templ] = {}
12599         utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12600     else:
12601       self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12602
12603   def BuildHooksEnv(self):
12604     """Build hooks env.
12605
12606     """
12607     return {
12608       "GROUP_NAME": self.op.group_name,
12609       }
12610
12611   def BuildHooksNodes(self):
12612     """Build hooks nodes.
12613
12614     """
12615     mn = self.cfg.GetMasterNode()
12616     return ([mn], [mn])
12617
12618   def Exec(self, feedback_fn):
12619     """Add the node group to the cluster.
12620
12621     """
12622     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12623                                   uuid=self.group_uuid,
12624                                   alloc_policy=self.op.alloc_policy,
12625                                   ndparams=self.op.ndparams,
12626                                   diskparams=self.op.diskparams)
12627
12628     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12629     del self.remove_locks[locking.LEVEL_NODEGROUP]
12630
12631
12632 class LUGroupAssignNodes(NoHooksLU):
12633   """Logical unit for assigning nodes to groups.
12634
12635   """
12636   REQ_BGL = False
12637
12638   def ExpandNames(self):
12639     # These raise errors.OpPrereqError on their own:
12640     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12641     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12642
12643     # We want to lock all the affected nodes and groups. We have readily
12644     # available the list of nodes, and the *destination* group. To gather the
12645     # list of "source" groups, we need to fetch node information later on.
12646     self.needed_locks = {
12647       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12648       locking.LEVEL_NODE: self.op.nodes,
12649       }
12650
12651   def DeclareLocks(self, level):
12652     if level == locking.LEVEL_NODEGROUP:
12653       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12654
12655       # Try to get all affected nodes' groups without having the group or node
12656       # lock yet. Needs verification later in the code flow.
12657       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12658
12659       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12660
12661   def CheckPrereq(self):
12662     """Check prerequisites.
12663
12664     """
12665     assert self.needed_locks[locking.LEVEL_NODEGROUP]
12666     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12667             frozenset(self.op.nodes))
12668
12669     expected_locks = (set([self.group_uuid]) |
12670                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12671     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12672     if actual_locks != expected_locks:
12673       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12674                                " current groups are '%s', used to be '%s'" %
12675                                (utils.CommaJoin(expected_locks),
12676                                 utils.CommaJoin(actual_locks)))
12677
12678     self.node_data = self.cfg.GetAllNodesInfo()
12679     self.group = self.cfg.GetNodeGroup(self.group_uuid)
12680     instance_data = self.cfg.GetAllInstancesInfo()
12681
12682     if self.group is None:
12683       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12684                                (self.op.group_name, self.group_uuid))
12685
12686     (new_splits, previous_splits) = \
12687       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12688                                              for node in self.op.nodes],
12689                                             self.node_data, instance_data)
12690
12691     if new_splits:
12692       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12693
12694       if not self.op.force:
12695         raise errors.OpExecError("The following instances get split by this"
12696                                  " change and --force was not given: %s" %
12697                                  fmt_new_splits)
12698       else:
12699         self.LogWarning("This operation will split the following instances: %s",
12700                         fmt_new_splits)
12701
12702         if previous_splits:
12703           self.LogWarning("In addition, these already-split instances continue"
12704                           " to be split across groups: %s",
12705                           utils.CommaJoin(utils.NiceSort(previous_splits)))
12706
12707   def Exec(self, feedback_fn):
12708     """Assign nodes to a new group.
12709
12710     """
12711     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12712
12713     self.cfg.AssignGroupNodes(mods)
12714
12715   @staticmethod
12716   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12717     """Check for split instances after a node assignment.
12718
12719     This method considers a series of node assignments as an atomic operation,
12720     and returns information about split instances after applying the set of
12721     changes.
12722
12723     In particular, it returns information about newly split instances, and
12724     instances that were already split, and remain so after the change.
12725
12726     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12727     considered.
12728
12729     @type changes: list of (node_name, new_group_uuid) pairs.
12730     @param changes: list of node assignments to consider.
12731     @param node_data: a dict with data for all nodes
12732     @param instance_data: a dict with all instances to consider
12733     @rtype: a two-tuple
12734     @return: a list of instances that were previously okay and result split as a
12735       consequence of this change, and a list of instances that were previously
12736       split and this change does not fix.
12737
12738     """
12739     changed_nodes = dict((node, group) for node, group in changes
12740                          if node_data[node].group != group)
12741
12742     all_split_instances = set()
12743     previously_split_instances = set()
12744
12745     def InstanceNodes(instance):
12746       return [instance.primary_node] + list(instance.secondary_nodes)
12747
12748     for inst in instance_data.values():
12749       if inst.disk_template not in constants.DTS_INT_MIRROR:
12750         continue
12751
12752       instance_nodes = InstanceNodes(inst)
12753
12754       if len(set(node_data[node].group for node in instance_nodes)) > 1:
12755         previously_split_instances.add(inst.name)
12756
12757       if len(set(changed_nodes.get(node, node_data[node].group)
12758                  for node in instance_nodes)) > 1:
12759         all_split_instances.add(inst.name)
12760
12761     return (list(all_split_instances - previously_split_instances),
12762             list(previously_split_instances & all_split_instances))
12763
12764
12765 class _GroupQuery(_QueryBase):
12766   FIELDS = query.GROUP_FIELDS
12767
12768   def ExpandNames(self, lu):
12769     lu.needed_locks = {}
12770
12771     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12772     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12773
12774     if not self.names:
12775       self.wanted = [name_to_uuid[name]
12776                      for name in utils.NiceSort(name_to_uuid.keys())]
12777     else:
12778       # Accept names to be either names or UUIDs.
12779       missing = []
12780       self.wanted = []
12781       all_uuid = frozenset(self._all_groups.keys())
12782
12783       for name in self.names:
12784         if name in all_uuid:
12785           self.wanted.append(name)
12786         elif name in name_to_uuid:
12787           self.wanted.append(name_to_uuid[name])
12788         else:
12789           missing.append(name)
12790
12791       if missing:
12792         raise errors.OpPrereqError("Some groups do not exist: %s" %
12793                                    utils.CommaJoin(missing),
12794                                    errors.ECODE_NOENT)
12795
12796   def DeclareLocks(self, lu, level):
12797     pass
12798
12799   def _GetQueryData(self, lu):
12800     """Computes the list of node groups and their attributes.
12801
12802     """
12803     do_nodes = query.GQ_NODE in self.requested_data
12804     do_instances = query.GQ_INST in self.requested_data
12805
12806     group_to_nodes = None
12807     group_to_instances = None
12808
12809     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12810     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12811     # latter GetAllInstancesInfo() is not enough, for we have to go through
12812     # instance->node. Hence, we will need to process nodes even if we only need
12813     # instance information.
12814     if do_nodes or do_instances:
12815       all_nodes = lu.cfg.GetAllNodesInfo()
12816       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12817       node_to_group = {}
12818
12819       for node in all_nodes.values():
12820         if node.group in group_to_nodes:
12821           group_to_nodes[node.group].append(node.name)
12822           node_to_group[node.name] = node.group
12823
12824       if do_instances:
12825         all_instances = lu.cfg.GetAllInstancesInfo()
12826         group_to_instances = dict((uuid, []) for uuid in self.wanted)
12827
12828         for instance in all_instances.values():
12829           node = instance.primary_node
12830           if node in node_to_group:
12831             group_to_instances[node_to_group[node]].append(instance.name)
12832
12833         if not do_nodes:
12834           # Do not pass on node information if it was not requested.
12835           group_to_nodes = None
12836
12837     return query.GroupQueryData([self._all_groups[uuid]
12838                                  for uuid in self.wanted],
12839                                 group_to_nodes, group_to_instances)
12840
12841
12842 class LUGroupQuery(NoHooksLU):
12843   """Logical unit for querying node groups.
12844
12845   """
12846   REQ_BGL = False
12847
12848   def CheckArguments(self):
12849     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12850                           self.op.output_fields, False)
12851
12852   def ExpandNames(self):
12853     self.gq.ExpandNames(self)
12854
12855   def DeclareLocks(self, level):
12856     self.gq.DeclareLocks(self, level)
12857
12858   def Exec(self, feedback_fn):
12859     return self.gq.OldStyleQuery(self)
12860
12861
12862 class LUGroupSetParams(LogicalUnit):
12863   """Modifies the parameters of a node group.
12864
12865   """
12866   HPATH = "group-modify"
12867   HTYPE = constants.HTYPE_GROUP
12868   REQ_BGL = False
12869
12870   def CheckArguments(self):
12871     all_changes = [
12872       self.op.ndparams,
12873       self.op.diskparams,
12874       self.op.alloc_policy,
12875       ]
12876
12877     if all_changes.count(None) == len(all_changes):
12878       raise errors.OpPrereqError("Please pass at least one modification",
12879                                  errors.ECODE_INVAL)
12880
12881   def ExpandNames(self):
12882     # This raises errors.OpPrereqError on its own:
12883     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12884
12885     self.needed_locks = {
12886       locking.LEVEL_NODEGROUP: [self.group_uuid],
12887       }
12888
12889   def CheckPrereq(self):
12890     """Check prerequisites.
12891
12892     """
12893     self.group = self.cfg.GetNodeGroup(self.group_uuid)
12894
12895     if self.group is None:
12896       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12897                                (self.op.group_name, self.group_uuid))
12898
12899     if self.op.ndparams:
12900       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12901       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12902       self.new_ndparams = new_ndparams
12903
12904     if self.op.diskparams:
12905       self.new_diskparams = dict()
12906       for templ in constants.DISK_TEMPLATES:
12907         if templ not in self.op.diskparams:
12908           self.op.diskparams[templ] = {}
12909         new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
12910                                              self.op.diskparams[templ])
12911         utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
12912         self.new_diskparams[templ] = new_templ_params
12913
12914   def BuildHooksEnv(self):
12915     """Build hooks env.
12916
12917     """
12918     return {
12919       "GROUP_NAME": self.op.group_name,
12920       "NEW_ALLOC_POLICY": self.op.alloc_policy,
12921       }
12922
12923   def BuildHooksNodes(self):
12924     """Build hooks nodes.
12925
12926     """
12927     mn = self.cfg.GetMasterNode()
12928     return ([mn], [mn])
12929
12930   def Exec(self, feedback_fn):
12931     """Modifies the node group.
12932
12933     """
12934     result = []
12935
12936     if self.op.ndparams:
12937       self.group.ndparams = self.new_ndparams
12938       result.append(("ndparams", str(self.group.ndparams)))
12939
12940     if self.op.diskparams:
12941       self.group.diskparams = self.new_diskparams
12942       result.append(("diskparams", str(self.group.diskparams)))
12943
12944     if self.op.alloc_policy:
12945       self.group.alloc_policy = self.op.alloc_policy
12946
12947     self.cfg.Update(self.group, feedback_fn)
12948     return result
12949
12950
12951 class LUGroupRemove(LogicalUnit):
12952   HPATH = "group-remove"
12953   HTYPE = constants.HTYPE_GROUP
12954   REQ_BGL = False
12955
12956   def ExpandNames(self):
12957     # This will raises errors.OpPrereqError on its own:
12958     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12959     self.needed_locks = {
12960       locking.LEVEL_NODEGROUP: [self.group_uuid],
12961       }
12962
12963   def CheckPrereq(self):
12964     """Check prerequisites.
12965
12966     This checks that the given group name exists as a node group, that is
12967     empty (i.e., contains no nodes), and that is not the last group of the
12968     cluster.
12969
12970     """
12971     # Verify that the group is empty.
12972     group_nodes = [node.name
12973                    for node in self.cfg.GetAllNodesInfo().values()
12974                    if node.group == self.group_uuid]
12975
12976     if group_nodes:
12977       raise errors.OpPrereqError("Group '%s' not empty, has the following"
12978                                  " nodes: %s" %
12979                                  (self.op.group_name,
12980                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
12981                                  errors.ECODE_STATE)
12982
12983     # Verify the cluster would not be left group-less.
12984     if len(self.cfg.GetNodeGroupList()) == 1:
12985       raise errors.OpPrereqError("Group '%s' is the only group,"
12986                                  " cannot be removed" %
12987                                  self.op.group_name,
12988                                  errors.ECODE_STATE)
12989
12990   def BuildHooksEnv(self):
12991     """Build hooks env.
12992
12993     """
12994     return {
12995       "GROUP_NAME": self.op.group_name,
12996       }
12997
12998   def BuildHooksNodes(self):
12999     """Build hooks nodes.
13000
13001     """
13002     mn = self.cfg.GetMasterNode()
13003     return ([mn], [mn])
13004
13005   def Exec(self, feedback_fn):
13006     """Remove the node group.
13007
13008     """
13009     try:
13010       self.cfg.RemoveNodeGroup(self.group_uuid)
13011     except errors.ConfigurationError:
13012       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13013                                (self.op.group_name, self.group_uuid))
13014
13015     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13016
13017
13018 class LUGroupRename(LogicalUnit):
13019   HPATH = "group-rename"
13020   HTYPE = constants.HTYPE_GROUP
13021   REQ_BGL = False
13022
13023   def ExpandNames(self):
13024     # This raises errors.OpPrereqError on its own:
13025     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13026
13027     self.needed_locks = {
13028       locking.LEVEL_NODEGROUP: [self.group_uuid],
13029       }
13030
13031   def CheckPrereq(self):
13032     """Check prerequisites.
13033
13034     Ensures requested new name is not yet used.
13035
13036     """
13037     try:
13038       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13039     except errors.OpPrereqError:
13040       pass
13041     else:
13042       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13043                                  " node group (UUID: %s)" %
13044                                  (self.op.new_name, new_name_uuid),
13045                                  errors.ECODE_EXISTS)
13046
13047   def BuildHooksEnv(self):
13048     """Build hooks env.
13049
13050     """
13051     return {
13052       "OLD_NAME": self.op.group_name,
13053       "NEW_NAME": self.op.new_name,
13054       }
13055
13056   def BuildHooksNodes(self):
13057     """Build hooks nodes.
13058
13059     """
13060     mn = self.cfg.GetMasterNode()
13061
13062     all_nodes = self.cfg.GetAllNodesInfo()
13063     all_nodes.pop(mn, None)
13064
13065     run_nodes = [mn]
13066     run_nodes.extend(node.name for node in all_nodes.values()
13067                      if node.group == self.group_uuid)
13068
13069     return (run_nodes, run_nodes)
13070
13071   def Exec(self, feedback_fn):
13072     """Rename the node group.
13073
13074     """
13075     group = self.cfg.GetNodeGroup(self.group_uuid)
13076
13077     if group is None:
13078       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13079                                (self.op.group_name, self.group_uuid))
13080
13081     group.name = self.op.new_name
13082     self.cfg.Update(group, feedback_fn)
13083
13084     return self.op.new_name
13085
13086
13087 class LUGroupEvacuate(LogicalUnit):
13088   HPATH = "group-evacuate"
13089   HTYPE = constants.HTYPE_GROUP
13090   REQ_BGL = False
13091
13092   def ExpandNames(self):
13093     # This raises errors.OpPrereqError on its own:
13094     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13095
13096     if self.op.target_groups:
13097       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13098                                   self.op.target_groups)
13099     else:
13100       self.req_target_uuids = []
13101
13102     if self.group_uuid in self.req_target_uuids:
13103       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13104                                  " as a target group (targets are %s)" %
13105                                  (self.group_uuid,
13106                                   utils.CommaJoin(self.req_target_uuids)),
13107                                  errors.ECODE_INVAL)
13108
13109     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13110
13111     self.share_locks = _ShareAll()
13112     self.needed_locks = {
13113       locking.LEVEL_INSTANCE: [],
13114       locking.LEVEL_NODEGROUP: [],
13115       locking.LEVEL_NODE: [],
13116       }
13117
13118   def DeclareLocks(self, level):
13119     if level == locking.LEVEL_INSTANCE:
13120       assert not self.needed_locks[locking.LEVEL_INSTANCE]
13121
13122       # Lock instances optimistically, needs verification once node and group
13123       # locks have been acquired
13124       self.needed_locks[locking.LEVEL_INSTANCE] = \
13125         self.cfg.GetNodeGroupInstances(self.group_uuid)
13126
13127     elif level == locking.LEVEL_NODEGROUP:
13128       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13129
13130       if self.req_target_uuids:
13131         lock_groups = set([self.group_uuid] + self.req_target_uuids)
13132
13133         # Lock all groups used by instances optimistically; this requires going
13134         # via the node before it's locked, requiring verification later on
13135         lock_groups.update(group_uuid
13136                            for instance_name in
13137                              self.owned_locks(locking.LEVEL_INSTANCE)
13138                            for group_uuid in
13139                              self.cfg.GetInstanceNodeGroups(instance_name))
13140       else:
13141         # No target groups, need to lock all of them
13142         lock_groups = locking.ALL_SET
13143
13144       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13145
13146     elif level == locking.LEVEL_NODE:
13147       # This will only lock the nodes in the group to be evacuated which
13148       # contain actual instances
13149       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13150       self._LockInstancesNodes()
13151
13152       # Lock all nodes in group to be evacuated and target groups
13153       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13154       assert self.group_uuid in owned_groups
13155       member_nodes = [node_name
13156                       for group in owned_groups
13157                       for node_name in self.cfg.GetNodeGroup(group).members]
13158       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13159
13160   def CheckPrereq(self):
13161     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13162     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13163     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13164
13165     assert owned_groups.issuperset(self.req_target_uuids)
13166     assert self.group_uuid in owned_groups
13167
13168     # Check if locked instances are still correct
13169     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13170
13171     # Get instance information
13172     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13173
13174     # Check if node groups for locked instances are still correct
13175     for instance_name in owned_instances:
13176       inst = self.instances[instance_name]
13177       assert owned_nodes.issuperset(inst.all_nodes), \
13178         "Instance %s's nodes changed while we kept the lock" % instance_name
13179
13180       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13181                                              owned_groups)
13182
13183       assert self.group_uuid in inst_groups, \
13184         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13185
13186     if self.req_target_uuids:
13187       # User requested specific target groups
13188       self.target_uuids = self.req_target_uuids
13189     else:
13190       # All groups except the one to be evacuated are potential targets
13191       self.target_uuids = [group_uuid for group_uuid in owned_groups
13192                            if group_uuid != self.group_uuid]
13193
13194       if not self.target_uuids:
13195         raise errors.OpPrereqError("There are no possible target groups",
13196                                    errors.ECODE_INVAL)
13197
13198   def BuildHooksEnv(self):
13199     """Build hooks env.
13200
13201     """
13202     return {
13203       "GROUP_NAME": self.op.group_name,
13204       "TARGET_GROUPS": " ".join(self.target_uuids),
13205       }
13206
13207   def BuildHooksNodes(self):
13208     """Build hooks nodes.
13209
13210     """
13211     mn = self.cfg.GetMasterNode()
13212
13213     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13214
13215     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13216
13217     return (run_nodes, run_nodes)
13218
13219   def Exec(self, feedback_fn):
13220     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13221
13222     assert self.group_uuid not in self.target_uuids
13223
13224     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13225                      instances=instances, target_groups=self.target_uuids)
13226
13227     ial.Run(self.op.iallocator)
13228
13229     if not ial.success:
13230       raise errors.OpPrereqError("Can't compute group evacuation using"
13231                                  " iallocator '%s': %s" %
13232                                  (self.op.iallocator, ial.info),
13233                                  errors.ECODE_NORES)
13234
13235     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13236
13237     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13238                  len(jobs), self.op.group_name)
13239
13240     return ResultWithJobs(jobs)
13241
13242
13243 class TagsLU(NoHooksLU): # pylint: disable=W0223
13244   """Generic tags LU.
13245
13246   This is an abstract class which is the parent of all the other tags LUs.
13247
13248   """
13249   def ExpandNames(self):
13250     self.group_uuid = None
13251     self.needed_locks = {}
13252     if self.op.kind == constants.TAG_NODE:
13253       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13254       self.needed_locks[locking.LEVEL_NODE] = self.op.name
13255     elif self.op.kind == constants.TAG_INSTANCE:
13256       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13257       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13258     elif self.op.kind == constants.TAG_NODEGROUP:
13259       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13260
13261     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13262     # not possible to acquire the BGL based on opcode parameters)
13263
13264   def CheckPrereq(self):
13265     """Check prerequisites.
13266
13267     """
13268     if self.op.kind == constants.TAG_CLUSTER:
13269       self.target = self.cfg.GetClusterInfo()
13270     elif self.op.kind == constants.TAG_NODE:
13271       self.target = self.cfg.GetNodeInfo(self.op.name)
13272     elif self.op.kind == constants.TAG_INSTANCE:
13273       self.target = self.cfg.GetInstanceInfo(self.op.name)
13274     elif self.op.kind == constants.TAG_NODEGROUP:
13275       self.target = self.cfg.GetNodeGroup(self.group_uuid)
13276     else:
13277       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13278                                  str(self.op.kind), errors.ECODE_INVAL)
13279
13280
13281 class LUTagsGet(TagsLU):
13282   """Returns the tags of a given object.
13283
13284   """
13285   REQ_BGL = False
13286
13287   def ExpandNames(self):
13288     TagsLU.ExpandNames(self)
13289
13290     # Share locks as this is only a read operation
13291     self.share_locks = _ShareAll()
13292
13293   def Exec(self, feedback_fn):
13294     """Returns the tag list.
13295
13296     """
13297     return list(self.target.GetTags())
13298
13299
13300 class LUTagsSearch(NoHooksLU):
13301   """Searches the tags for a given pattern.
13302
13303   """
13304   REQ_BGL = False
13305
13306   def ExpandNames(self):
13307     self.needed_locks = {}
13308
13309   def CheckPrereq(self):
13310     """Check prerequisites.
13311
13312     This checks the pattern passed for validity by compiling it.
13313
13314     """
13315     try:
13316       self.re = re.compile(self.op.pattern)
13317     except re.error, err:
13318       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13319                                  (self.op.pattern, err), errors.ECODE_INVAL)
13320
13321   def Exec(self, feedback_fn):
13322     """Returns the tag list.
13323
13324     """
13325     cfg = self.cfg
13326     tgts = [("/cluster", cfg.GetClusterInfo())]
13327     ilist = cfg.GetAllInstancesInfo().values()
13328     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13329     nlist = cfg.GetAllNodesInfo().values()
13330     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13331     tgts.extend(("/nodegroup/%s" % n.name, n)
13332                 for n in cfg.GetAllNodeGroupsInfo().values())
13333     results = []
13334     for path, target in tgts:
13335       for tag in target.GetTags():
13336         if self.re.search(tag):
13337           results.append((path, tag))
13338     return results
13339
13340
13341 class LUTagsSet(TagsLU):
13342   """Sets a tag on a given object.
13343
13344   """
13345   REQ_BGL = False
13346
13347   def CheckPrereq(self):
13348     """Check prerequisites.
13349
13350     This checks the type and length of the tag name and value.
13351
13352     """
13353     TagsLU.CheckPrereq(self)
13354     for tag in self.op.tags:
13355       objects.TaggableObject.ValidateTag(tag)
13356
13357   def Exec(self, feedback_fn):
13358     """Sets the tag.
13359
13360     """
13361     try:
13362       for tag in self.op.tags:
13363         self.target.AddTag(tag)
13364     except errors.TagError, err:
13365       raise errors.OpExecError("Error while setting tag: %s" % str(err))
13366     self.cfg.Update(self.target, feedback_fn)
13367
13368
13369 class LUTagsDel(TagsLU):
13370   """Delete a list of tags from a given object.
13371
13372   """
13373   REQ_BGL = False
13374
13375   def CheckPrereq(self):
13376     """Check prerequisites.
13377
13378     This checks that we have the given tag.
13379
13380     """
13381     TagsLU.CheckPrereq(self)
13382     for tag in self.op.tags:
13383       objects.TaggableObject.ValidateTag(tag)
13384     del_tags = frozenset(self.op.tags)
13385     cur_tags = self.target.GetTags()
13386
13387     diff_tags = del_tags - cur_tags
13388     if diff_tags:
13389       diff_names = ("'%s'" % i for i in sorted(diff_tags))
13390       raise errors.OpPrereqError("Tag(s) %s not found" %
13391                                  (utils.CommaJoin(diff_names), ),
13392                                  errors.ECODE_NOENT)
13393
13394   def Exec(self, feedback_fn):
13395     """Remove the tag from the object.
13396
13397     """
13398     for tag in self.op.tags:
13399       self.target.RemoveTag(tag)
13400     self.cfg.Update(self.target, feedback_fn)
13401
13402
13403 class LUTestDelay(NoHooksLU):
13404   """Sleep for a specified amount of time.
13405
13406   This LU sleeps on the master and/or nodes for a specified amount of
13407   time.
13408
13409   """
13410   REQ_BGL = False
13411
13412   def ExpandNames(self):
13413     """Expand names and set required locks.
13414
13415     This expands the node list, if any.
13416
13417     """
13418     self.needed_locks = {}
13419     if self.op.on_nodes:
13420       # _GetWantedNodes can be used here, but is not always appropriate to use
13421       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13422       # more information.
13423       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13424       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13425
13426   def _TestDelay(self):
13427     """Do the actual sleep.
13428
13429     """
13430     if self.op.on_master:
13431       if not utils.TestDelay(self.op.duration):
13432         raise errors.OpExecError("Error during master delay test")
13433     if self.op.on_nodes:
13434       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13435       for node, node_result in result.items():
13436         node_result.Raise("Failure during rpc call to node %s" % node)
13437
13438   def Exec(self, feedback_fn):
13439     """Execute the test delay opcode, with the wanted repetitions.
13440
13441     """
13442     if self.op.repeat == 0:
13443       self._TestDelay()
13444     else:
13445       top_value = self.op.repeat - 1
13446       for i in range(self.op.repeat):
13447         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13448         self._TestDelay()
13449
13450
13451 class LUTestJqueue(NoHooksLU):
13452   """Utility LU to test some aspects of the job queue.
13453
13454   """
13455   REQ_BGL = False
13456
13457   # Must be lower than default timeout for WaitForJobChange to see whether it
13458   # notices changed jobs
13459   _CLIENT_CONNECT_TIMEOUT = 20.0
13460   _CLIENT_CONFIRM_TIMEOUT = 60.0
13461
13462   @classmethod
13463   def _NotifyUsingSocket(cls, cb, errcls):
13464     """Opens a Unix socket and waits for another program to connect.
13465
13466     @type cb: callable
13467     @param cb: Callback to send socket name to client
13468     @type errcls: class
13469     @param errcls: Exception class to use for errors
13470
13471     """
13472     # Using a temporary directory as there's no easy way to create temporary
13473     # sockets without writing a custom loop around tempfile.mktemp and
13474     # socket.bind
13475     tmpdir = tempfile.mkdtemp()
13476     try:
13477       tmpsock = utils.PathJoin(tmpdir, "sock")
13478
13479       logging.debug("Creating temporary socket at %s", tmpsock)
13480       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13481       try:
13482         sock.bind(tmpsock)
13483         sock.listen(1)
13484
13485         # Send details to client
13486         cb(tmpsock)
13487
13488         # Wait for client to connect before continuing
13489         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13490         try:
13491           (conn, _) = sock.accept()
13492         except socket.error, err:
13493           raise errcls("Client didn't connect in time (%s)" % err)
13494       finally:
13495         sock.close()
13496     finally:
13497       # Remove as soon as client is connected
13498       shutil.rmtree(tmpdir)
13499
13500     # Wait for client to close
13501     try:
13502       try:
13503         # pylint: disable=E1101
13504         # Instance of '_socketobject' has no ... member
13505         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13506         conn.recv(1)
13507       except socket.error, err:
13508         raise errcls("Client failed to confirm notification (%s)" % err)
13509     finally:
13510       conn.close()
13511
13512   def _SendNotification(self, test, arg, sockname):
13513     """Sends a notification to the client.
13514
13515     @type test: string
13516     @param test: Test name
13517     @param arg: Test argument (depends on test)
13518     @type sockname: string
13519     @param sockname: Socket path
13520
13521     """
13522     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13523
13524   def _Notify(self, prereq, test, arg):
13525     """Notifies the client of a test.
13526
13527     @type prereq: bool
13528     @param prereq: Whether this is a prereq-phase test
13529     @type test: string
13530     @param test: Test name
13531     @param arg: Test argument (depends on test)
13532
13533     """
13534     if prereq:
13535       errcls = errors.OpPrereqError
13536     else:
13537       errcls = errors.OpExecError
13538
13539     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13540                                                   test, arg),
13541                                    errcls)
13542
13543   def CheckArguments(self):
13544     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13545     self.expandnames_calls = 0
13546
13547   def ExpandNames(self):
13548     checkargs_calls = getattr(self, "checkargs_calls", 0)
13549     if checkargs_calls < 1:
13550       raise errors.ProgrammerError("CheckArguments was not called")
13551
13552     self.expandnames_calls += 1
13553
13554     if self.op.notify_waitlock:
13555       self._Notify(True, constants.JQT_EXPANDNAMES, None)
13556
13557     self.LogInfo("Expanding names")
13558
13559     # Get lock on master node (just to get a lock, not for a particular reason)
13560     self.needed_locks = {
13561       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13562       }
13563
13564   def Exec(self, feedback_fn):
13565     if self.expandnames_calls < 1:
13566       raise errors.ProgrammerError("ExpandNames was not called")
13567
13568     if self.op.notify_exec:
13569       self._Notify(False, constants.JQT_EXEC, None)
13570
13571     self.LogInfo("Executing")
13572
13573     if self.op.log_messages:
13574       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13575       for idx, msg in enumerate(self.op.log_messages):
13576         self.LogInfo("Sending log message %s", idx + 1)
13577         feedback_fn(constants.JQT_MSGPREFIX + msg)
13578         # Report how many test messages have been sent
13579         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13580
13581     if self.op.fail:
13582       raise errors.OpExecError("Opcode failure was requested")
13583
13584     return True
13585
13586
13587 class IAllocator(object):
13588   """IAllocator framework.
13589
13590   An IAllocator instance has three sets of attributes:
13591     - cfg that is needed to query the cluster
13592     - input data (all members of the _KEYS class attribute are required)
13593     - four buffer attributes (in|out_data|text), that represent the
13594       input (to the external script) in text and data structure format,
13595       and the output from it, again in two formats
13596     - the result variables from the script (success, info, nodes) for
13597       easy usage
13598
13599   """
13600   # pylint: disable=R0902
13601   # lots of instance attributes
13602
13603   def __init__(self, cfg, rpc_runner, mode, **kwargs):
13604     self.cfg = cfg
13605     self.rpc = rpc_runner
13606     # init buffer variables
13607     self.in_text = self.out_text = self.in_data = self.out_data = None
13608     # init all input fields so that pylint is happy
13609     self.mode = mode
13610     self.memory = self.disks = self.disk_template = None
13611     self.os = self.tags = self.nics = self.vcpus = None
13612     self.hypervisor = None
13613     self.relocate_from = None
13614     self.name = None
13615     self.instances = None
13616     self.evac_mode = None
13617     self.target_groups = []
13618     # computed fields
13619     self.required_nodes = None
13620     # init result fields
13621     self.success = self.info = self.result = None
13622
13623     try:
13624       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13625     except KeyError:
13626       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13627                                    " IAllocator" % self.mode)
13628
13629     keyset = [n for (n, _) in keydata]
13630
13631     for key in kwargs:
13632       if key not in keyset:
13633         raise errors.ProgrammerError("Invalid input parameter '%s' to"
13634                                      " IAllocator" % key)
13635       setattr(self, key, kwargs[key])
13636
13637     for key in keyset:
13638       if key not in kwargs:
13639         raise errors.ProgrammerError("Missing input parameter '%s' to"
13640                                      " IAllocator" % key)
13641     self._BuildInputData(compat.partial(fn, self), keydata)
13642
13643   def _ComputeClusterData(self):
13644     """Compute the generic allocator input data.
13645
13646     This is the data that is independent of the actual operation.
13647
13648     """
13649     cfg = self.cfg
13650     cluster_info = cfg.GetClusterInfo()
13651     # cluster data
13652     data = {
13653       "version": constants.IALLOCATOR_VERSION,
13654       "cluster_name": cfg.GetClusterName(),
13655       "cluster_tags": list(cluster_info.GetTags()),
13656       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13657       # we don't have job IDs
13658       }
13659     ninfo = cfg.GetAllNodesInfo()
13660     iinfo = cfg.GetAllInstancesInfo().values()
13661     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13662
13663     # node data
13664     node_list = [n.name for n in ninfo.values() if n.vm_capable]
13665
13666     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13667       hypervisor_name = self.hypervisor
13668     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13669       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13670     else:
13671       hypervisor_name = cluster_info.enabled_hypervisors[0]
13672
13673     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
13674                                         [hypervisor_name])
13675     node_iinfo = \
13676       self.rpc.call_all_instances_info(node_list,
13677                                        cluster_info.enabled_hypervisors)
13678
13679     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13680
13681     config_ndata = self._ComputeBasicNodeData(ninfo)
13682     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13683                                                  i_list, config_ndata)
13684     assert len(data["nodes"]) == len(ninfo), \
13685         "Incomplete node data computed"
13686
13687     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13688
13689     self.in_data = data
13690
13691   @staticmethod
13692   def _ComputeNodeGroupData(cfg):
13693     """Compute node groups data.
13694
13695     """
13696     ng = dict((guuid, {
13697       "name": gdata.name,
13698       "alloc_policy": gdata.alloc_policy,
13699       })
13700       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13701
13702     return ng
13703
13704   @staticmethod
13705   def _ComputeBasicNodeData(node_cfg):
13706     """Compute global node data.
13707
13708     @rtype: dict
13709     @returns: a dict of name: (node dict, node config)
13710
13711     """
13712     # fill in static (config-based) values
13713     node_results = dict((ninfo.name, {
13714       "tags": list(ninfo.GetTags()),
13715       "primary_ip": ninfo.primary_ip,
13716       "secondary_ip": ninfo.secondary_ip,
13717       "offline": ninfo.offline,
13718       "drained": ninfo.drained,
13719       "master_candidate": ninfo.master_candidate,
13720       "group": ninfo.group,
13721       "master_capable": ninfo.master_capable,
13722       "vm_capable": ninfo.vm_capable,
13723       })
13724       for ninfo in node_cfg.values())
13725
13726     return node_results
13727
13728   @staticmethod
13729   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13730                               node_results):
13731     """Compute global node data.
13732
13733     @param node_results: the basic node structures as filled from the config
13734
13735     """
13736     #TODO(dynmem): compute the right data on MAX and MIN memory
13737     # make a copy of the current dict
13738     node_results = dict(node_results)
13739     for nname, nresult in node_data.items():
13740       assert nname in node_results, "Missing basic data for node %s" % nname
13741       ninfo = node_cfg[nname]
13742
13743       if not (ninfo.offline or ninfo.drained):
13744         nresult.Raise("Can't get data for node %s" % nname)
13745         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13746                                 nname)
13747         remote_info = _MakeLegacyNodeInfo(nresult.payload)
13748
13749         for attr in ["memory_total", "memory_free", "memory_dom0",
13750                      "vg_size", "vg_free", "cpu_total"]:
13751           if attr not in remote_info:
13752             raise errors.OpExecError("Node '%s' didn't return attribute"
13753                                      " '%s'" % (nname, attr))
13754           if not isinstance(remote_info[attr], int):
13755             raise errors.OpExecError("Node '%s' returned invalid value"
13756                                      " for '%s': %s" %
13757                                      (nname, attr, remote_info[attr]))
13758         # compute memory used by primary instances
13759         i_p_mem = i_p_up_mem = 0
13760         for iinfo, beinfo in i_list:
13761           if iinfo.primary_node == nname:
13762             i_p_mem += beinfo[constants.BE_MAXMEM]
13763             if iinfo.name not in node_iinfo[nname].payload:
13764               i_used_mem = 0
13765             else:
13766               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13767             i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
13768             remote_info["memory_free"] -= max(0, i_mem_diff)
13769
13770             if iinfo.admin_state == constants.ADMINST_UP:
13771               i_p_up_mem += beinfo[constants.BE_MAXMEM]
13772
13773         # compute memory used by instances
13774         pnr_dyn = {
13775           "total_memory": remote_info["memory_total"],
13776           "reserved_memory": remote_info["memory_dom0"],
13777           "free_memory": remote_info["memory_free"],
13778           "total_disk": remote_info["vg_size"],
13779           "free_disk": remote_info["vg_free"],
13780           "total_cpus": remote_info["cpu_total"],
13781           "i_pri_memory": i_p_mem,
13782           "i_pri_up_memory": i_p_up_mem,
13783           }
13784         pnr_dyn.update(node_results[nname])
13785         node_results[nname] = pnr_dyn
13786
13787     return node_results
13788
13789   @staticmethod
13790   def _ComputeInstanceData(cluster_info, i_list):
13791     """Compute global instance data.
13792
13793     """
13794     instance_data = {}
13795     for iinfo, beinfo in i_list:
13796       nic_data = []
13797       for nic in iinfo.nics:
13798         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13799         nic_dict = {
13800           "mac": nic.mac,
13801           "ip": nic.ip,
13802           "mode": filled_params[constants.NIC_MODE],
13803           "link": filled_params[constants.NIC_LINK],
13804           }
13805         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13806           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13807         nic_data.append(nic_dict)
13808       pir = {
13809         "tags": list(iinfo.GetTags()),
13810         "admin_state": iinfo.admin_state,
13811         "vcpus": beinfo[constants.BE_VCPUS],
13812         "memory": beinfo[constants.BE_MAXMEM],
13813         "os": iinfo.os,
13814         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13815         "nics": nic_data,
13816         "disks": [{constants.IDISK_SIZE: dsk.size,
13817                    constants.IDISK_MODE: dsk.mode}
13818                   for dsk in iinfo.disks],
13819         "disk_template": iinfo.disk_template,
13820         "hypervisor": iinfo.hypervisor,
13821         }
13822       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13823                                                  pir["disks"])
13824       instance_data[iinfo.name] = pir
13825
13826     return instance_data
13827
13828   def _AddNewInstance(self):
13829     """Add new instance data to allocator structure.
13830
13831     This in combination with _AllocatorGetClusterData will create the
13832     correct structure needed as input for the allocator.
13833
13834     The checks for the completeness of the opcode must have already been
13835     done.
13836
13837     """
13838     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13839
13840     if self.disk_template in constants.DTS_INT_MIRROR:
13841       self.required_nodes = 2
13842     else:
13843       self.required_nodes = 1
13844
13845     request = {
13846       "name": self.name,
13847       "disk_template": self.disk_template,
13848       "tags": self.tags,
13849       "os": self.os,
13850       "vcpus": self.vcpus,
13851       "memory": self.memory,
13852       "disks": self.disks,
13853       "disk_space_total": disk_space,
13854       "nics": self.nics,
13855       "required_nodes": self.required_nodes,
13856       "hypervisor": self.hypervisor,
13857       }
13858
13859     return request
13860
13861   def _AddRelocateInstance(self):
13862     """Add relocate instance data to allocator structure.
13863
13864     This in combination with _IAllocatorGetClusterData will create the
13865     correct structure needed as input for the allocator.
13866
13867     The checks for the completeness of the opcode must have already been
13868     done.
13869
13870     """
13871     instance = self.cfg.GetInstanceInfo(self.name)
13872     if instance is None:
13873       raise errors.ProgrammerError("Unknown instance '%s' passed to"
13874                                    " IAllocator" % self.name)
13875
13876     if instance.disk_template not in constants.DTS_MIRRORED:
13877       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13878                                  errors.ECODE_INVAL)
13879
13880     if instance.disk_template in constants.DTS_INT_MIRROR and \
13881         len(instance.secondary_nodes) != 1:
13882       raise errors.OpPrereqError("Instance has not exactly one secondary node",
13883                                  errors.ECODE_STATE)
13884
13885     self.required_nodes = 1
13886     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13887     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13888
13889     request = {
13890       "name": self.name,
13891       "disk_space_total": disk_space,
13892       "required_nodes": self.required_nodes,
13893       "relocate_from": self.relocate_from,
13894       }
13895     return request
13896
13897   def _AddNodeEvacuate(self):
13898     """Get data for node-evacuate requests.
13899
13900     """
13901     return {
13902       "instances": self.instances,
13903       "evac_mode": self.evac_mode,
13904       }
13905
13906   def _AddChangeGroup(self):
13907     """Get data for node-evacuate requests.
13908
13909     """
13910     return {
13911       "instances": self.instances,
13912       "target_groups": self.target_groups,
13913       }
13914
13915   def _BuildInputData(self, fn, keydata):
13916     """Build input data structures.
13917
13918     """
13919     self._ComputeClusterData()
13920
13921     request = fn()
13922     request["type"] = self.mode
13923     for keyname, keytype in keydata:
13924       if keyname not in request:
13925         raise errors.ProgrammerError("Request parameter %s is missing" %
13926                                      keyname)
13927       val = request[keyname]
13928       if not keytype(val):
13929         raise errors.ProgrammerError("Request parameter %s doesn't pass"
13930                                      " validation, value %s, expected"
13931                                      " type %s" % (keyname, val, keytype))
13932     self.in_data["request"] = request
13933
13934     self.in_text = serializer.Dump(self.in_data)
13935
13936   _STRING_LIST = ht.TListOf(ht.TString)
13937   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13938      # pylint: disable=E1101
13939      # Class '...' has no 'OP_ID' member
13940      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13941                           opcodes.OpInstanceMigrate.OP_ID,
13942                           opcodes.OpInstanceReplaceDisks.OP_ID])
13943      })))
13944
13945   _NEVAC_MOVED = \
13946     ht.TListOf(ht.TAnd(ht.TIsLength(3),
13947                        ht.TItems([ht.TNonEmptyString,
13948                                   ht.TNonEmptyString,
13949                                   ht.TListOf(ht.TNonEmptyString),
13950                                  ])))
13951   _NEVAC_FAILED = \
13952     ht.TListOf(ht.TAnd(ht.TIsLength(2),
13953                        ht.TItems([ht.TNonEmptyString,
13954                                   ht.TMaybeString,
13955                                  ])))
13956   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13957                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13958
13959   _MODE_DATA = {
13960     constants.IALLOCATOR_MODE_ALLOC:
13961       (_AddNewInstance,
13962        [
13963         ("name", ht.TString),
13964         ("memory", ht.TInt),
13965         ("disks", ht.TListOf(ht.TDict)),
13966         ("disk_template", ht.TString),
13967         ("os", ht.TString),
13968         ("tags", _STRING_LIST),
13969         ("nics", ht.TListOf(ht.TDict)),
13970         ("vcpus", ht.TInt),
13971         ("hypervisor", ht.TString),
13972         ], ht.TList),
13973     constants.IALLOCATOR_MODE_RELOC:
13974       (_AddRelocateInstance,
13975        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13976        ht.TList),
13977      constants.IALLOCATOR_MODE_NODE_EVAC:
13978       (_AddNodeEvacuate, [
13979         ("instances", _STRING_LIST),
13980         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13981         ], _NEVAC_RESULT),
13982      constants.IALLOCATOR_MODE_CHG_GROUP:
13983       (_AddChangeGroup, [
13984         ("instances", _STRING_LIST),
13985         ("target_groups", _STRING_LIST),
13986         ], _NEVAC_RESULT),
13987     }
13988
13989   def Run(self, name, validate=True, call_fn=None):
13990     """Run an instance allocator and return the results.
13991
13992     """
13993     if call_fn is None:
13994       call_fn = self.rpc.call_iallocator_runner
13995
13996     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13997     result.Raise("Failure while running the iallocator script")
13998
13999     self.out_text = result.payload
14000     if validate:
14001       self._ValidateResult()
14002
14003   def _ValidateResult(self):
14004     """Process the allocator results.
14005
14006     This will process and if successful save the result in
14007     self.out_data and the other parameters.
14008
14009     """
14010     try:
14011       rdict = serializer.Load(self.out_text)
14012     except Exception, err:
14013       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14014
14015     if not isinstance(rdict, dict):
14016       raise errors.OpExecError("Can't parse iallocator results: not a dict")
14017
14018     # TODO: remove backwards compatiblity in later versions
14019     if "nodes" in rdict and "result" not in rdict:
14020       rdict["result"] = rdict["nodes"]
14021       del rdict["nodes"]
14022
14023     for key in "success", "info", "result":
14024       if key not in rdict:
14025         raise errors.OpExecError("Can't parse iallocator results:"
14026                                  " missing key '%s'" % key)
14027       setattr(self, key, rdict[key])
14028
14029     if not self._result_check(self.result):
14030       raise errors.OpExecError("Iallocator returned invalid result,"
14031                                " expected %s, got %s" %
14032                                (self._result_check, self.result),
14033                                errors.ECODE_INVAL)
14034
14035     if self.mode == constants.IALLOCATOR_MODE_RELOC:
14036       assert self.relocate_from is not None
14037       assert self.required_nodes == 1
14038
14039       node2group = dict((name, ndata["group"])
14040                         for (name, ndata) in self.in_data["nodes"].items())
14041
14042       fn = compat.partial(self._NodesToGroups, node2group,
14043                           self.in_data["nodegroups"])
14044
14045       instance = self.cfg.GetInstanceInfo(self.name)
14046       request_groups = fn(self.relocate_from + [instance.primary_node])
14047       result_groups = fn(rdict["result"] + [instance.primary_node])
14048
14049       if self.success and not set(result_groups).issubset(request_groups):
14050         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14051                                  " differ from original groups (%s)" %
14052                                  (utils.CommaJoin(result_groups),
14053                                   utils.CommaJoin(request_groups)))
14054
14055     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14056       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14057
14058     self.out_data = rdict
14059
14060   @staticmethod
14061   def _NodesToGroups(node2group, groups, nodes):
14062     """Returns a list of unique group names for a list of nodes.
14063
14064     @type node2group: dict
14065     @param node2group: Map from node name to group UUID
14066     @type groups: dict
14067     @param groups: Group information
14068     @type nodes: list
14069     @param nodes: Node names
14070
14071     """
14072     result = set()
14073
14074     for node in nodes:
14075       try:
14076         group_uuid = node2group[node]
14077       except KeyError:
14078         # Ignore unknown node
14079         pass
14080       else:
14081         try:
14082           group = groups[group_uuid]
14083         except KeyError:
14084           # Can't find group, let's use UUID
14085           group_name = group_uuid
14086         else:
14087           group_name = group["name"]
14088
14089         result.add(group_name)
14090
14091     return sorted(result)
14092
14093
14094 class LUTestAllocator(NoHooksLU):
14095   """Run allocator tests.
14096
14097   This LU runs the allocator tests
14098
14099   """
14100   def CheckPrereq(self):
14101     """Check prerequisites.
14102
14103     This checks the opcode parameters depending on the director and mode test.
14104
14105     """
14106     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14107       for attr in ["memory", "disks", "disk_template",
14108                    "os", "tags", "nics", "vcpus"]:
14109         if not hasattr(self.op, attr):
14110           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14111                                      attr, errors.ECODE_INVAL)
14112       iname = self.cfg.ExpandInstanceName(self.op.name)
14113       if iname is not None:
14114         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14115                                    iname, errors.ECODE_EXISTS)
14116       if not isinstance(self.op.nics, list):
14117         raise errors.OpPrereqError("Invalid parameter 'nics'",
14118                                    errors.ECODE_INVAL)
14119       if not isinstance(self.op.disks, list):
14120         raise errors.OpPrereqError("Invalid parameter 'disks'",
14121                                    errors.ECODE_INVAL)
14122       for row in self.op.disks:
14123         if (not isinstance(row, dict) or
14124             constants.IDISK_SIZE not in row or
14125             not isinstance(row[constants.IDISK_SIZE], int) or
14126             constants.IDISK_MODE not in row or
14127             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14128           raise errors.OpPrereqError("Invalid contents of the 'disks'"
14129                                      " parameter", errors.ECODE_INVAL)
14130       if self.op.hypervisor is None:
14131         self.op.hypervisor = self.cfg.GetHypervisorType()
14132     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14133       fname = _ExpandInstanceName(self.cfg, self.op.name)
14134       self.op.name = fname
14135       self.relocate_from = \
14136           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14137     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14138                           constants.IALLOCATOR_MODE_NODE_EVAC):
14139       if not self.op.instances:
14140         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14141       self.op.instances = _GetWantedInstances(self, self.op.instances)
14142     else:
14143       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14144                                  self.op.mode, errors.ECODE_INVAL)
14145
14146     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14147       if self.op.allocator is None:
14148         raise errors.OpPrereqError("Missing allocator name",
14149                                    errors.ECODE_INVAL)
14150     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14151       raise errors.OpPrereqError("Wrong allocator test '%s'" %
14152                                  self.op.direction, errors.ECODE_INVAL)
14153
14154   def Exec(self, feedback_fn):
14155     """Run the allocator test.
14156
14157     """
14158     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14159       ial = IAllocator(self.cfg, self.rpc,
14160                        mode=self.op.mode,
14161                        name=self.op.name,
14162                        memory=self.op.memory,
14163                        disks=self.op.disks,
14164                        disk_template=self.op.disk_template,
14165                        os=self.op.os,
14166                        tags=self.op.tags,
14167                        nics=self.op.nics,
14168                        vcpus=self.op.vcpus,
14169                        hypervisor=self.op.hypervisor,
14170                        )
14171     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14172       ial = IAllocator(self.cfg, self.rpc,
14173                        mode=self.op.mode,
14174                        name=self.op.name,
14175                        relocate_from=list(self.relocate_from),
14176                        )
14177     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14178       ial = IAllocator(self.cfg, self.rpc,
14179                        mode=self.op.mode,
14180                        instances=self.op.instances,
14181                        target_groups=self.op.target_groups)
14182     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14183       ial = IAllocator(self.cfg, self.rpc,
14184                        mode=self.op.mode,
14185                        instances=self.op.instances,
14186                        evac_mode=self.op.evac_mode)
14187     else:
14188       raise errors.ProgrammerError("Uncatched mode %s in"
14189                                    " LUTestAllocator.Exec", self.op.mode)
14190
14191     if self.op.direction == constants.IALLOCATOR_DIR_IN:
14192       result = ial.in_text
14193     else:
14194       ial.Run(self.op.allocator, validate=False)
14195       result = ial.out_text
14196     return result
14197
14198
14199 #: Query type implementations
14200 _QUERY_IMPL = {
14201   constants.QR_INSTANCE: _InstanceQuery,
14202   constants.QR_NODE: _NodeQuery,
14203   constants.QR_GROUP: _GroupQuery,
14204   constants.QR_OS: _OsQuery,
14205   }
14206
14207 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14208
14209
14210 def _GetQueryImplementation(name):
14211   """Returns the implemtnation for a query type.
14212
14213   @param name: Query type, must be one of L{constants.QR_VIA_OP}
14214
14215   """
14216   try:
14217     return _QUERY_IMPL[name]
14218   except KeyError:
14219     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14220                                errors.ECODE_INVAL)