4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
contained in the C{jobs} attribute and include the job IDs in the opcode
  result.
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
@type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
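# Illustrative sketch (assumption, not part of the original module): an LU's
# Exec method can hand follow-up work back to the master processor by
# returning a ResultWithJobs instance; the opcode below is only an example.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpClusterVerifyConfig()]]   # one job with one opcode
#     return ResultWithJobs(jobs)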
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
This needs to be overridden in derived classes in order to check op
    validity.
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check and ensuring
validity of opcode parameters, without any cluster-related
checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing these separately is better because:

  - ExpandNames is left as purely a lock-related function
  - CheckPrereq is run after we have acquired locks (and possibly
    waited for them)

The function is allowed to change the self.op attribute so that
later methods no longer need to worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
member, as a dict with lock levels as keys, and a list of needed lock names
as values. Rules:
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
    # Acquire all nodes and one instance
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: ['instance1.example.com'],
    }
    # Acquire just two nodes
    self.needed_locks = {
      locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
    }
    self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
# concurrent, so that old LUs don't need to be changed all at the same
    # time.
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
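  # Illustrative sketch (assumption, not from the original module): a minimal
  # ExpandNames for an LU that works on one instance and only needs its node
  # locks in shared mode:
  #
  #   def ExpandNames(self):
  #     self.needed_locks = {
  #       locking.LEVEL_INSTANCE: ["instance1.example.com"],
  #       locking.LEVEL_NODE: locking.ALL_SET,
  #       }
  #     self.share_locks[locking.LEVEL_NODE] = 1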
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
it should be idempotent - no cluster or system changes are allowed.
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
@note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. No nodes should be returned as an
309 empty list (and not None).
@note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
name. It also initializes needed_locks as a dict, if this hasn't been done
    before.
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
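  # Illustrative sketch (assumption): the usual way an instance-level LU
  # combines _ExpandAndLockInstance, DeclareLocks and _LockInstancesNodes:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()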
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
"""Check prerequisites for this tasklet.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
def _ShareAll():
  """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })
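# Illustrative sketch (assumption, not part of the original module): the shape
# of the data handled by _MakeLegacyNodeInfo. All values below are made up.
def _ExampleMakeLegacyNodeInfo():
  """Hypothetical helper, never called by Ganeti itself."""
  data = ("fake-boot-id",
          ({"vg_size": 102400, "vg_free": 51200},),
          ({"memory_total": 4096, "memory_free": 2048},))
  # The volume group and hypervisor dicts are merged into a single dict,
  # with the boot ID added under the "bootid" key.
  return _MakeLegacyNodeInfo(data)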
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
" '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
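# Illustrative sketch (assumption): both helpers above are typically called
# right after the locks have been acquired, to detect configuration changes
# that happened in between, e.g.:
#
#   owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
#   _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)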
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
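# Illustrative sketch (assumption, not part of the original module): expected
# behaviour of _GetUpdatedParams on plain dicts; the parameter names are made up.
def _ExampleGetUpdatedParams():
  """Hypothetical helper, never called by Ganeti itself."""
  old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
  update = {"root_path": constants.VALUE_DEFAULT, "serial_console": True}
  # "root_path" is reset (removed so the default applies again),
  # "serial_console" is added and "kernel_path" is left untouched.
  return _GetUpdatedParams(old, update)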
724 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
"""Return the new version of an instance policy.
727 @param group_policy: whether this policy applies to a group and thus
728 we should support removal of policy entries
731 use_none = use_default = group_policy
732 ipolicy = copy.deepcopy(old_ipolicy)
733 for key, value in new_ipolicy.items():
734 if key not in constants.IPOLICY_ALL_KEYS:
735 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
737 if key in constants.IPOLICY_PARAMETERS:
738 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
739 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
741 use_default=use_default)
743 # FIXME: we assume all others are lists; this should be redone
745 if not value or value == [constants.VALUE_DEFAULT]:
      raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                 " on the cluster" % key,
                                 errors.ECODE_INVAL)
    else:
      ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy)
756 except errors.ConfigurationError, err:
757 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
762 def _UpdateAndVerifySubDict(base, updates, type_check):
763 """Updates and verifies a dict with sub dicts of the same type.
765 @param base: The dict with the old data
766 @param updates: The dict with the new data
767 @param type_check: Dict suitable to ForceDictType to verify correct types
768 @returns: A new dict with updated and verified values
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new
776 ret = copy.deepcopy(base)
777 ret.update(dict((key, fn(base.get(key, {}), value))
778 for key, value in updates.items()))
782 def _MergeAndVerifyHvState(op_input, obj_input):
783 """Combines the hv state from an opcode with the one of the object
785 @param op_input: The input dict from the opcode
786 @param obj_input: The input dict from the objects
787 @return: The verified and updated dict
791 invalid_hvs = set(op_input) - constants.HYPER_TYPES
  if invalid_hvs:
    raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
794 " %s" % utils.CommaJoin(invalid_hvs),
  if obj_input is None:
    obj_input = {}
  type_check = constants.HVSTS_PARAMETER_TYPES
799 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
804 def _MergeAndVerifyDiskState(op_input, obj_input):
805 """Combines the disk state from an opcode with the one of the object
807 @param op_input: The input dict from the opcode
808 @param obj_input: The input dict from the objects
809 @return: The verified and updated dict
812 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
  if invalid_dst:
    raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
815 utils.CommaJoin(invalid_dst),
817 type_check = constants.DSS_PARAMETER_TYPES
  if obj_input is None:
    obj_input = {}
820 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
822 for key, value in op_input.items())
827 def _ReleaseLocks(lu, level, names=None, keep=None):
828 """Releases locks owned by an LU.
830 @type lu: L{LogicalUnit}
831 @param level: Lock level
832 @type names: list or None
833 @param names: Names of locks to release
834 @type keep: list or None
835 @param keep: Names of locks to retain
838 assert not (keep is not None and names is not None), \
839 "Only one of the 'names' and the 'keep' parameters can be given"
841 if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None
848 owned = lu.owned_locks(level)
850 # Not owning any lock at this level, do nothing
857 # Determine which locks to release
859 if should_release(name):
864 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
866 # Release just some locks
867 lu.glm.release(level, names=release)
869 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
872 lu.glm.release(level)
874 assert not lu.glm.is_owned(level), "No locks should be owned"
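# Illustrative sketch (assumption): typical calls either keep a few locks,
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#
# release an explicit set of names,
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=old_nodes)
#
# or release everything at that level by passing neither parameter.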
877 def _MapInstanceDisksToNodes(instances):
878 """Creates a map from (node, volume) to instance name.
880 @type instances: list of L{objects.Instance}
881 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
884 return dict(((node, vol), inst.name)
885 for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
890 def _RunPostHook(lu, node_name):
891 """Runs the post-hook for an opcode on a single node.
894 hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
902 def _CheckOutputFields(static, dynamic, selected):
903 """Checks whether all selected fields are valid.
905 @type static: L{utils.FieldSet}
906 @param static: static fields set
907 @type dynamic: L{utils.FieldSet}
908 @param dynamic: dynamic fields set
  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
921 def _CheckGlobalHvParams(params):
922 """Validates that given hypervisor params are not global ones.
This will ensure that instances don't get customised versions of
  global params.
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
930 msg = ("The following hypervisor parameters are global and cannot"
931 " be customized at instance level, please modify them at"
932 " cluster level: %s" % utils.CommaJoin(used_globals))
933 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
936 def _CheckNodeOnline(lu, node, msg=None):
937 """Ensure that a given node is online.
939 @param lu: the LU on behalf of which we make the check
940 @param node: the node to check
941 @param msg: if passed, should be a message to replace the default one
942 @raise errors.OpPrereqError: if the node is offline
  if msg is None:
    msg = "Can't use offline node"
947 if lu.cfg.GetNodeInfo(node).offline:
948 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
951 def _CheckNodeNotDrained(lu, node):
952 """Ensure that a given node is not drained.
954 @param lu: the LU on behalf of which we make the check
955 @param node: the node to check
956 @raise errors.OpPrereqError: if the node is drained
959 if lu.cfg.GetNodeInfo(node).drained:
960 raise errors.OpPrereqError("Can't use drained node %s" % node,
964 def _CheckNodeVmCapable(lu, node):
965 """Ensure that a given node is vm capable.
967 @param lu: the LU on behalf of which we make the check
968 @param node: the node to check
969 @raise errors.OpPrereqError: if the node is not vm capable
972 if not lu.cfg.GetNodeInfo(node).vm_capable:
973 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
977 def _CheckNodeHasOS(lu, node, os_name, force_variant):
978 """Ensure that a node supports a given OS.
980 @param lu: the LU on behalf of which we make the check
981 @param node: the node to check
982 @param os_name: the OS to query about
983 @param force_variant: whether to ignore variant errors
984 @raise errors.OpPrereqError: if the node is not supporting the OS
987 result = lu.rpc.call_os_get(node, os_name)
988 result.Raise("OS '%s' not in supported OS list for node %s" %
990 prereq=True, ecode=errors.ECODE_INVAL)
991 if not force_variant:
992 _CheckOSVariant(result.payload, os_name)
995 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
996 """Ensure that a node has the given secondary ip.
998 @type lu: L{LogicalUnit}
999 @param lu: the LU on behalf of which we make the check
1001 @param node: the node to check
1002 @type secondary_ip: string
1003 @param secondary_ip: the ip to check
1004 @type prereq: boolean
1005 @param prereq: whether to throw a prerequisite or an execute error
1006 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1007 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1010 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1011 result.Raise("Failure checking secondary ip on node %s" % node,
1012 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1013 if not result.payload:
1014 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1015 " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
1022 def _GetClusterDomainSecret():
1023 """Reads the cluster domain secret.
1026 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1030 def _CheckInstanceState(lu, instance, req_states, msg=None):
1031 """Ensure that an instance is in one of the required states.
1033 @param lu: the LU on behalf of which we make the check
1034 @param instance: the instance to check
1035 @param msg: if passed, should be a message to replace the default one
1036 @raise errors.OpPrereqError: if the instance is not in the required state
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
1041 if instance.admin_state not in req_states:
1042 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1043 (instance.name, instance.admin_state, msg),
1046 if constants.ADMINST_UP not in req_states:
1047 pnode = instance.primary_node
1048 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1049 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1050 prereq=True, ecode=errors.ECODE_ENVIRON)
1052 if instance.name in ins_l.payload:
1053 raise errors.OpPrereqError("Instance %s is running, %s" %
1054 (instance.name, msg), errors.ECODE_STATE)
1057 def _ComputeMinMaxSpec(name, ipolicy, value):
1058 """Computes if value is in the desired range.
1060 @param name: name of the parameter for which we perform the check
1061 @param ipolicy: dictionary containing min, max and std values
1062 @param value: actual value that we want to use
1063 @return: None or element not meeting the criteria
  if value in [None, constants.VALUE_AUTO]:
    return None
1069 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1070 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1071 if value > max_v or min_v > value:
1072 return ("%s value %s is not in range [%s, %s]" %
1073 (name, value, min_v, max_v))
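# Illustrative sketch (assumption, not part of the original module): how
# _ComputeMinMaxSpec reports violations; the ipolicy fragment below is made up.
def _ExampleComputeMinMaxSpec():
  """Hypothetical helper, never called by Ganeti itself."""
  ipolicy = {
    constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
    constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 32768},
    }
  # Inside the range: None is returned (no violation).
  assert _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 512) is None
  # Outside the range: a human-readable error string is returned.
  return _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, ipolicy, 65536)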
1077 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1078 nic_count, disk_sizes,
1079 _compute_fn=_ComputeMinMaxSpec):
1080 """Verifies ipolicy against provided specs.
1083 @param ipolicy: The ipolicy
1085 @param mem_size: The memory size
1086 @type cpu_count: int
1087 @param cpu_count: Used cpu cores
1088 @type disk_count: int
1089 @param disk_count: Number of disks used
1090 @type nic_count: int
1091 @param nic_count: Number of nics used
1092 @type disk_sizes: list of ints
1093 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1094 @param _compute_fn: The compute function (unittest only)
@return: A list of violations, or an empty list if no violations are found
1098 assert disk_count == len(disk_sizes)
  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
1102 (constants.ISPEC_CPU_COUNT, cpu_count),
1103 (constants.ISPEC_DISK_COUNT, disk_count),
1104 (constants.ISPEC_NIC_COUNT, nic_count),
1105 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
  return filter(None,
                (_compute_fn(name, ipolicy, value)
1109 for (name, value) in test_settings))
1112 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1113 _compute_fn=_ComputeIPolicySpecViolation):
1114 """Compute if instance meets the specs of ipolicy.
1117 @param ipolicy: The ipolicy to verify against
1118 @type instance: L{objects.Instance}
1119 @param instance: The instance to verify
1120 @param _compute_fn: The function to verify ipolicy (unittest only)
1121 @see: L{_ComputeIPolicySpecViolation}
1124 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1125 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1126 disk_count = len(instance.disks)
1127 disk_sizes = [disk.size for disk in instance.disks]
1128 nic_count = len(instance.nics)
  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)
1134 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1135 _compute_fn=_ComputeIPolicySpecViolation):
1136 """Compute if instance specs meets the specs of ipolicy.
1139 @param ipolicy: The ipolicy to verify against
@type instance_spec: dict
1141 @param instance_spec: The instance spec to verify
1142 @param _compute_fn: The function to verify ipolicy (unittest only)
1143 @see: L{_ComputeIPolicySpecViolation}
1146 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1147 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1148 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1149 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1150 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)
1156 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1158 _compute_fn=_ComputeIPolicyInstanceViolation):
1159 """Compute if instance meets the specs of the new target group.
1161 @param ipolicy: The ipolicy to verify
1162 @param instance: The instance object to verify
1163 @param current_group: The current group of the instance
1164 @param target_group: The new group of the instance
1165 @param _compute_fn: The function to verify ipolicy (unittest only)
1166 @see: L{_ComputeIPolicySpecViolation}
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)
1175 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1176 _compute_fn=_ComputeIPolicyNodeViolation):
1177 """Checks that the target node is correct in terms of instance policy.
1179 @param ipolicy: The ipolicy to verify
1180 @param instance: The instance object to verify
1181 @param node: The new node to relocate
1182 @param ignore: Ignore violations of the ipolicy
1183 @param _compute_fn: The function to verify ipolicy (unittest only)
1184 @see: L{_ComputeIPolicySpecViolation}
1187 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1188 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1199 def _ExpandItemName(fn, name, kind):
1200 """Expand an item name.
1202 @param fn: the function to use for expansion
1203 @param name: requested item name
1204 @param kind: text description ('Node' or 'Instance')
1205 @return: the resolved (full) name
1206 @raise errors.OpPrereqError: if the item is not found
1209 full_name = fn(name)
1210 if full_name is None:
1211 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1216 def _ExpandNodeName(cfg, name):
1217 """Wrapper over L{_ExpandItemName} for nodes."""
1218 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1221 def _ExpandInstanceName(cfg, name):
1222 """Wrapper over L{_ExpandItemName} for instance."""
1223 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
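# Illustrative sketch (assumption): LUs use these wrappers to canonicalize
# user-supplied names early, e.g. in CheckArguments or ExpandNames:
#
#   self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
#   # e.g. "node1" -> "node1.example.com"; raises OpPrereqError if unknown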
1226 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1227 minmem, maxmem, vcpus, nics, disk_template, disks,
1228 bep, hvp, hypervisor_name, tags):
1229 """Builds instance related env variables for hooks
1231 This builds the hook environment from individual variables.
1234 @param name: the name of the instance
1235 @type primary_node: string
1236 @param primary_node: the name of the instance's primary node
1237 @type secondary_nodes: list
1238 @param secondary_nodes: list of secondary nodes as strings
1239 @type os_type: string
1240 @param os_type: the name of the instance's OS
1241 @type status: string
1242 @param status: the desired status of the instance
1243 @type minmem: string
1244 @param minmem: the minimum memory size of the instance
1245 @type maxmem: string
1246 @param maxmem: the maximum memory size of the instance
1248 @param vcpus: the count of VCPUs the instance has
1250 @param nics: list of tuples (ip, mac, mode, link) representing
1251 the NICs the instance has
1252 @type disk_template: string
1253 @param disk_template: the disk template of the instance
1255 @param disks: the list of (size, mode) pairs
1257 @param bep: the backend parameters for the instance
1259 @param hvp: the hypervisor parameters for the instance
1260 @type hypervisor_name: string
1261 @param hypervisor_name: the hypervisor for the instance
1263 @param tags: list of instance tags as strings
1265 @return: the hook environment for this instance
  env = {
    "INSTANCE_NAME": name,
1271 "INSTANCE_PRIMARY": primary_node,
1272 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1273 "INSTANCE_OS_TYPE": os_type,
1274 "INSTANCE_STATUS": status,
1275 "INSTANCE_MINMEM": minmem,
1276 "INSTANCE_MAXMEM": maxmem,
1277 # TODO(2.7) remove deprecated "memory" value
1278 "INSTANCE_MEMORY": maxmem,
1279 "INSTANCE_VCPUS": vcpus,
1280 "INSTANCE_DISK_TEMPLATE": disk_template,
1281 "INSTANCE_HYPERVISOR": hypervisor_name,
1284 nic_count = len(nics)
1285 for idx, (ip, mac, mode, link) in enumerate(nics):
1288 env["INSTANCE_NIC%d_IP" % idx] = ip
1289 env["INSTANCE_NIC%d_MAC" % idx] = mac
1290 env["INSTANCE_NIC%d_MODE" % idx] = mode
1291 env["INSTANCE_NIC%d_LINK" % idx] = link
1292 if mode == constants.NIC_MODE_BRIDGED:
1293 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1297 env["INSTANCE_NIC_COUNT"] = nic_count
1300 disk_count = len(disks)
1301 for idx, (size, mode) in enumerate(disks):
1302 env["INSTANCE_DISK%d_SIZE" % idx] = size
1303 env["INSTANCE_DISK%d_MODE" % idx] = mode
1307 env["INSTANCE_DISK_COUNT"] = disk_count
1312 env["INSTANCE_TAGS"] = " ".join(tags)
1314 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1315 for key, value in source.items():
1316 env["INSTANCE_%s_%s" % (kind, key)] = value
1321 def _NICListToTuple(lu, nics):
1322 """Build a list of nic information tuples.
1324 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1325 value in LUInstanceQueryData.
1327 @type lu: L{LogicalUnit}
1328 @param lu: the logical unit on whose behalf we execute
1329 @type nics: list of L{objects.NIC}
1330 @param nics: list of nics to convert to hooks tuples
1334 cluster = lu.cfg.GetClusterInfo()
1338 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1339 mode = filled_params[constants.NIC_MODE]
1340 link = filled_params[constants.NIC_LINK]
1341 hooks_nics.append((ip, mac, mode, link))
1345 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1346 """Builds instance related env variables for hooks from an object.
1348 @type lu: L{LogicalUnit}
1349 @param lu: the logical unit on whose behalf we execute
1350 @type instance: L{objects.Instance}
1351 @param instance: the instance for which we should build the
1353 @type override: dict
1354 @param override: dictionary with key/values that will override
1357 @return: the hook environment dictionary
1360 cluster = lu.cfg.GetClusterInfo()
1361 bep = cluster.FillBE(instance)
1362 hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
1365 "primary_node": instance.primary_node,
1366 "secondary_nodes": instance.secondary_nodes,
1367 "os_type": instance.os,
1368 "status": instance.admin_state,
1369 "maxmem": bep[constants.BE_MAXMEM],
1370 "minmem": bep[constants.BE_MINMEM],
1371 "vcpus": bep[constants.BE_VCPUS],
1372 "nics": _NICListToTuple(lu, instance.nics),
1373 "disk_template": instance.disk_template,
1374 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1377 "hypervisor_name": instance.hypervisor,
1378 "tags": instance.tags,
  if override:
    args.update(override)
1382 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
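# Illustrative sketch (assumption): the override dict replaces individual
# arguments derived from the instance object, e.g. to announce the state the
# instance is about to enter rather than its current one; the value shown is
# only an example:
#
#   env = _BuildInstanceHookEnvByObject(self, instance,
#                                       override={"status": "down"})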
1385 def _AdjustCandidatePool(lu, exceptions):
1386 """Adjust the candidate pool after node operations.
1389 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1392 utils.CommaJoin(node.name for node in mod_list))
1393 for name in mod_list:
1394 lu.context.ReaddNode(name)
1395 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
1401 def _DecideSelfPromotion(lu, exceptions=None):
1402 """Decide whether I should promote myself as a master candidate.
1405 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1406 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1407 # the new node will increase mc_max with one, so:
1408 mc_should = min(mc_should + 1, cp_size)
1409 return mc_now < mc_should
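# Worked example (assumption, made-up numbers): with candidate_pool_size = 10,
# four current candidates (mc_now = 4) and four needed ones (mc_should = 4),
# adding this node bumps mc_should to min(4 + 1, 10) = 5; since 4 < 5, the new
# node promotes itself to master candidate.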
1412 def _CalculateGroupIPolicy(cluster, group):
1413 """Calculate instance policy for group.
1416 return cluster.SimpleFillIPolicy(group.ipolicy)
1419 def _CheckNicsBridgesExist(lu, target_nics, target_node):
"""Check that the bridges needed by a list of nics exist.
1423 cluster = lu.cfg.GetClusterInfo()
1424 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1425 brlist = [params[constants.NIC_LINK] for params in paramslist
1426 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1428 result = lu.rpc.call_bridges_exist(target_node, brlist)
1429 result.Raise("Error checking bridges on destination node '%s'" %
1430 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1433 def _CheckInstanceBridgesExist(lu, instance, node=None):
"""Check that the bridges needed by an instance exist.
1438 node = instance.primary_node
1439 _CheckNicsBridgesExist(lu, instance.nics, node)
1442 def _CheckOSVariant(os_obj, name):
1443 """Check whether an OS name conforms to the os variants specification.
1445 @type os_obj: L{objects.OS}
1446 @param os_obj: OS object to check
1448 @param name: OS name passed by the user, to check for validity
1451 variant = objects.OS.GetVariant(name)
1452 if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1455 " passed)" % (os_obj.name, variant),
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
1462 if variant not in os_obj.supported_variants:
1463 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1466 def _GetNodeInstancesInner(cfg, fn):
1467 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1470 def _GetNodeInstances(cfg, node_name):
1471 """Returns a list of all primary and secondary instances on a node.
1475 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1478 def _GetNodePrimaryInstances(cfg, node_name):
1479 """Returns primary instances on a node.
1482 return _GetNodeInstancesInner(cfg,
1483 lambda inst: node_name == inst.primary_node)
1486 def _GetNodeSecondaryInstances(cfg, node_name):
1487 """Returns secondary instances on a node.
1490 return _GetNodeInstancesInner(cfg,
1491 lambda inst: node_name in inst.secondary_nodes)
1494 def _GetStorageTypeArgs(cfg, storage_type):
1495 """Returns the arguments for a storage type.
1498 # Special case for file storage
1499 if storage_type == constants.ST_FILE:
1500 # storage.FileStorage wants a list of storage directories
1501 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1506 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1509 for dev in instance.disks:
1510 cfg.SetDiskID(dev, node_name)
1512 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1513 result.Raise("Failed to get disk status from node %s" % node_name,
1514 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1516 for idx, bdev_status in enumerate(result.payload):
1517 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1523 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1524 """Check the sanity of iallocator and node arguments and use the
1525 cluster-wide iallocator if appropriate.
1527 Check that at most one of (iallocator, node) is specified. If none is
1528 specified, then the LU's opcode's iallocator slot is filled with the
1529 cluster-wide default iallocator.
1531 @type iallocator_slot: string
1532 @param iallocator_slot: the name of the opcode iallocator slot
1533 @type node_slot: string
1534 @param node_slot: the name of the opcode target node slot
1537 node = getattr(lu.op, node_slot, None)
1538 iallocator = getattr(lu.op, iallocator_slot, None)
1540 if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1543 elif node is None and iallocator is None:
1544 default_iallocator = lu.cfg.GetDefaultIAllocator()
1545 if default_iallocator:
1546 setattr(lu.op, iallocator_slot, default_iallocator)
1548 raise errors.OpPrereqError("No iallocator or node given and no"
1549 " cluster-wide default iallocator found;"
1550 " please specify either an iallocator or a"
1551 " node, or set a cluster-wide default"
1555 def _GetDefaultIAllocator(cfg, iallocator):
1556 """Decides on which iallocator to use.
1558 @type cfg: L{config.ConfigWriter}
1559 @param cfg: Cluster configuration object
1560 @type iallocator: string or None
1561 @param iallocator: Iallocator specified in opcode
1563 @return: Iallocator name
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1572 " opcode nor as a cluster-wide default",
1578 class LUClusterPostInit(LogicalUnit):
1579 """Logical unit for running hooks after cluster initialization.
1582 HPATH = "cluster-init"
1583 HTYPE = constants.HTYPE_CLUSTER
1585 def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }
1593 def BuildHooksNodes(self):
1594 """Build hooks nodes.
1597 return ([], [self.cfg.GetMasterNode()])
1599 def Exec(self, feedback_fn):
1606 class LUClusterDestroy(LogicalUnit):
1607 """Logical unit for destroying the cluster.
1610 HPATH = "cluster-destroy"
1611 HTYPE = constants.HTYPE_CLUSTER
1613 def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }
1621 def BuildHooksNodes(self):
1622 """Build hooks nodes.
1627 def CheckPrereq(self):
1628 """Check prerequisites.
1630 This checks whether the cluster is empty.
1632 Any errors are signaled by raising errors.OpPrereqError.
1635 master = self.cfg.GetMasterNode()
1637 nodelist = self.cfg.GetNodeList()
1638 if len(nodelist) != 1 or nodelist[0] != master:
1639 raise errors.OpPrereqError("There are still %d node(s) in"
1640 " this cluster." % (len(nodelist) - 1),
1642 instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
1645 " this cluster." % len(instancelist),
1648 def Exec(self, feedback_fn):
1649 """Destroys the cluster.
1652 master_params = self.cfg.GetMasterNetworkParameters()
1654 # Run post hooks on master node before it's removed
1655 _RunPostHook(self, master_params.name)
1657 ems = self.cfg.GetUseExternalMipScript()
1658 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1661 self.LogWarning("Error disabling the master IP address: %s",
1664 return master_params.name
1667 def _VerifyCertificate(filename):
1668 """Verifies a certificate for L{LUClusterVerifyConfig}.
1670 @type filename: string
1671 @param filename: Path to PEM file
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1676 utils.ReadFile(filename))
1677 except Exception, err: # pylint: disable=W0703
1678 return (LUClusterVerifyConfig.ETYPE_ERROR,
1679 "Failed to load X509 certificate %s: %s" % (filename, err))
  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1683 constants.SSL_CERT_EXPIRATION_ERROR)
1686 fnamemsg = "While verifying %s: %s" % (filename, msg)
  if errcode is None:
    return (None, fnamemsg)
1692 elif errcode == utils.CERT_WARNING:
1693 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1694 elif errcode == utils.CERT_ERROR:
1695 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1697 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1700 def _GetAllHypervisorParameters(cluster, instances):
1701 """Compute the set of all hypervisor parameters.
1703 @type cluster: L{objects.Cluster}
1704 @param cluster: the cluster object
@type instances: list of L{objects.Instance}
1706 @param instances: additional instances from which to obtain parameters
1707 @rtype: list of (origin, hypervisor, parameters)
1708 @return: a list with all parameters found, indicating the hypervisor they
1709 apply to, and the origin (can be "cluster", "os X", or "instance Y")
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
1715 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1717 for os_name, os_hvp in cluster.os_hvp.items():
1718 for hv_name, hv_params in os_hvp.items():
1720 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1721 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1723 # TODO: collapse identical parameter values in a single one
1724 for instance in instances:
1725 if instance.hvparams:
1726 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1727 cluster.FillHV(instance)))
1732 class _VerifyErrors(object):
1733 """Mix-in for cluster/group verify LUs.
1735 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1736 self.op and self._feedback_fn to be available.)
1740 ETYPE_FIELD = "code"
1741 ETYPE_ERROR = "ERROR"
1742 ETYPE_WARNING = "WARNING"
1744 def _Error(self, ecode, item, msg, *args, **kwargs):
1745 """Format an error message.
1747 Based on the opcode's error_codes parameter, either format a
1748 parseable error code, or a simpler error string.
1750 This must be called only from Exec and functions called from Exec.
1753 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1754 itype, etxt, _ = ecode
1755 # first complete the msg
1758 # then format the whole message
1759 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1760 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1767 # and finally report it via the feedback_fn
1768 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1770 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1771 """Log an error message if the passed condition is True.
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101
    # If the error code is in the list of ignored errors, demote the error to
    # a warning
1779 (_, etxt, _) = ecode
1780 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1781 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
    if cond:
      self._Error(ecode, *args, **kwargs)
1786 # do not mark the operation as failed for WARN cases only
1787 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1788 self.bad = self.bad or cond
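  # Illustrative sketch (assumption): how verification code built on this
  # mix-in typically reports problems; the error constant is only an example.
  #
  #   self._ErrorIf(test, constants.CV_ECLUSTERCFG, None,
  #                 "configuration problem: %s", err_msg)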
1791 class LUClusterVerify(NoHooksLU):
1792 """Submits all jobs necessary to verify the cluster.
1797 def ExpandNames(self):
1798 self.needed_locks = {}
1800 def Exec(self, feedback_fn):
1803 if self.op.group_name:
1804 groups = [self.op.group_name]
1805 depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()
1809 # Verify global configuration
1811 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1814 # Always depend on global verification
1815 depends_fn = lambda: [(-len(jobs), [])]
1817 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1818 ignore_errors=self.op.ignore_errors,
1819 depends=depends_fn())]
1820 for group in groups)
1822 # Fix up all parameters
1823 for op in itertools.chain(*jobs): # pylint: disable=W0142
1824 op.debug_simulate_errors = self.op.debug_simulate_errors
1825 op.verbose = self.op.verbose
1826 op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
1829 except AttributeError:
1830 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1832 return ResultWithJobs(jobs)
1835 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1836 """Verifies the cluster config.
1841 def _VerifyHVP(self, hvp_data):
1842 """Verifies locally the syntax of the hypervisor parameters.
1845 for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
1850 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1851 hv_class.CheckParameterSyntax(hv_params)
1852 except errors.GenericError, err:
1853 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1855 def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
1858 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1859 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1860 self.all_node_info = self.cfg.GetAllNodesInfo()
1861 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1862 self.needed_locks = {}
1864 def Exec(self, feedback_fn):
"""Verify integrity of cluster, performing various tests on nodes.
1869 self._feedback_fn = feedback_fn
1871 feedback_fn("* Verifying cluster config")
1873 for msg in self.cfg.VerifyConfig():
1874 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1876 feedback_fn("* Verifying cluster certificate files")
1878 for cert_filename in constants.ALL_CERT_FILES:
1879 (errcode, msg) = _VerifyCertificate(cert_filename)
1880 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1882 feedback_fn("* Verifying hypervisor parameters")
1884 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1885 self.all_inst_info.values()))
1887 feedback_fn("* Verifying all nodes belong to an existing group")
1889 # We do this verification here because, should this bogus circumstance
1890 # occur, it would never be caught by VerifyGroup, which only acts on
1891 # nodes/instances reachable from existing node groups.
1893 dangling_nodes = set(node.name for node in self.all_node_info.values()
1894 if node.group not in self.all_group_info)
1896 dangling_instances = {}
1897 no_node_instances = []
1899 for inst in self.all_inst_info.values():
1900 if inst.primary_node in dangling_nodes:
1901 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1902 elif inst.primary_node not in self.all_node_info:
1903 no_node_instances.append(inst.name)
1908 utils.CommaJoin(dangling_instances.get(node.name,
1910 for node in dangling_nodes]
1912 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1914 "the following nodes (and their instances) belong to a non"
1915 " existing group: %s", utils.CommaJoin(pretty_dangling))
1917 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1919 "the following instances have a non-existing primary-node:"
1920 " %s", utils.CommaJoin(no_node_instances))
1925 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1926 """Verifies the status of a node group.
1929 HPATH = "cluster-verify"
1930 HTYPE = constants.HTYPE_CLUSTER
1933 _HOOKS_INDENT_RE = re.compile("^", re.M)
1935 class NodeImage(object):
1936 """A class representing the logical and physical status of a node.
1939 @ivar name: the node name to which this object refers
1940 @ivar volumes: a structure as returned from
1941 L{ganeti.backend.GetVolumeList} (runtime)
1942 @ivar instances: a list of running instances (runtime)
1943 @ivar pinst: list of configured primary instances (config)
1944 @ivar sinst: list of configured secondary instances (config)
1945 @ivar sbp: dictionary of {primary-node: list of instances} for all
1946 instances for which this node is secondary (config)
1947 @ivar mfree: free memory, as reported by hypervisor (runtime)
1948 @ivar dfree: free disk, as reported by the node (runtime)
1949 @ivar offline: the offline status (config)
1950 @type rpc_fail: boolean
@ivar rpc_fail: whether the RPC verify call was successful (overall,
1952 not whether the individual keys were correct) (runtime)
1953 @type lvm_fail: boolean
1954 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1955 @type hyp_fail: boolean
1956 @ivar hyp_fail: whether the RPC call didn't return the instance list
1957 @type ghost: boolean
1958 @ivar ghost: whether this is a known node or not (config)
1959 @type os_fail: boolean
1960 @ivar os_fail: whether the RPC call didn't return valid OS data
1962 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1963 @type vm_capable: boolean
1964 @ivar vm_capable: whether the node can host instances
1967 def __init__(self, offline=False, name=None, vm_capable=True):
1976 self.offline = offline
1977 self.vm_capable = vm_capable
1978 self.rpc_fail = False
1979 self.lvm_fail = False
1980 self.hyp_fail = False
1982 self.os_fail = False
1985 def ExpandNames(self):
1986 # This raises errors.OpPrereqError on its own:
1987 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1989 # Get instances in node group; this is unsafe and needs verification later
1990 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1992 self.needed_locks = {
1993 locking.LEVEL_INSTANCE: inst_names,
1994 locking.LEVEL_NODEGROUP: [self.group_uuid],
1995 locking.LEVEL_NODE: [],
1998 self.share_locks = _ShareAll()
2000 def DeclareLocks(self, level):
2001 if level == locking.LEVEL_NODE:
2002 # Get members of node group; this is unsafe and needs verification later
2003 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2005 all_inst_info = self.cfg.GetAllInstancesInfo()
2007 # In Exec(), we warn about mirrored instances that have primary and
2008 # secondary living in separate node groups. To fully verify that
2009 # volumes for these instances are healthy, we will need to do an
2010 # extra call to their secondaries. We ensure here those nodes will
2012 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2013 # Important: access only the instances whose lock is owned
2014 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2015 nodes.update(all_inst_info[inst].secondary_nodes)
2017 self.needed_locks[locking.LEVEL_NODE] = nodes
2019 def CheckPrereq(self):
2020 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2021 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2023 group_nodes = set(self.group_info.members)
2024 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2027 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2029 unlocked_instances = \
2030 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2033 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2034 utils.CommaJoin(unlocked_nodes))
2036 if unlocked_instances:
2037 raise errors.OpPrereqError("Missing lock for instances: %s" %
2038 utils.CommaJoin(unlocked_instances))
2040 self.all_node_info = self.cfg.GetAllNodesInfo()
2041 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2043 self.my_node_names = utils.NiceSort(group_nodes)
2044 self.my_inst_names = utils.NiceSort(group_instances)
2046 self.my_node_info = dict((name, self.all_node_info[name])
2047 for name in self.my_node_names)
2049 self.my_inst_info = dict((name, self.all_inst_info[name])
2050 for name in self.my_inst_names)
2052 # We detect here the nodes that will need the extra RPC calls for verifying
2053 # split LV volumes; they should be locked.
2054 extra_lv_nodes = set()
2056 for inst in self.my_inst_info.values():
2057 if inst.disk_template in constants.DTS_INT_MIRROR:
2058 group = self.my_node_info[inst.primary_node].group
2059 for nname in inst.secondary_nodes:
2060 if self.all_node_info[nname].group != group:
2061 extra_lv_nodes.add(nname)
2063 unlocked_lv_nodes = \
2064 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2066 if unlocked_lv_nodes:
2067 raise errors.OpPrereqError("these nodes could be locked: %s" %
2068 utils.CommaJoin(unlocked_lv_nodes))
2069 self.extra_lv_nodes = list(extra_lv_nodes)
2071 def _VerifyNode(self, ninfo, nresult):
2072 """Perform some basic validation on data returned from a node.
2074 - check the result data structure is well formed and has all the mandatory fields
2076 - check ganeti version
2078 @type ninfo: L{objects.Node}
2079 @param ninfo: the node to check
2080 @param nresult: the results from the node
2082 @return: whether overall this call was successful (and we can expect
2083 reasonable values in the response)
2087 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2089 # main result, nresult should be a non-empty dict
2090 test = not nresult or not isinstance(nresult, dict)
2091 _ErrorIf(test, constants.CV_ENODERPC, node,
2092 "unable to verify node: no data returned")
2096 # compares ganeti version
2097 local_version = constants.PROTOCOL_VERSION
2098 remote_version = nresult.get("version", None)
2099 test = not (remote_version and
2100 isinstance(remote_version, (list, tuple)) and
2101 len(remote_version) == 2)
2102 _ErrorIf(test, constants.CV_ENODERPC, node,
2103 "connection to node returned invalid data")
2107 test = local_version != remote_version[0]
2108 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2109 "incompatible protocol versions: master %s,"
2110 " node %s", local_version, remote_version[0])
2114 # node seems compatible, we can actually try to look into its results
2116 # full package version
2117 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2118 constants.CV_ENODEVERSION, node,
2119 "software version mismatch: master %s, node %s",
2120 constants.RELEASE_VERSION, remote_version[1],
2121 code=self.ETYPE_WARNING)
2123 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2124 if ninfo.vm_capable and isinstance(hyp_result, dict):
2125 for hv_name, hv_result in hyp_result.iteritems():
2126 test = hv_result is not None
2127 _ErrorIf(test, constants.CV_ENODEHV, node,
2128 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2130 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2131 if ninfo.vm_capable and isinstance(hvp_result, list):
2132 for item, hv_name, hv_result in hvp_result:
2133 _ErrorIf(True, constants.CV_ENODEHV, node,
2134 "hypervisor %s parameter verify failure (source %s): %s",
2135 hv_name, item, hv_result)
2137 test = nresult.get(constants.NV_NODESETUP,
2138 ["Missing NODESETUP results"])
2139 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2144 def _VerifyNodeTime(self, ninfo, nresult,
2145 nvinfo_starttime, nvinfo_endtime):
2146 """Check the node time.
2148 @type ninfo: L{objects.Node}
2149 @param ninfo: the node to check
2150 @param nresult: the remote results for the node
2151 @param nvinfo_starttime: the start time of the RPC call
2152 @param nvinfo_endtime: the end time of the RPC call
2156 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2158 ntime = nresult.get(constants.NV_TIME, None)
2160 ntime_merged = utils.MergeTime(ntime)
2161 except (ValueError, TypeError):
2162 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2165 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2166 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2167 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2168 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2172 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2173 "Node time diverges by at least %s from master node time",
2176 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2177 """Check the node LVM results.
2179 @type ninfo: L{objects.Node}
2180 @param ninfo: the node to check
2181 @param nresult: the remote results for the node
2182 @param vg_name: the configured VG name
2189 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2191 # checks vg existence and size > 20G
2192 vglist = nresult.get(constants.NV_VGLIST, None)
2194 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2196 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2197 constants.MIN_VG_SIZE)
2198 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2201 pvlist = nresult.get(constants.NV_PVLIST, None)
2202 test = pvlist is None
2203 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2205 # check that ':' is not present in PV names, since it's a
2206 # special character for lvcreate (denotes the range of PEs to allocate on this PV)
2208 for _, pvname, owner_vg in pvlist:
2209 test = ":" in pvname
2210 _ErrorIf(test, constants.CV_ENODELVM, node,
2211 "Invalid character ':' in PV '%s' of VG '%s'",
2214 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2215 """Check the node bridges.
2217 @type ninfo: L{objects.Node}
2218 @param ninfo: the node to check
2219 @param nresult: the remote results for the node
2220 @param bridges: the expected list of bridges
2227 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2229 missing = nresult.get(constants.NV_BRIDGES, None)
2230 test = not isinstance(missing, list)
2231 _ErrorIf(test, constants.CV_ENODENET, node,
2232 "did not return valid bridge information")
2234 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2235 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2237 def _VerifyNodeUserScripts(self, ninfo, nresult):
2238 """Check the presence and executability of user scripts on the node.
2240 @type ninfo: L{objects.Node}
2241 @param ninfo: the node to check
2242 @param nresult: the remote results for the node
2247 test = not constants.NV_USERSCRIPTS in nresult
2248 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2249 "did not return user scripts information")
2251 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2253 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2254 "user scripts not present or not executable: %s" %
2255 utils.CommaJoin(sorted(broken_scripts)))
2257 def _VerifyNodeNetwork(self, ninfo, nresult):
2258 """Check the node network connectivity results.
2260 @type ninfo: L{objects.Node}
2261 @param ninfo: the node to check
2262 @param nresult: the remote results for the node
2266 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2268 test = constants.NV_NODELIST not in nresult
2269 _ErrorIf(test, constants.CV_ENODESSH, node,
2270 "node hasn't returned node ssh connectivity data")
2272 if nresult[constants.NV_NODELIST]:
2273 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2274 _ErrorIf(True, constants.CV_ENODESSH, node,
2275 "ssh communication with node '%s': %s", a_node, a_msg)
2277 test = constants.NV_NODENETTEST not in nresult
2278 _ErrorIf(test, constants.CV_ENODENET, node,
2279 "node hasn't returned node tcp connectivity data")
2281 if nresult[constants.NV_NODENETTEST]:
2282 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2284 _ErrorIf(True, constants.CV_ENODENET, node,
2285 "tcp communication with node '%s': %s",
2286 anode, nresult[constants.NV_NODENETTEST][anode])
2288 test = constants.NV_MASTERIP not in nresult
2289 _ErrorIf(test, constants.CV_ENODENET, node,
2290 "node hasn't returned node master IP reachability data")
2292 if not nresult[constants.NV_MASTERIP]:
2293 if node == self.master_node:
2294 msg = "the master node cannot reach the master IP (not configured?)"
2296 msg = "cannot reach the master IP"
2297 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2299 def _VerifyInstance(self, instance, instanceconfig, node_image,
2301 """Verify an instance.
2303 This function checks to see if the required block devices are
2304 available on the instance's node.
2307 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2308 node_current = instanceconfig.primary_node
2310 node_vol_should = {}
2311 instanceconfig.MapLVsByNode(node_vol_should)
2313 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2314 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2315 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2317 for node in node_vol_should:
2318 n_img = node_image[node]
2319 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2320 # ignore missing volumes on offline or broken nodes
2322 for volume in node_vol_should[node]:
2323 test = volume not in n_img.volumes
2324 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2325 "volume %s missing on node %s", volume, node)
2327 if instanceconfig.admin_state == constants.ADMINST_UP:
2328 pri_img = node_image[node_current]
2329 test = instance not in pri_img.instances and not pri_img.offline
2330 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2331 "instance not running on its primary node %s",
2334 diskdata = [(nname, success, status, idx)
2335 for (nname, disks) in diskstatus.items()
2336 for idx, (success, status) in enumerate(disks)]
2338 for nname, success, bdev_status, idx in diskdata:
2339 # the 'ghost node' construction in Exec() ensures that we have a node_image entry here
2341 snode = node_image[nname]
2342 bad_snode = snode.ghost or snode.offline
2343 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2344 not success and not bad_snode,
2345 constants.CV_EINSTANCEFAULTYDISK, instance,
2346 "couldn't retrieve status for disk/%s on %s: %s",
2347 idx, nname, bdev_status)
2348 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2349 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2350 constants.CV_EINSTANCEFAULTYDISK, instance,
2351 "disk/%s on %s is faulty", idx, nname)
2353 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2354 """Verify if there are any unknown volumes in the cluster.
2356 The .os, .swap and backup volumes are ignored. All other volumes are
2357 reported as unknown.
2359 @type reserved: L{ganeti.utils.FieldSet}
2360 @param reserved: a FieldSet of reserved volume names
2363 for node, n_img in node_image.items():
2364 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2365 # skip non-healthy nodes
2367 for volume in n_img.volumes:
2368 test = ((node not in node_vol_should or
2369 volume not in node_vol_should[node]) and
2370 not reserved.Matches(volume))
2371 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2372 "volume %s is unknown", volume)
2374 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2375 """Verify N+1 Memory Resilience.
2377 Check that if one single node dies we can still start all the
2378 instances it was primary for.
2381 cluster_info = self.cfg.GetClusterInfo()
2382 for node, n_img in node_image.items():
2383 # This code checks that every node which is now listed as
2384 # secondary has enough memory to host all instances it is
2385 # supposed to should a single other node in the cluster fail.
2386 # FIXME: not ready for failover to an arbitrary node
2387 # FIXME: does not support file-backed instances
2388 # WARNING: we currently take into account down instances as well
2389 # as up ones, considering that even if they're down someone
2390 # might want to start them even in the event of a node failure.
2392 # we're skipping offline nodes from the N+1 warning, since
2393 # most likely we don't have good memory information from them;
2394 # we already list instances living on such nodes, and that's enough warning
2397 #TODO(dynmem): use MINMEM for checking
2398 #TODO(dynmem): also consider ballooning out other instances
2399 for prinode, instances in n_img.sbp.items():
2401 for instance in instances:
2402 bep = cluster_info.FillBE(instance_cfg[instance])
2403 if bep[constants.BE_AUTO_BALANCE]:
2404 needed_mem += bep[constants.BE_MAXMEM]
2405 test = n_img.mfree < needed_mem
2406 self._ErrorIf(test, constants.CV_ENODEN1, node,
2407 "not enough memory to accommodate instance failovers"
2408 " should node %s fail (%dMiB needed, %dMiB available)",
2409 prinode, needed_mem, n_img.mfree)
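# Illustrative sketch only (not called by the verification flow): the N+1
# memory check above, reduced to plain data.  The parameters are assumptions
# for this sketch: mfree is the node's free memory in MiB, and
# instances_by_primary maps each primary node to a list of
# (maxmem, auto_balance) tuples for the instances this node is secondary for.
def _ExampleNPlusOneCheck(mfree, instances_by_primary):
  failing = []
  for (prinode, instances) in instances_by_primary.items():
    # only auto-balanced instances are expected to fail over automatically
    needed_mem = sum(maxmem for (maxmem, auto_balance) in instances
                     if auto_balance)
    if mfree < needed_mem:
      failing.append((prinode, needed_mem))
  return failing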
2412 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2413 (files_all, files_opt, files_mc, files_vm)):
2414 """Verifies file checksums collected from all nodes.
2416 @param errorif: Callback for reporting errors
2417 @param nodeinfo: List of L{objects.Node} objects
2418 @param master_node: Name of master node
2419 @param all_nvinfo: RPC results
2422 # Define functions determining which nodes to consider for a file
2425 (files_mc, lambda node: (node.master_candidate or
2426 node.name == master_node)),
2427 (files_vm, lambda node: node.vm_capable),
2430 # Build mapping from filename to list of nodes which should have the file
2432 for (files, fn) in files2nodefn:
2434 filenodes = nodeinfo
2436 filenodes = filter(fn, nodeinfo)
2437 nodefiles.update((filename,
2438 frozenset(map(operator.attrgetter("name"), filenodes)))
2439 for filename in files)
2441 assert set(nodefiles) == (files_all | files_mc | files_vm)
2443 fileinfo = dict((filename, {}) for filename in nodefiles)
2444 ignore_nodes = set()
2446 for node in nodeinfo:
2448 ignore_nodes.add(node.name)
2451 nresult = all_nvinfo[node.name]
2453 if nresult.fail_msg or not nresult.payload:
2456 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2458 test = not (node_files and isinstance(node_files, dict))
2459 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2460 "Node did not return file checksum data")
2462 ignore_nodes.add(node.name)
2465 # Build per-checksum mapping from filename to nodes having it
2466 for (filename, checksum) in node_files.items():
2467 assert filename in nodefiles
2468 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2470 for (filename, checksums) in fileinfo.items():
2471 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2473 # Nodes having the file
2474 with_file = frozenset(node_name
2475 for nodes in fileinfo[filename].values()
2476 for node_name in nodes) - ignore_nodes
2478 expected_nodes = nodefiles[filename] - ignore_nodes
2480 # Nodes missing file
2481 missing_file = expected_nodes - with_file
2483 if filename in files_opt:
2485 errorif(missing_file and missing_file != expected_nodes,
2486 constants.CV_ECLUSTERFILECHECK, None,
2487 "File %s is optional, but it must exist on all or no"
2488 " nodes (not found on %s)",
2489 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2491 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2492 "File %s is missing from node(s) %s", filename,
2493 utils.CommaJoin(utils.NiceSort(missing_file)))
2495 # Warn if a node has a file it shouldn't
2496 unexpected = with_file - expected_nodes
2498 constants.CV_ECLUSTERFILECHECK, None,
2499 "File %s should not exist on node(s) %s",
2500 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2502 # See if there are multiple versions of the file
2503 test = len(checksums) > 1
2505 variants = ["variant %s on %s" %
2506 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2507 for (idx, (checksum, nodes)) in
2508 enumerate(sorted(checksums.items()))]
2512 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2513 "File %s found with %s different checksums (%s)",
2514 filename, len(checksums), "; ".join(variants))
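# Illustrative sketch only: how the "different checksums" report above can be
# derived from a {checksum: set of node names} mapping.  The function name and
# return format are assumptions for this example; the real code additionally
# uses utils.NiceSort for ordering.
def _ExampleDescribeFileVariants(checksums):
  if len(checksums) <= 1:
    # all nodes agree (or nothing was reported), nothing to describe
    return None
  return ["variant %s on %s" % (idx + 1, utils.CommaJoin(sorted(nodes)))
          for (idx, (_, nodes)) in enumerate(sorted(checksums.items()))]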
2516 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2518 """Verifies the node DRBD status.
2520 @type ninfo: L{objects.Node}
2521 @param ninfo: the node to check
2522 @param nresult: the remote results for the node
2523 @param instanceinfo: the dict of instances
2524 @param drbd_helper: the configured DRBD usermode helper
2525 @param drbd_map: the DRBD map as returned by
2526 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2530 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2533 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2534 test = (helper_result is None)
2535 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2536 "no drbd usermode helper returned")
2538 status, payload = helper_result
2540 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2541 "drbd usermode helper check unsuccessful: %s", payload)
2542 test = status and (payload != drbd_helper)
2543 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2544 "wrong drbd usermode helper: %s", payload)
2546 # compute the DRBD minors
2548 for minor, instance in drbd_map[node].items():
2549 test = instance not in instanceinfo
2550 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2551 "ghost instance '%s' in temporary DRBD map", instance)
2552 # ghost instance should not be running, but otherwise we
2553 # don't give double warnings (both ghost instance and
2554 # unallocated minor in use)
2556 node_drbd[minor] = (instance, False)
2558 instance = instanceinfo[instance]
2559 node_drbd[minor] = (instance.name,
2560 instance.admin_state == constants.ADMINST_UP)
2562 # and now check them
2563 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2564 test = not isinstance(used_minors, (tuple, list))
2565 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2566 "cannot parse drbd status file: %s", str(used_minors))
2568 # we cannot check drbd status
2571 for minor, (iname, must_exist) in node_drbd.items():
2572 test = minor not in used_minors and must_exist
2573 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2574 "drbd minor %d of instance %s is not active", minor, iname)
2575 for minor in used_minors:
2576 test = minor not in node_drbd
2577 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2578 "unallocated drbd minor %d is in use", minor)
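# Illustrative sketch only: the DRBD minor cross-check above on plain data.
# node_drbd maps minor -> (instance name, must_exist) as derived from the
# configuration, used_minors is the list the node actually reported; the
# return format is an assumption made for this sketch.
def _ExampleDrbdMinorCheck(node_drbd, used_minors):
  inactive = [(minor, iname)
              for (minor, (iname, must_exist)) in node_drbd.items()
              if must_exist and minor not in used_minors]
  unallocated = [minor for minor in used_minors if minor not in node_drbd]
  return (inactive, unallocated)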
2580 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2581 """Builds the node OS structures.
2583 @type ninfo: L{objects.Node}
2584 @param ninfo: the node to check
2585 @param nresult: the remote results for the node
2586 @param nimg: the node image object
2590 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2592 remote_os = nresult.get(constants.NV_OSLIST, None)
2593 test = (not isinstance(remote_os, list) or
2594 not compat.all(isinstance(v, list) and len(v) == 7
2595 for v in remote_os))
2597 _ErrorIf(test, constants.CV_ENODEOS, node,
2598 "node hasn't returned valid OS data")
2607 for (name, os_path, status, diagnose,
2608 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2610 if name not in os_dict:
2613 # parameters is a list of lists instead of list of tuples due to
2614 # JSON lacking a real tuple type, fix it:
2615 parameters = [tuple(v) for v in parameters]
2616 os_dict[name].append((os_path, status, diagnose,
2617 set(variants), set(parameters), set(api_ver)))
2619 nimg.oslist = os_dict
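# Illustrative sketch only: converting one raw NV_OSLIST entry (a 7-element
# list, as validated above) into the tuple stored in nimg.oslist.  The helper
# name is an assumption for this example.
def _ExampleOsListEntry(raw_entry):
  (name, os_path, status, diagnose, variants, parameters, api_ver) = raw_entry
  # JSON has no tuple type, so parameters arrive as lists of lists
  parameters = [tuple(v) for v in parameters]
  return (name, (os_path, status, diagnose,
                 set(variants), set(parameters), set(api_ver)))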
2621 def _VerifyNodeOS(self, ninfo, nimg, base):
2622 """Verifies the node OS list.
2624 @type ninfo: L{objects.Node}
2625 @param ninfo: the node to check
2626 @param nimg: the node image object
2627 @param base: the 'template' node we match against (e.g. from the master)
2631 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2633 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2635 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2636 for os_name, os_data in nimg.oslist.items():
2637 assert os_data, "Empty OS status for OS %s?!" % os_name
2638 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2639 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2640 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2641 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2642 "OS '%s' has multiple entries (first one shadows the rest): %s",
2643 os_name, utils.CommaJoin([v[0] for v in os_data]))
2644 # comparisons with the 'base' image
2645 test = os_name not in base.oslist
2646 _ErrorIf(test, constants.CV_ENODEOS, node,
2647 "Extra OS %s not present on reference node (%s)",
2651 assert base.oslist[os_name], "Base node has empty OS status?"
2652 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2654 # base OS is invalid, skipping
2656 for kind, a, b in [("API version", f_api, b_api),
2657 ("variants list", f_var, b_var),
2658 ("parameters", beautify_params(f_param),
2659 beautify_params(b_param))]:
2660 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2661 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2662 kind, os_name, base.name,
2663 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2665 # check any missing OSes
2666 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2667 _ErrorIf(missing, constants.CV_ENODEOS, node,
2668 "OSes present on reference node %s but missing on this node: %s",
2669 base.name, utils.CommaJoin(missing))
2671 def _VerifyOob(self, ninfo, nresult):
2672 """Verifies out of band functionality of a node.
2674 @type ninfo: L{objects.Node}
2675 @param ninfo: the node to check
2676 @param nresult: the remote results for the node
2680 # We just have to verify the paths on master and/or master candidates
2681 # as the oob helper is invoked on the master
2682 if ((ninfo.master_candidate or ninfo.master_capable) and
2683 constants.NV_OOB_PATHS in nresult):
2684 for path_result in nresult[constants.NV_OOB_PATHS]:
2685 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2687 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2688 """Verifies and updates the node volume data.
2690 This function will update a L{NodeImage}'s internal structures
2691 with data from the remote call.
2693 @type ninfo: L{objects.Node}
2694 @param ninfo: the node to check
2695 @param nresult: the remote results for the node
2696 @param nimg: the node image object
2697 @param vg_name: the configured VG name
2701 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2703 nimg.lvm_fail = True
2704 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2707 elif isinstance(lvdata, basestring):
2708 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2709 utils.SafeEncode(lvdata))
2710 elif not isinstance(lvdata, dict):
2711 _ErrorIf(True, constants.CV_ENODELVM, node,
2712 "rpc call to node failed (lvlist)")
2714 nimg.volumes = lvdata
2715 nimg.lvm_fail = False
2717 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2718 """Verifies and updates the node instance list.
2720 If the listing was successful, then updates this node's instance
2721 list. Otherwise, it marks the RPC call as failed for the instance list.
2724 @type ninfo: L{objects.Node}
2725 @param ninfo: the node to check
2726 @param nresult: the remote results for the node
2727 @param nimg: the node image object
2730 idata = nresult.get(constants.NV_INSTANCELIST, None)
2731 test = not isinstance(idata, list)
2732 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2733 "rpc call to node failed (instancelist): %s",
2734 utils.SafeEncode(str(idata)))
2736 nimg.hyp_fail = True
2738 nimg.instances = idata
2740 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2741 """Verifies and computes a node information map
2743 @type ninfo: L{objects.Node}
2744 @param ninfo: the node to check
2745 @param nresult: the remote results for the node
2746 @param nimg: the node image object
2747 @param vg_name: the configured VG name
2751 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2753 # try to read free memory (from the hypervisor)
2754 hv_info = nresult.get(constants.NV_HVINFO, None)
2755 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2756 _ErrorIf(test, constants.CV_ENODEHV, node,
2757 "rpc call to node failed (hvinfo)")
2760 nimg.mfree = int(hv_info["memory_free"])
2761 except (ValueError, TypeError):
2762 _ErrorIf(True, constants.CV_ENODERPC, node,
2763 "node returned invalid nodeinfo, check hypervisor")
2765 # FIXME: devise a free space model for file based instances as well
2766 if vg_name is not None:
2767 test = (constants.NV_VGLIST not in nresult or
2768 vg_name not in nresult[constants.NV_VGLIST])
2769 _ErrorIf(test, constants.CV_ENODELVM, node,
2770 "node didn't return data for the volume group '%s'"
2771 " - it is either missing or broken", vg_name)
2774 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2775 except (ValueError, TypeError):
2776 _ErrorIf(True, constants.CV_ENODERPC, node,
2777 "node returned invalid LVM info, check LVM status")
2779 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2780 """Gets per-disk status information for all instances.
2782 @type nodelist: list of strings
2783 @param nodelist: Node names
2784 @type node_image: dict of (name, L{objects.Node})
2785 @param node_image: Node objects
2786 @type instanceinfo: dict of (name, L{objects.Instance})
2787 @param instanceinfo: Instance objects
2788 @rtype: {instance: {node: [(success, payload)]}}
2789 @return: a dictionary of per-instance dictionaries with nodes as
2790 keys and disk information as values; the disk information is a
2791 list of tuples (success, payload)
2794 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2797 node_disks_devonly = {}
2798 diskless_instances = set()
2799 diskless = constants.DT_DISKLESS
2801 for nname in nodelist:
2802 node_instances = list(itertools.chain(node_image[nname].pinst,
2803 node_image[nname].sinst))
2804 diskless_instances.update(inst for inst in node_instances
2805 if instanceinfo[inst].disk_template == diskless)
2806 disks = [(inst, disk)
2807 for inst in node_instances
2808 for disk in instanceinfo[inst].disks]
2811 # No need to collect data
2814 node_disks[nname] = disks
2816 # Creating copies as SetDiskID below will modify the objects and that can
2817 # lead to incorrect data returned from nodes
2818 devonly = [dev.Copy() for (_, dev) in disks]
2821 self.cfg.SetDiskID(dev, nname)
2823 node_disks_devonly[nname] = devonly
2825 assert len(node_disks) == len(node_disks_devonly)
2827 # Collect data from all nodes with disks
2828 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2831 assert len(result) == len(node_disks)
2835 for (nname, nres) in result.items():
2836 disks = node_disks[nname]
2839 # No data from this node
2840 data = len(disks) * [(False, "node offline")]
2843 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2844 "while getting disk information: %s", msg)
2846 # No data from this node
2847 data = len(disks) * [(False, msg)]
2850 for idx, i in enumerate(nres.payload):
2851 if isinstance(i, (tuple, list)) and len(i) == 2:
2854 logging.warning("Invalid result from node %s, entry %d: %s",
2856 data.append((False, "Invalid result from the remote node"))
2858 for ((inst, _), status) in zip(disks, data):
2859 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2861 # Add empty entries for diskless instances.
2862 for inst in diskless_instances:
2863 assert inst not in instdisk
2866 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2867 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2868 compat.all(isinstance(s, (tuple, list)) and
2869 len(s) == 2 for s in statuses)
2870 for inst, nnames in instdisk.items()
2871 for nname, statuses in nnames.items())
2872 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
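# Illustrative example of the structure returned above; all names and values
# are invented.  Each inner list holds one (success, payload) entry per disk
# of the instance, and diskless instances map to an empty dict.
#
#   instdisk = {
#     "inst1.example.com": {
#       "node1.example.com": [(True, status_disk0), (True, status_disk1)],
#       "node2.example.com": [(False, "node offline"), (False, "node offline")],
#       },
#     "diskless.example.com": {},
#     }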
2877 def _SshNodeSelector(group_uuid, all_nodes):
2878 """Create endless iterators for all potential SSH check hosts.
2881 nodes = [node for node in all_nodes
2882 if (node.group != group_uuid and
2884 keyfunc = operator.attrgetter("group")
2886 return map(itertools.cycle,
2887 [sorted(map(operator.attrgetter("name"), names))
2888 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2892 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2893 """Choose which nodes should talk to which other nodes.
2895 We will make nodes contact all nodes in their group, and one node from every other group.
2898 @warning: This algorithm has a known issue if one node group is much
2899 smaller than others (e.g. just one node). In such a case all other
2900 nodes will talk to the single node.
2903 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2904 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2906 return (online_nodes,
2907 dict((name, sorted([i.next() for i in sel]))
2908 for name in online_nodes))
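# Illustrative sketch only: how cycling one iterator per foreign group (as
# built by _SshNodeSelector above) spreads the SSH checks over the other
# groups' nodes.  With other_groups = {"g1": ["a", "b"], "g2": ["c"]} and
# local nodes ["n1", "n2", "n3"], this yields n1 -> ["a", "c"],
# n2 -> ["b", "c"], n3 -> ["a", "c"]; all data here is invented.
def _ExampleSshTargetSelection(local_nodes, other_groups):
  selectors = [itertools.cycle(sorted(names))
               for (_, names) in sorted(other_groups.items())]
  return dict((name, [sel.next() for sel in selectors])
              for name in local_nodes)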
2910 def BuildHooksEnv(self):
2913 Cluster-Verify hooks run only in the post phase; their failure is logged
2914 in the verify output and makes the verification fail.
2918 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2921 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2922 for node in self.my_node_info.values())
2926 def BuildHooksNodes(self):
2927 """Build hooks nodes.
2930 return ([], self.my_node_names)
2932 def Exec(self, feedback_fn):
2933 """Verify integrity of the node group, performing various tests on nodes.
2936 # This method has too many local variables. pylint: disable=R0914
2937 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2939 if not self.my_node_names:
2941 feedback_fn("* Empty node group, skipping verification")
2945 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2946 verbose = self.op.verbose
2947 self._feedback_fn = feedback_fn
2949 vg_name = self.cfg.GetVGName()
2950 drbd_helper = self.cfg.GetDRBDHelper()
2951 cluster = self.cfg.GetClusterInfo()
2952 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2953 hypervisors = cluster.enabled_hypervisors
2954 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2956 i_non_redundant = [] # Non redundant instances
2957 i_non_a_balanced = [] # Non auto-balanced instances
2958 i_offline = 0 # Count of offline instances
2959 n_offline = 0 # Count of offline nodes
2960 n_drained = 0 # Count of nodes being drained
2961 node_vol_should = {}
2963 # FIXME: verify OS list
2966 filemap = _ComputeAncillaryFiles(cluster, False)
2968 # do local checksums
2969 master_node = self.master_node = self.cfg.GetMasterNode()
2970 master_ip = self.cfg.GetMasterIP()
2972 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2975 if self.cfg.GetUseExternalMipScript():
2976 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2978 node_verify_param = {
2979 constants.NV_FILELIST:
2980 utils.UniqueSequence(filename
2981 for files in filemap
2982 for filename in files),
2983 constants.NV_NODELIST:
2984 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2985 self.all_node_info.values()),
2986 constants.NV_HYPERVISOR: hypervisors,
2987 constants.NV_HVPARAMS:
2988 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2989 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2990 for node in node_data_list
2991 if not node.offline],
2992 constants.NV_INSTANCELIST: hypervisors,
2993 constants.NV_VERSION: None,
2994 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2995 constants.NV_NODESETUP: None,
2996 constants.NV_TIME: None,
2997 constants.NV_MASTERIP: (master_node, master_ip),
2998 constants.NV_OSLIST: None,
2999 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3000 constants.NV_USERSCRIPTS: user_scripts,
3003 if vg_name is not None:
3004 node_verify_param[constants.NV_VGLIST] = None
3005 node_verify_param[constants.NV_LVLIST] = vg_name
3006 node_verify_param[constants.NV_PVLIST] = [vg_name]
3007 node_verify_param[constants.NV_DRBDLIST] = None
3010 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3013 # FIXME: this needs to be changed per node-group, not cluster-wide
3015 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3016 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3017 bridges.add(default_nicpp[constants.NIC_LINK])
3018 for instance in self.my_inst_info.values():
3019 for nic in instance.nics:
3020 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3021 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3022 bridges.add(full_nic[constants.NIC_LINK])
3025 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3027 # Build our expected cluster state
3028 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3030 vm_capable=node.vm_capable))
3031 for node in node_data_list)
3035 for node in self.all_node_info.values():
3036 path = _SupportsOob(self.cfg, node)
3037 if path and path not in oob_paths:
3038 oob_paths.append(path)
3041 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3043 for instance in self.my_inst_names:
3044 inst_config = self.my_inst_info[instance]
3046 for nname in inst_config.all_nodes:
3047 if nname not in node_image:
3048 gnode = self.NodeImage(name=nname)
3049 gnode.ghost = (nname not in self.all_node_info)
3050 node_image[nname] = gnode
3052 inst_config.MapLVsByNode(node_vol_should)
3054 pnode = inst_config.primary_node
3055 node_image[pnode].pinst.append(instance)
3057 for snode in inst_config.secondary_nodes:
3058 nimg = node_image[snode]
3059 nimg.sinst.append(instance)
3060 if pnode not in nimg.sbp:
3061 nimg.sbp[pnode] = []
3062 nimg.sbp[pnode].append(instance)
3064 # At this point, we have the in-memory data structures complete,
3065 # except for the runtime information, which we'll gather next
3067 # Due to the way our RPC system works, exact response times cannot be
3068 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3069 # time before and after executing the request, we can at least have a time window
3071 nvinfo_starttime = time.time()
3072 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3074 self.cfg.GetClusterName())
3075 nvinfo_endtime = time.time()
3077 if self.extra_lv_nodes and vg_name is not None:
3079 self.rpc.call_node_verify(self.extra_lv_nodes,
3080 {constants.NV_LVLIST: vg_name},
3081 self.cfg.GetClusterName())
3083 extra_lv_nvinfo = {}
3085 all_drbd_map = self.cfg.ComputeDRBDMap()
3087 feedback_fn("* Gathering disk information (%s nodes)" %
3088 len(self.my_node_names))
3089 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3092 feedback_fn("* Verifying configuration file consistency")
3094 # If not all nodes are being checked, we need to make sure the master node
3095 # and a non-checked vm_capable node are in the list.
3096 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3098 vf_nvinfo = all_nvinfo.copy()
3099 vf_node_info = list(self.my_node_info.values())
3100 additional_nodes = []
3101 if master_node not in self.my_node_info:
3102 additional_nodes.append(master_node)
3103 vf_node_info.append(self.all_node_info[master_node])
3104 # Add the first vm_capable node we find which is not included
3105 for node in absent_nodes:
3106 nodeinfo = self.all_node_info[node]
3107 if nodeinfo.vm_capable and not nodeinfo.offline:
3108 additional_nodes.append(node)
3109 vf_node_info.append(self.all_node_info[node])
3111 key = constants.NV_FILELIST
3112 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3113 {key: node_verify_param[key]},
3114 self.cfg.GetClusterName()))
3116 vf_nvinfo = all_nvinfo
3117 vf_node_info = self.my_node_info.values()
3119 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3121 feedback_fn("* Verifying node status")
3125 for node_i in node_data_list:
3127 nimg = node_image[node]
3131 feedback_fn("* Skipping offline node %s" % (node,))
3135 if node == master_node:
3137 elif node_i.master_candidate:
3138 ntype = "master candidate"
3139 elif node_i.drained:
3145 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3147 msg = all_nvinfo[node].fail_msg
3148 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3151 nimg.rpc_fail = True
3154 nresult = all_nvinfo[node].payload
3156 nimg.call_ok = self._VerifyNode(node_i, nresult)
3157 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3158 self._VerifyNodeNetwork(node_i, nresult)
3159 self._VerifyNodeUserScripts(node_i, nresult)
3160 self._VerifyOob(node_i, nresult)
3163 self._VerifyNodeLVM(node_i, nresult, vg_name)
3164 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3167 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3168 self._UpdateNodeInstances(node_i, nresult, nimg)
3169 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3170 self._UpdateNodeOS(node_i, nresult, nimg)
3172 if not nimg.os_fail:
3173 if refos_img is None:
3175 self._VerifyNodeOS(node_i, nimg, refos_img)
3176 self._VerifyNodeBridges(node_i, nresult, bridges)
3178 # Check whether all running instances are primary for the node. (This
3179 # can no longer be done from _VerifyInstance below, since some of the
3180 # wrong instances could be from other node groups.)
3181 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3183 for inst in non_primary_inst:
3184 # FIXME: investigate best way to handle offline insts
3185 if inst.admin_state == constants.ADMINST_OFFLINE:
3187 feedback_fn("* Skipping offline instance %s" % inst.name)
3190 test = inst in self.all_inst_info
3191 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3192 "instance should not run on node %s", node_i.name)
3193 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3194 "node is running unknown instance %s", inst)
3196 for node, result in extra_lv_nvinfo.items():
3197 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3198 node_image[node], vg_name)
3200 feedback_fn("* Verifying instance status")
3201 for instance in self.my_inst_names:
3203 feedback_fn("* Verifying instance %s" % instance)
3204 inst_config = self.my_inst_info[instance]
3205 self._VerifyInstance(instance, inst_config, node_image,
3207 inst_nodes_offline = []
3209 pnode = inst_config.primary_node
3210 pnode_img = node_image[pnode]
3211 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3212 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3213 " primary node failed", instance)
3215 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3217 constants.CV_EINSTANCEBADNODE, instance,
3218 "instance is marked as running and lives on offline node %s",
3219 inst_config.primary_node)
3221 # If the instance is non-redundant we cannot survive losing its primary
3222 # node, so we are not N+1 compliant. On the other hand we have no disk
3223 # templates with more than one secondary so that situation is not well handled either.
3225 # FIXME: does not support file-backed instances
3226 if not inst_config.secondary_nodes:
3227 i_non_redundant.append(instance)
3229 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3230 constants.CV_EINSTANCELAYOUT,
3231 instance, "instance has multiple secondary nodes: %s",
3232 utils.CommaJoin(inst_config.secondary_nodes),
3233 code=self.ETYPE_WARNING)
3235 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3236 pnode = inst_config.primary_node
3237 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3238 instance_groups = {}
3240 for node in instance_nodes:
3241 instance_groups.setdefault(self.all_node_info[node].group,
3245 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3246 # Sort so that we always list the primary node first.
3247 for group, nodes in sorted(instance_groups.items(),
3248 key=lambda (_, nodes): pnode in nodes,
3251 self._ErrorIf(len(instance_groups) > 1,
3252 constants.CV_EINSTANCESPLITGROUPS,
3253 instance, "instance has primary and secondary nodes in"
3254 " different groups: %s", utils.CommaJoin(pretty_list),
3255 code=self.ETYPE_WARNING)
3257 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3258 i_non_a_balanced.append(instance)
3260 for snode in inst_config.secondary_nodes:
3261 s_img = node_image[snode]
3262 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3263 snode, "instance %s, connection to secondary node failed",
3267 inst_nodes_offline.append(snode)
3269 # warn that the instance lives on offline nodes
3270 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3271 "instance has offline secondary node(s) %s",
3272 utils.CommaJoin(inst_nodes_offline))
3273 # ... or ghost/non-vm_capable nodes
3274 for node in inst_config.all_nodes:
3275 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3276 instance, "instance lives on ghost node %s", node)
3277 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3278 instance, "instance lives on non-vm_capable node %s", node)
3280 feedback_fn("* Verifying orphan volumes")
3281 reserved = utils.FieldSet(*cluster.reserved_lvs)
3283 # We will get spurious "unknown volume" warnings if any node of this group
3284 # is secondary for an instance whose primary is in another group. To avoid
3285 # them, we find these instances and add their volumes to node_vol_should.
3286 for inst in self.all_inst_info.values():
3287 for secondary in inst.secondary_nodes:
3288 if (secondary in self.my_node_info
3289 and inst.name not in self.my_inst_info):
3290 inst.MapLVsByNode(node_vol_should)
3293 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3295 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3296 feedback_fn("* Verifying N+1 Memory redundancy")
3297 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3299 feedback_fn("* Other Notes")
3301 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3302 % len(i_non_redundant))
3304 if i_non_a_balanced:
3305 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3306 % len(i_non_a_balanced))
3309 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3312 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3315 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3319 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3320 """Analyze the post-hooks' result
3322 This method analyses the hook result, handles it, and sends some
3323 nicely-formatted feedback back to the user.
3325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3327 @param hooks_results: the results of the multi-node hooks rpc call
3328 @param feedback_fn: function used to send feedback back to the caller
3329 @param lu_result: previous Exec result
3330 @return: the new Exec result, based on the previous result
3334 # We only really run POST phase hooks, only for non-empty groups,
3335 # and are only interested in their results
3336 if not self.my_node_names:
3339 elif phase == constants.HOOKS_PHASE_POST:
3340 # Used to change hooks' output to proper indentation
3341 feedback_fn("* Hooks Results")
3342 assert hooks_results, "invalid result from hooks"
3344 for node_name in hooks_results:
3345 res = hooks_results[node_name]
3347 test = msg and not res.offline
3348 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3349 "Communication failure in hooks execution: %s", msg)
3350 if res.offline or msg:
3351 # No need to investigate payload if node is offline or gave an error
3354 for script, hkr, output in res.payload:
3355 test = hkr == constants.HKR_FAIL
3356 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3357 "Script %s failed, output:", script)
3359 output = self._HOOKS_INDENT_RE.sub(" ", output)
3360 feedback_fn("%s" % output)
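# Illustrative note: _HOOKS_INDENT_RE above anchors at the start of every
# line (re.M), so substituting a whitespace prefix indents multi-line hook
# output before it is fed back to the user.  The sample string is invented:
#
#   >>> re.compile("^", re.M).sub("  ", "line one\nline two")
#   '  line one\n  line two'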
3366 class LUClusterVerifyDisks(NoHooksLU):
3367 """Verifies the cluster disks status.
3372 def ExpandNames(self):
3373 self.share_locks = _ShareAll()
3374 self.needed_locks = {
3375 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3378 def Exec(self, feedback_fn):
3379 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3381 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3382 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3383 for group in group_names])
3386 class LUGroupVerifyDisks(NoHooksLU):
3387 """Verifies the status of all disks in a node group.
3392 def ExpandNames(self):
3393 # Raises errors.OpPrereqError on its own if group can't be found
3394 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3396 self.share_locks = _ShareAll()
3397 self.needed_locks = {
3398 locking.LEVEL_INSTANCE: [],
3399 locking.LEVEL_NODEGROUP: [],
3400 locking.LEVEL_NODE: [],
3403 def DeclareLocks(self, level):
3404 if level == locking.LEVEL_INSTANCE:
3405 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3407 # Lock instances optimistically, needs verification once node and group
3408 # locks have been acquired
3409 self.needed_locks[locking.LEVEL_INSTANCE] = \
3410 self.cfg.GetNodeGroupInstances(self.group_uuid)
3412 elif level == locking.LEVEL_NODEGROUP:
3413 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3415 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3416 set([self.group_uuid] +
3417 # Lock all groups used by instances optimistically; this requires
3418 # going via the node before it's locked, requiring verification later on
3421 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3422 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3424 elif level == locking.LEVEL_NODE:
3425 # This will only lock the nodes in the group to be verified which contain actual instances
3427 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3428 self._LockInstancesNodes()
3430 # Lock all nodes in group to be verified
3431 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3432 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3433 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3435 def CheckPrereq(self):
3436 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3437 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3438 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3440 assert self.group_uuid in owned_groups
3442 # Check if locked instances are still correct
3443 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3445 # Get instance information
3446 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3448 # Check if node groups for locked instances are still correct
3449 for (instance_name, inst) in self.instances.items():
3450 assert owned_nodes.issuperset(inst.all_nodes), \
3451 "Instance %s's nodes changed while we kept the lock" % instance_name
3453 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3456 assert self.group_uuid in inst_groups, \
3457 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3459 def Exec(self, feedback_fn):
3460 """Verify integrity of cluster disks.
3462 @rtype: tuple of three items
3463 @return: a tuple of (dict of node-to-node_error, list of instances
3464 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3469 res_instances = set()
3472 nv_dict = _MapInstanceDisksToNodes([inst
3473 for inst in self.instances.values()
3474 if inst.admin_state == constants.ADMINST_UP])
3477 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3478 set(self.cfg.GetVmCapableNodeList()))
3480 node_lvs = self.rpc.call_lv_list(nodes, [])
3482 for (node, node_res) in node_lvs.items():
3483 if node_res.offline:
3486 msg = node_res.fail_msg
3488 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3489 res_nodes[node] = msg
3492 for lv_name, (_, _, lv_online) in node_res.payload.items():
3493 inst = nv_dict.pop((node, lv_name), None)
3494 if not (lv_online or inst is None):
3495 res_instances.add(inst)
3497 # any leftover items in nv_dict are missing LVs, let's arrange the data better
3499 for key, inst in nv_dict.iteritems():
3500 res_missing.setdefault(inst, []).append(list(key))
3502 return (res_nodes, list(res_instances), res_missing)
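# Illustrative sketch only (not used by any LU): the core of
# LUGroupVerifyDisks.Exec above, on plain data.  Assumed shapes for this
# sketch: nv_dict maps (node, lv name) -> instance name for every LV an
# admin-up instance should have, node_lvs maps node -> {lv name: is_online}.
def _ExampleFindBadLvs(nv_dict, node_lvs):
  nv_dict = dict(nv_dict)  # work on a copy, we pop entries below
  need_activation = set()
  for (node, lvs) in node_lvs.items():
    for (lv_name, lv_online) in lvs.items():
      inst = nv_dict.pop((node, lv_name), None)
      if inst is not None and not lv_online:
        # the LV exists but is not active, the instance needs activate-disks
        need_activation.add(inst)
  # whatever is left was not reported by its node at all, i.e. missing
  missing = {}
  for ((node, lv_name), inst) in nv_dict.items():
    missing.setdefault(inst, []).append((node, lv_name))
  return (need_activation, missing)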
3505 class LUClusterRepairDiskSizes(NoHooksLU):
3506 """Verifies the cluster disks sizes.
3511 def ExpandNames(self):
3512 if self.op.instances:
3513 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3514 self.needed_locks = {
3515 locking.LEVEL_NODE_RES: [],
3516 locking.LEVEL_INSTANCE: self.wanted_names,
3518 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3520 self.wanted_names = None
3521 self.needed_locks = {
3522 locking.LEVEL_NODE_RES: locking.ALL_SET,
3523 locking.LEVEL_INSTANCE: locking.ALL_SET,
3525 self.share_locks = {
3526 locking.LEVEL_NODE_RES: 1,
3527 locking.LEVEL_INSTANCE: 0,
3530 def DeclareLocks(self, level):
3531 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3532 self._LockInstancesNodes(primary_only=True, level=level)
3534 def CheckPrereq(self):
3535 """Check prerequisites.
3537 This only checks the optional instance list against the existing names.
3540 if self.wanted_names is None:
3541 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3543 self.wanted_instances = \
3544 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3546 def _EnsureChildSizes(self, disk):
3547 """Ensure children of the disk have the needed disk size.
3549 This is valid mainly for DRBD8 and fixes an issue where the
3550 children have smaller disk size.
3552 @param disk: an L{ganeti.objects.Disk} object
3555 if disk.dev_type == constants.LD_DRBD8:
3556 assert disk.children, "Empty children for DRBD8?"
3557 fchild = disk.children[0]
3558 mismatch = fchild.size < disk.size
3560 self.LogInfo("Child disk has size %d, parent %d, fixing",
3561 fchild.size, disk.size)
3562 fchild.size = disk.size
3564 # and we recurse on this child only, not on the metadev
3565 return self._EnsureChildSizes(fchild) or mismatch
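# Illustrative example of the fix above on a DRBD8 disk whose data child
# ended up smaller than the parent; the dict-based stand-in is an assumption
# for this sketch, the real code operates on L{ganeti.objects.Disk} objects.
#
#   drbd = {"size": 1024, "children": [{"size": 1000}, {"size": 128}]}
#   if drbd["children"][0]["size"] < drbd["size"]:
#     drbd["children"][0]["size"] = drbd["size"]  # data child grown to 1024
#   # the second (metadata) child is deliberately left alone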
3569 def Exec(self, feedback_fn):
3570 """Verify the size of cluster disks.
3573 # TODO: check child disks too
3574 # TODO: check differences in size between primary/secondary nodes
3576 for instance in self.wanted_instances:
3577 pnode = instance.primary_node
3578 if pnode not in per_node_disks:
3579 per_node_disks[pnode] = []
3580 for idx, disk in enumerate(instance.disks):
3581 per_node_disks[pnode].append((instance, idx, disk))
3583 assert not (frozenset(per_node_disks.keys()) -
3584 self.owned_locks(locking.LEVEL_NODE_RES)), \
3585 "Not owning correct locks"
3586 assert not self.owned_locks(locking.LEVEL_NODE)
3589 for node, dskl in per_node_disks.items():
3590 newl = [v[2].Copy() for v in dskl]
3592 self.cfg.SetDiskID(dsk, node)
3593 result = self.rpc.call_blockdev_getsize(node, newl)
3595 self.LogWarning("Failure in blockdev_getsize call to node"
3596 " %s, ignoring", node)
3598 if len(result.payload) != len(dskl):
3599 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3600 " result.payload=%s", node, len(dskl), result.payload)
3601 self.LogWarning("Invalid result from node %s, ignoring node results",
3604 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3606 self.LogWarning("Disk %d of instance %s did not return size"
3607 " information, ignoring", idx, instance.name)
3609 if not isinstance(size, (int, long)):
3610 self.LogWarning("Disk %d of instance %s did not return valid"
3611 " size information, ignoring", idx, instance.name)
3614 if size != disk.size:
3615 self.LogInfo("Disk %d of instance %s has mismatched size,"
3616 " correcting: recorded %d, actual %d", idx,
3617 instance.name, disk.size, size)
3619 self.cfg.Update(instance, feedback_fn)
3620 changed.append((instance.name, idx, size))
3621 if self._EnsureChildSizes(disk):
3622 self.cfg.Update(instance, feedback_fn)
3623 changed.append((instance.name, idx, disk.size))
3627 class LUClusterRename(LogicalUnit):
3628 """Rename the cluster.
3631 HPATH = "cluster-rename"
3632 HTYPE = constants.HTYPE_CLUSTER
3634 def BuildHooksEnv(self):
3639 "OP_TARGET": self.cfg.GetClusterName(),
3640 "NEW_NAME": self.op.name,
3643 def BuildHooksNodes(self):
3644 """Build hooks nodes.
3647 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3649 def CheckPrereq(self):
3650 """Verify that the passed name is a valid one.
3653 hostname = netutils.GetHostname(name=self.op.name,
3654 family=self.cfg.GetPrimaryIPFamily())
3656 new_name = hostname.name
3657 self.ip = new_ip = hostname.ip
3658 old_name = self.cfg.GetClusterName()
3659 old_ip = self.cfg.GetMasterIP()
3660 if new_name == old_name and new_ip == old_ip:
3661 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3662 " cluster has changed",
3664 if new_ip != old_ip:
3665 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3666 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3667 " reachable on the network" %
3668 new_ip, errors.ECODE_NOTUNIQUE)
3670 self.op.name = new_name
3672 def Exec(self, feedback_fn):
3673 """Rename the cluster.
3676 clustername = self.op.name
3679 # shutdown the master IP
3680 master_params = self.cfg.GetMasterNetworkParameters()
3681 ems = self.cfg.GetUseExternalMipScript()
3682 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3684 result.Raise("Could not disable the master role")
3687 cluster = self.cfg.GetClusterInfo()
3688 cluster.cluster_name = clustername
3689 cluster.master_ip = new_ip
3690 self.cfg.Update(cluster, feedback_fn)
3692 # update the known hosts file
3693 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3694 node_list = self.cfg.GetOnlineNodeList()
3696 node_list.remove(master_params.name)
3699 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3701 master_params.ip = new_ip
3702 result = self.rpc.call_node_activate_master_ip(master_params.name,
3704 msg = result.fail_msg
3706 self.LogWarning("Could not re-enable the master role on"
3707 " the master, please restart manually: %s", msg)
3712 def _ValidateNetmask(cfg, netmask):
3713 """Checks if a netmask is valid.
3715 @type cfg: L{config.ConfigWriter}
3716 @param cfg: The cluster configuration
3718 @param netmask: the netmask to be verified
3719 @raise errors.OpPrereqError: if the validation fails
3722 ip_family = cfg.GetPrimaryIPFamily()
3724 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3725 except errors.ProgrammerError:
3726 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3728 if not ipcls.ValidateNetmask(netmask):
3729 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
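# Illustrative sketch only: for both IP families the netmask here is a CIDR
# prefix length, so the check boils down to a range test (at most 32 bits for
# IPv4, 128 for IPv6).  This stand-in does not use the netutils classes above
# and the exact bounds accepted by ValidateNetmask may be stricter.
def _ExampleValidPrefixLength(prefix, ipv6=False):
  return 0 <= prefix <= (128 if ipv6 else 32)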
3733 class LUClusterSetParams(LogicalUnit):
3734 """Change the parameters of the cluster.
3737 HPATH = "cluster-modify"
3738 HTYPE = constants.HTYPE_CLUSTER
3741 def CheckArguments(self):
3745 if self.op.uid_pool:
3746 uidpool.CheckUidPool(self.op.uid_pool)
3748 if self.op.add_uids:
3749 uidpool.CheckUidPool(self.op.add_uids)
3751 if self.op.remove_uids:
3752 uidpool.CheckUidPool(self.op.remove_uids)
3754 if self.op.master_netmask is not None:
3755 _ValidateNetmask(self.cfg, self.op.master_netmask)
3757 if self.op.diskparams:
3758 for dt_params in self.op.diskparams.values():
3759 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3761 def ExpandNames(self):
3762 # FIXME: in the future maybe other cluster params won't require checking on
3763 # all nodes to be modified.
3764 self.needed_locks = {
3765 locking.LEVEL_NODE: locking.ALL_SET,
3767 self.share_locks[locking.LEVEL_NODE] = 1
3769 def BuildHooksEnv(self):
3774 "OP_TARGET": self.cfg.GetClusterName(),
3775 "NEW_VG_NAME": self.op.vg_name,
3778 def BuildHooksNodes(self):
3779 """Build hooks nodes.
3782 mn = self.cfg.GetMasterNode()
3785 def CheckPrereq(self):
3786 """Check prerequisites.
3788 This checks whether the given params don't conflict and
3789 if the given volume group is valid.
3792 if self.op.vg_name is not None and not self.op.vg_name:
3793 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3794 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3795 " instances exist", errors.ECODE_INVAL)
3797 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3798 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3799 raise errors.OpPrereqError("Cannot disable drbd helper while"
3800 " drbd-based instances exist",
3803 node_list = self.owned_locks(locking.LEVEL_NODE)
3805 # if vg_name is not None, check the given volume group on all nodes
3807 vglist = self.rpc.call_vg_list(node_list)
3808 for node in node_list:
3809 msg = vglist[node].fail_msg
3811 # ignoring down node
3812 self.LogWarning("Error while gathering data on node %s"
3813 " (ignoring node): %s", node, msg)
3815 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3817 constants.MIN_VG_SIZE)
3819 raise errors.OpPrereqError("Error on node '%s': %s" %
3820 (node, vgstatus), errors.ECODE_ENVIRON)
3822 if self.op.drbd_helper:
3823 # checks given drbd helper on all nodes
3824 helpers = self.rpc.call_drbd_helper(node_list)
3825 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3827 self.LogInfo("Not checking drbd helper on offline node %s", node)
3829 msg = helpers[node].fail_msg
3831 raise errors.OpPrereqError("Error checking drbd helper on node"
3832 " '%s': %s" % (node, msg),
3833 errors.ECODE_ENVIRON)
3834 node_helper = helpers[node].payload
3835 if node_helper != self.op.drbd_helper:
3836 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3837 (node, node_helper), errors.ECODE_ENVIRON)
3839 self.cluster = cluster = self.cfg.GetClusterInfo()
3840 # validate params changes
3841 if self.op.beparams:
3842 objects.UpgradeBeParams(self.op.beparams)
3843 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3844 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3846 if self.op.ndparams:
3847 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3848 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3850 # TODO: we need a more general way to handle resetting
3851 # cluster-level parameters to default values
3852 if self.new_ndparams["oob_program"] == "":
3853 self.new_ndparams["oob_program"] = \
3854 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3856 if self.op.hv_state:
3857 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3858 self.cluster.hv_state_static)
3859 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3860 for hv, values in new_hv_state.items())
3862 if self.op.disk_state:
3863 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3864 self.cluster.disk_state_static)
3865 self.new_disk_state = \
3866 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3867 for name, values in svalues.items()))
3868 for storage, svalues in new_disk_state.items())
3871 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3874 if self.op.nicparams:
3875 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3876 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3877 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3880 # check all instances for consistency
3881 for instance in self.cfg.GetAllInstancesInfo().values():
3882 for nic_idx, nic in enumerate(instance.nics):
3883 params_copy = copy.deepcopy(nic.nicparams)
3884 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3886 # check parameter syntax
3888 objects.NIC.CheckParameterSyntax(params_filled)
3889 except errors.ConfigurationError, err:
3890 nic_errors.append("Instance %s, nic/%d: %s" %
3891 (instance.name, nic_idx, err))
3893 # if we're moving instances to routed, check that they have an ip
3894 target_mode = params_filled[constants.NIC_MODE]
3895 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3896 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3897 " address" % (instance.name, nic_idx))
3899 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3900 "\n".join(nic_errors))
3902 # hypervisor list/parameters
3903 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3904 if self.op.hvparams:
3905 for hv_name, hv_dict in self.op.hvparams.items():
3906 if hv_name not in self.new_hvparams:
3907 self.new_hvparams[hv_name] = hv_dict
3909 self.new_hvparams[hv_name].update(hv_dict)
3911 # disk template parameters
3912 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3913 if self.op.diskparams:
3914 for dt_name, dt_params in self.op.diskparams.items():
3915 if dt_name not in self.new_diskparams:
3916 self.new_diskparams[dt_name] = dt_params
3918 self.new_diskparams[dt_name].update(dt_params)
3920 # os hypervisor parameters
3921 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3923 for os_name, hvs in self.op.os_hvp.items():
3924 if os_name not in self.new_os_hvp:
3925 self.new_os_hvp[os_name] = hvs
3927 for hv_name, hv_dict in hvs.items():
3928 if hv_name not in self.new_os_hvp[os_name]:
3929 self.new_os_hvp[os_name][hv_name] = hv_dict
3931 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3934 self.new_osp = objects.FillDict(cluster.osparams, {})
3935 if self.op.osparams:
3936 for os_name, osp in self.op.osparams.items():
3937 if os_name not in self.new_osp:
3938 self.new_osp[os_name] = {}
3940 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3943 if not self.new_osp[os_name]:
3944 # we removed all parameters
3945 del self.new_osp[os_name]
3947 # check the parameter validity (remote check)
3948 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3949 os_name, self.new_osp[os_name])
3951 # changes to the hypervisor list
3952 if self.op.enabled_hypervisors is not None:
3953 self.hv_list = self.op.enabled_hypervisors
3954 for hv in self.hv_list:
3955 # if the hypervisor doesn't already exist in the cluster
3956 # hvparams, we initialize it to empty, and then (in both
3957 # cases) we make sure to fill the defaults, as we might not
3958 # have a complete defaults list if the hypervisor wasn't enabled before
3960 if hv not in new_hvp:
3962 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3963 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3965 self.hv_list = cluster.enabled_hypervisors
3967 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3968 # either the enabled list has changed, or the parameters have, validate
3969 for hv_name, hv_params in self.new_hvparams.items():
3970 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3971 (self.op.enabled_hypervisors and
3972 hv_name in self.op.enabled_hypervisors)):
3973 # either this is a new hypervisor, or its parameters have changed
3974 hv_class = hypervisor.GetHypervisor(hv_name)
3975 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3976 hv_class.CheckParameterSyntax(hv_params)
3977 _CheckHVParams(self, node_list, hv_name, hv_params)
3980 # no need to check any newly-enabled hypervisors, since the
3981 # defaults have already been checked in the above code-block
3982 for os_name, os_hvp in self.new_os_hvp.items():
3983 for hv_name, hv_params in os_hvp.items():
3984 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3985 # we need to fill in the new os_hvp on top of the actual hv_params
3986 cluster_defaults = self.new_hvparams.get(hv_name, {})
3987 new_osp = objects.FillDict(cluster_defaults, hv_params)
3988 hv_class = hypervisor.GetHypervisor(hv_name)
3989 hv_class.CheckParameterSyntax(new_osp)
3990 _CheckHVParams(self, node_list, hv_name, new_osp)
3992 if self.op.default_iallocator:
3993 alloc_script = utils.FindFile(self.op.default_iallocator,
3994 constants.IALLOCATOR_SEARCH_PATH,
3996 if alloc_script is None:
3997 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3998 " specified" % self.op.default_iallocator,
4001 def Exec(self, feedback_fn):
4002 """Change the parameters of the cluster.
4005 if self.op.vg_name is not None:
4006 new_volume = self.op.vg_name
4009 if new_volume != self.cfg.GetVGName():
4010 self.cfg.SetVGName(new_volume)
4012 feedback_fn("Cluster LVM configuration already in desired"
4013 " state, not changing")
4014 if self.op.drbd_helper is not None:
4015 new_helper = self.op.drbd_helper
4018 if new_helper != self.cfg.GetDRBDHelper():
4019 self.cfg.SetDRBDHelper(new_helper)
4021 feedback_fn("Cluster DRBD helper already in desired state,"
4023 if self.op.hvparams:
4024 self.cluster.hvparams = self.new_hvparams
4026 self.cluster.os_hvp = self.new_os_hvp
4027 if self.op.enabled_hypervisors is not None:
4028 self.cluster.hvparams = self.new_hvparams
4029 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4030 if self.op.beparams:
4031 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4032 if self.op.nicparams:
4033 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4035 self.cluster.ipolicy = self.new_ipolicy
4036 if self.op.osparams:
4037 self.cluster.osparams = self.new_osp
4038 if self.op.ndparams:
4039 self.cluster.ndparams = self.new_ndparams
4040 if self.op.diskparams:
4041 self.cluster.diskparams = self.new_diskparams
4042 if self.op.hv_state:
4043 self.cluster.hv_state_static = self.new_hv_state
4044 if self.op.disk_state:
4045 self.cluster.disk_state_static = self.new_disk_state
4047 if self.op.candidate_pool_size is not None:
4048 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4049 # we need to update the pool size here, otherwise the save will fail
4050 _AdjustCandidatePool(self, [])
4052 if self.op.maintain_node_health is not None:
4053 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4054 feedback_fn("Note: CONFD was disabled at build time, node health"
4055 " maintenance is not useful (still enabling it)")
4056 self.cluster.maintain_node_health = self.op.maintain_node_health
4058 if self.op.prealloc_wipe_disks is not None:
4059 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4061 if self.op.add_uids is not None:
4062 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4064 if self.op.remove_uids is not None:
4065 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4067 if self.op.uid_pool is not None:
4068 self.cluster.uid_pool = self.op.uid_pool
4070 if self.op.default_iallocator is not None:
4071 self.cluster.default_iallocator = self.op.default_iallocator
4073 if self.op.reserved_lvs is not None:
4074 self.cluster.reserved_lvs = self.op.reserved_lvs
4076 if self.op.use_external_mip_script is not None:
4077 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4079 def helper_os(aname, mods, desc):
4081 lst = getattr(self.cluster, aname)
4082 for key, val in mods:
4083 if key == constants.DDM_ADD:
4085 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4088 elif key == constants.DDM_REMOVE:
4092 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4094 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4096 if self.op.hidden_os:
4097 helper_os("hidden_os", self.op.hidden_os, "hidden")
4099 if self.op.blacklisted_os:
4100 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4102 if self.op.master_netdev:
4103 master_params = self.cfg.GetMasterNetworkParameters()
4104 ems = self.cfg.GetUseExternalMipScript()
4105 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4106 self.cluster.master_netdev)
4107 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4109 result.Raise("Could not disable the master ip")
4110 feedback_fn("Changing master_netdev from %s to %s" %
4111 (master_params.netdev, self.op.master_netdev))
4112 self.cluster.master_netdev = self.op.master_netdev
4114 if self.op.master_netmask:
4115 master_params = self.cfg.GetMasterNetworkParameters()
4116 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4117 result = self.rpc.call_node_change_master_netmask(master_params.name,
4118 master_params.netmask,
4119 self.op.master_netmask,
4121 master_params.netdev)
4123 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4126 self.cluster.master_netmask = self.op.master_netmask
4128 self.cfg.Update(self.cluster, feedback_fn)
4130 if self.op.master_netdev:
4131 master_params = self.cfg.GetMasterNetworkParameters()
4132 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4133 self.op.master_netdev)
4134 ems = self.cfg.GetUseExternalMipScript()
4135 result = self.rpc.call_node_activate_master_ip(master_params.name,
4138 self.LogWarning("Could not re-enable the master ip on"
4139 " the master, please restart manually: %s",
4143 def _UploadHelper(lu, nodes, fname):
4144 """Helper for uploading a file and showing warnings.
4147 if os.path.exists(fname):
4148 result = lu.rpc.call_upload_file(nodes, fname)
4149 for to_node, to_result in result.items():
4150 msg = to_result.fail_msg
4152 msg = ("Copy of file %s to node %s failed: %s" %
4153 (fname, to_node, msg))
4154 lu.proc.LogWarning(msg)
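
# Editor's sketch (hypothetical helper, not part of the original module):
# _UploadHelper is called once per file and per node list, as in
# LUClusterRename.Exec and _RedistributeAncillaryFiles; a caller with several
# files simply loops over them.
def _ExampleUploadMany(lu, nodes, filenames):
  """Uploads each of the given files to the given nodes, warning on errors."""
  for fname in filenames:
    _UploadHelper(lu, nodes, fname)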
4157 def _ComputeAncillaryFiles(cluster, redist):
4158 """Compute files external to Ganeti which need to be consistent.
4160 @type redist: boolean
4161 @param redist: Whether to include files which need to be redistributed
4164 # Compute files for all nodes
4166 constants.SSH_KNOWN_HOSTS_FILE,
4167 constants.CONFD_HMAC_KEY,
4168 constants.CLUSTER_DOMAIN_SECRET_FILE,
4169 constants.SPICE_CERT_FILE,
4170 constants.SPICE_CACERT_FILE,
4171 constants.RAPI_USERS_FILE,
4175 files_all.update(constants.ALL_CERT_FILES)
4176 files_all.update(ssconf.SimpleStore().GetFileList())
4178 # we need to ship at least the RAPI certificate
4179 files_all.add(constants.RAPI_CERT_FILE)
4181 if cluster.modify_etc_hosts:
4182 files_all.add(constants.ETC_HOSTS)
4184 # Files which are optional; these must:
4185 # - be present in one other category as well
4186 # - either exist or not exist on all nodes of that category (mc, vm all)
4188 constants.RAPI_USERS_FILE,
4191 # Files which should only be on master candidates
4195 files_mc.add(constants.CLUSTER_CONF_FILE)
4197 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4199 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4201 # Files which should only be on VM-capable nodes
4202 files_vm = set(filename
4203 for hv_name in cluster.enabled_hypervisors
4204 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4206 files_opt |= set(filename
4207 for hv_name in cluster.enabled_hypervisors
4208 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4210 # Filenames in each category must be unique
4211 all_files_set = files_all | files_mc | files_vm
4212 assert (len(all_files_set) ==
4213 sum(map(len, [files_all, files_mc, files_vm]))), \
4214 "Found file listed in more than one file list"
4216 # Optional files must be present in one other category
4217 assert all_files_set.issuperset(files_opt), \
4218 "Optional file not in a different required list"
4220 return (files_all, files_opt, files_mc, files_vm)
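
# Editor's note (illustrative, not part of the original module): the assertions
# above boil down to two invariants - the three required categories are
# pairwise disjoint, and every optional file also appears in one of them. A
# hypothetical re-check of a result tuple could look like this.
def _ExampleCheckAncillaryInvariants(files_all, files_opt, files_mc, files_vm):
  """Returns True if the _ComputeAncillaryFiles invariants hold."""
  categories = [set(files_all), set(files_mc), set(files_vm)]
  union = set()
  for cat in categories:
    union |= cat
  disjoint = len(union) == sum(len(cat) for cat in categories)
  return disjoint and union.issuperset(files_opt)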
4223 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4224 """Distribute additional files which are part of the cluster configuration.
4226 ConfigWriter takes care of distributing the config and ssconf files, but
4227 there are more files which should be distributed to all nodes. This function
4228 makes sure those are copied.
4230 @param lu: calling logical unit
4231 @param additional_nodes: list of nodes not in the config to distribute to
4232 @type additional_vm: boolean
4233 @param additional_vm: whether the additional nodes are vm-capable or not
4236 # Gather target nodes
4237 cluster = lu.cfg.GetClusterInfo()
4238 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4240 online_nodes = lu.cfg.GetOnlineNodeList()
4241 vm_nodes = lu.cfg.GetVmCapableNodeList()
4243 if additional_nodes is not None:
4244 online_nodes.extend(additional_nodes)
4246 vm_nodes.extend(additional_nodes)
4248 # Never distribute to master node
4249 for nodelist in [online_nodes, vm_nodes]:
4250 if master_info.name in nodelist:
4251 nodelist.remove(master_info.name)
4254 (files_all, _, files_mc, files_vm) = \
4255 _ComputeAncillaryFiles(cluster, True)
4257 # Never re-distribute configuration file from here
4258 assert not (constants.CLUSTER_CONF_FILE in files_all or
4259 constants.CLUSTER_CONF_FILE in files_vm)
4260 assert not files_mc, "Master candidates not handled in this function"
4263 (online_nodes, files_all),
4264 (vm_nodes, files_vm),
4268 for (node_list, files) in filemap:
4270 _UploadHelper(lu, node_list, fname)
4273 class LUClusterRedistConf(NoHooksLU):
4274 """Force the redistribution of cluster configuration.
4276 This is a very simple LU.
4281 def ExpandNames(self):
4282 self.needed_locks = {
4283 locking.LEVEL_NODE: locking.ALL_SET,
4285 self.share_locks[locking.LEVEL_NODE] = 1
4287 def Exec(self, feedback_fn):
4288 """Redistribute the configuration.
4291 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4292 _RedistributeAncillaryFiles(self)
4295 class LUClusterActivateMasterIp(NoHooksLU):
4296 """Activate the master IP on the master node.
4299 def Exec(self, feedback_fn):
4300 """Activate the master IP.
4303 master_params = self.cfg.GetMasterNetworkParameters()
4304 ems = self.cfg.GetUseExternalMipScript()
4305 result = self.rpc.call_node_activate_master_ip(master_params.name,
4307 result.Raise("Could not activate the master IP")
4310 class LUClusterDeactivateMasterIp(NoHooksLU):
4311 """Deactivate the master IP on the master node.
4314 def Exec(self, feedback_fn):
4315 """Deactivate the master IP.
4318 master_params = self.cfg.GetMasterNetworkParameters()
4319 ems = self.cfg.GetUseExternalMipScript()
4320 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4322 result.Raise("Could not deactivate the master IP")
4325 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4326 """Sleep and poll for an instance's disk to sync.
4329 if not instance.disks or disks is not None and not disks:
4332 disks = _ExpandCheckDisks(instance, disks)
4335 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4337 node = instance.primary_node
4340 lu.cfg.SetDiskID(dev, node)
4342 # TODO: Convert to utils.Retry
4345 degr_retries = 10 # in seconds, as we sleep 1 second each time
4349 cumul_degraded = False
4350 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4351 msg = rstats.fail_msg
4353 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4356 raise errors.RemoteError("Can't contact node %s for mirror data,"
4357 " aborting." % node)
4360 rstats = rstats.payload
4362 for i, mstat in enumerate(rstats):
4364 lu.LogWarning("Can't compute data for node %s/%s",
4365 node, disks[i].iv_name)
4368 cumul_degraded = (cumul_degraded or
4369 (mstat.is_degraded and mstat.sync_percent is None))
4370 if mstat.sync_percent is not None:
4372 if mstat.estimated_time is not None:
4373 rem_time = ("%s remaining (estimated)" %
4374 utils.FormatSeconds(mstat.estimated_time))
4375 max_time = mstat.estimated_time
4377 rem_time = "no time estimate"
4378 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4379 (disks[i].iv_name, mstat.sync_percent, rem_time))
4381 # if we're done but degraded, let's do a few small retries, to
4382 # make sure we see a stable and not transient situation; therefore
4383 # we force restart of the loop
4384 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4385 logging.info("Degraded disks found, %d retries left", degr_retries)
4393 time.sleep(min(60, max_time))
4396 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4397 return not cumul_degraded
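
# Editor's sketch (hypothetical caller, not part of the original module):
# _WaitForSync returns False when a mirror stays degraded after polling, so a
# typical caller aborts the operation in that case; "lu" and "instance" stand
# for a LogicalUnit and an instance object as used throughout this module.
def _ExampleEnsureSynced(lu, instance):
  """Waits for the instance's disks and raises if they remain degraded."""
  if not _WaitForSync(lu, instance):
    raise errors.OpExecError("Disks of instance %s are degraded" %
                             instance.name)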
4400 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4401 """Check that mirrors are not degraded.
4403 The ldisk parameter, if True, will change the test from the
4404 is_degraded attribute (which represents overall non-ok status for
4405 the device(s)) to the ldisk (representing the local storage status).
4408 lu.cfg.SetDiskID(dev, node)
4412 if on_primary or dev.AssembleOnSecondary():
4413 rstats = lu.rpc.call_blockdev_find(node, dev)
4414 msg = rstats.fail_msg
4416 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4418 elif not rstats.payload:
4419 lu.LogWarning("Can't find disk on node %s", node)
4423 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4425 result = result and not rstats.payload.is_degraded
4428 for child in dev.children:
4429 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
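
# Editor's sketch (hypothetical, not part of the original module): the ldisk
# flag switches the check from the overall is_degraded status to the local
# storage (LDS_OKAY) status of the same device, so both views can be obtained
# with two calls.
def _ExampleDiskConsistencyViews(lu, dev, node, on_primary):
  """Returns (overall_ok, local_storage_ok) for a single device."""
  overall_ok = _CheckDiskConsistency(lu, dev, node, on_primary)
  local_ok = _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=True)
  return (overall_ok, local_ok)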
4434 class LUOobCommand(NoHooksLU):
4435 """Logical unit for OOB handling.
4439 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4441 def ExpandNames(self):
4442 """Gather locks we need.
4445 if self.op.node_names:
4446 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4447 lock_names = self.op.node_names
4449 lock_names = locking.ALL_SET
4451 self.needed_locks = {
4452 locking.LEVEL_NODE: lock_names,
4455 def CheckPrereq(self):
4456 """Check prerequisites.
4459 - the node exists in the configuration
4462 Any errors are signaled by raising errors.OpPrereqError.
4466 self.master_node = self.cfg.GetMasterNode()
4468 assert self.op.power_delay >= 0.0
4470 if self.op.node_names:
4471 if (self.op.command in self._SKIP_MASTER and
4472 self.master_node in self.op.node_names):
4473 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4474 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4476 if master_oob_handler:
4477 additional_text = ("run '%s %s %s' if you want to operate on the"
4478 " master regardless") % (master_oob_handler,
4482 additional_text = "it does not support out-of-band operations"
4484 raise errors.OpPrereqError(("Operating on the master node %s is not"
4485 " allowed for %s; %s") %
4486 (self.master_node, self.op.command,
4487 additional_text), errors.ECODE_INVAL)
4489 self.op.node_names = self.cfg.GetNodeList()
4490 if self.op.command in self._SKIP_MASTER:
4491 self.op.node_names.remove(self.master_node)
4493 if self.op.command in self._SKIP_MASTER:
4494 assert self.master_node not in self.op.node_names
4496 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4498 raise errors.OpPrereqError("Node %s not found" % node_name,
4501 self.nodes.append(node)
4503 if (not self.op.ignore_status and
4504 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4505 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4506 " not marked offline") % node_name,
4509 def Exec(self, feedback_fn):
4510 """Execute OOB and return result if we expect any.
4513 master_node = self.master_node
4516 for idx, node in enumerate(utils.NiceSort(self.nodes,
4517 key=lambda node: node.name)):
4518 node_entry = [(constants.RS_NORMAL, node.name)]
4519 ret.append(node_entry)
4521 oob_program = _SupportsOob(self.cfg, node)
4524 node_entry.append((constants.RS_UNAVAIL, None))
4527 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4528 self.op.command, oob_program, node.name)
4529 result = self.rpc.call_run_oob(master_node, oob_program,
4530 self.op.command, node.name,
4534 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4535 node.name, result.fail_msg)
4536 node_entry.append((constants.RS_NODATA, None))
4539 self._CheckPayload(result)
4540 except errors.OpExecError, err:
4541 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4543 node_entry.append((constants.RS_NODATA, None))
4545 if self.op.command == constants.OOB_HEALTH:
4546 # For health we should log important events
4547 for item, status in result.payload:
4548 if status in [constants.OOB_STATUS_WARNING,
4549 constants.OOB_STATUS_CRITICAL]:
4550 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4551 item, node.name, status)
4553 if self.op.command == constants.OOB_POWER_ON:
4555 elif self.op.command == constants.OOB_POWER_OFF:
4556 node.powered = False
4557 elif self.op.command == constants.OOB_POWER_STATUS:
4558 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4559 if powered != node.powered:
4560 logging.warning(("Recorded power state (%s) of node '%s' does not"
4561 " match actual power state (%s)"), node.powered,
4564 # For configuration changing commands we should update the node
4565 if self.op.command in (constants.OOB_POWER_ON,
4566 constants.OOB_POWER_OFF):
4567 self.cfg.Update(node, feedback_fn)
4569 node_entry.append((constants.RS_NORMAL, result.payload))
4571 if (self.op.command == constants.OOB_POWER_ON and
4572 idx < len(self.nodes) - 1):
4573 time.sleep(self.op.power_delay)
4577 def _CheckPayload(self, result):
4578 """Checks if the payload is valid.
4580 @param result: RPC result
4581 @raises errors.OpExecError: If payload is not valid
4585 if self.op.command == constants.OOB_HEALTH:
4586 if not isinstance(result.payload, list):
4587 errs.append("command 'health' is expected to return a list but got %s" %
4588 type(result.payload))
4590 for item, status in result.payload:
4591 if status not in constants.OOB_STATUSES:
4592 errs.append("health item '%s' has invalid status '%s'" %
4595 if self.op.command == constants.OOB_POWER_STATUS:
4596 if not isinstance(result.payload, dict):
4597 errs.append("power-status is expected to return a dict but got %s" %
4598 type(result.payload))
4600 if self.op.command in [
4601 constants.OOB_POWER_ON,
4602 constants.OOB_POWER_OFF,
4603 constants.OOB_POWER_CYCLE,
4605 if result.payload is not None:
4606 errs.append("%s is expected to not return payload but got '%s'" %
4607 (self.op.command, result.payload))
4610 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4611 utils.CommaJoin(errs))
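
#: Editor's example (hypothetical data, unused by the code): a payload in the
#: shape accepted by LUOobCommand._CheckPayload for the "health" command - a
#: list of (item, status) pairs whose statuses are members of
#: constants.OOB_STATUSES; power-status returns a dict instead, and the
#: power-on/off/cycle commands return no payload at all
_EXAMPLE_OOB_HEALTH_PAYLOAD = [
  ("PSU1", constants.OOB_STATUS_WARNING),
  ("FAN2", constants.OOB_STATUS_CRITICAL),
  ]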
4614 class _OsQuery(_QueryBase):
4615 FIELDS = query.OS_FIELDS
4617 def ExpandNames(self, lu):
4618 # Lock all nodes in shared mode
4619 # Temporary removal of locks, should be reverted later
4620 # TODO: reintroduce locks when they are lighter-weight
4621 lu.needed_locks = {}
4622 #self.share_locks[locking.LEVEL_NODE] = 1
4623 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4625 # The following variables interact with _QueryBase._GetNames
4627 self.wanted = self.names
4629 self.wanted = locking.ALL_SET
4631 self.do_locking = self.use_locking
4633 def DeclareLocks(self, lu, level):
4637 def _DiagnoseByOS(rlist):
4638 """Remaps a per-node return list into a per-os per-node dictionary
4640 @param rlist: a map with node names as keys and OS objects as values
4643 @return: a dictionary with osnames as keys and as value another
4644 map, with nodes as keys and tuples of (path, status, diagnose,
4645 variants, parameters, api_versions) as values, eg::
4647 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4648 (/srv/..., False, "invalid api")],
4649 "node2": [(/srv/..., True, "", [], [])]}
4654 # we build here the list of nodes that didn't fail the RPC (at RPC
4655 # level), so that nodes with a non-responding node daemon don't
4656 # make all OSes invalid
4657 good_nodes = [node_name for node_name in rlist
4658 if not rlist[node_name].fail_msg]
4659 for node_name, nr in rlist.items():
4660 if nr.fail_msg or not nr.payload:
4662 for (name, path, status, diagnose, variants,
4663 params, api_versions) in nr.payload:
4664 if name not in all_os:
4665 # build a list of nodes for this os containing empty lists
4666 # for each node in node_list
4668 for nname in good_nodes:
4669 all_os[name][nname] = []
4670 # convert params from [name, help] to (name, help)
4671 params = [tuple(v) for v in params]
4672 all_os[name][node_name].append((path, status, diagnose,
4673 variants, params, api_versions))
4676 def _GetQueryData(self, lu):
4677 """Computes the list of nodes and their attributes.
4680 # Locking is not used
4681 assert not (compat.any(lu.glm.is_owned(level)
4682 for level in locking.LEVELS
4683 if level != locking.LEVEL_CLUSTER) or
4684 self.do_locking or self.use_locking)
4686 valid_nodes = [node.name
4687 for node in lu.cfg.GetAllNodesInfo().values()
4688 if not node.offline and node.vm_capable]
4689 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4690 cluster = lu.cfg.GetClusterInfo()
4694 for (os_name, os_data) in pol.items():
4695 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4696 hidden=(os_name in cluster.hidden_os),
4697 blacklisted=(os_name in cluster.blacklisted_os))
4701 api_versions = set()
4703 for idx, osl in enumerate(os_data.values()):
4704 info.valid = bool(info.valid and osl and osl[0][1])
4708 (node_variants, node_params, node_api) = osl[0][3:6]
4711 variants.update(node_variants)
4712 parameters.update(node_params)
4713 api_versions.update(node_api)
4715 # Filter out inconsistent values
4716 variants.intersection_update(node_variants)
4717 parameters.intersection_update(node_params)
4718 api_versions.intersection_update(node_api)
4720 info.variants = list(variants)
4721 info.parameters = list(parameters)
4722 info.api_versions = list(api_versions)
4724 data[os_name] = info
4726 # Prepare data in requested order
4727 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4731 class LUOsDiagnose(NoHooksLU):
4732 """Logical unit for OS diagnose/query.
4738 def _BuildFilter(fields, names):
4739 """Builds a filter for querying OSes.
4742 name_filter = qlang.MakeSimpleFilter("name", names)
4744 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4745 # respective field is not requested
4746 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4747 for fname in ["hidden", "blacklisted"]
4748 if fname not in fields]
4749 if "valid" not in fields:
4750 status_filter.append([qlang.OP_TRUE, "valid"])
4753 status_filter.insert(0, qlang.OP_AND)
4755 status_filter = None
4757 if name_filter and status_filter:
4758 return [qlang.OP_AND, name_filter, status_filter]
4762 return status_filter
4764 def CheckArguments(self):
4765 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4766 self.op.output_fields, False)
4768 def ExpandNames(self):
4769 self.oq.ExpandNames(self)
4771 def Exec(self, feedback_fn):
4772 return self.oq.OldStyleQuery(self)
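
# Editor's sketch (hypothetical, not part of the original module): the filter
# built by LUOsDiagnose._BuildFilter hides hidden, blacklisted and invalid
# OSes unless the corresponding status field is requested explicitly, so a
# default listing and a "show everything" listing use different filters.
def _ExampleDefaultOsFilter(names):
  """Builds the default OS query filter for the given OS names."""
  return LUOsDiagnose._BuildFilter(["name", "variants"], names) # pylint: disable=W0212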
4775 class LUNodeRemove(LogicalUnit):
4776 """Logical unit for removing a node.
4779 HPATH = "node-remove"
4780 HTYPE = constants.HTYPE_NODE
4782 def BuildHooksEnv(self):
4785 This doesn't run on the target node in the pre phase as a failed
4786 node would then be impossible to remove.
4790 "OP_TARGET": self.op.node_name,
4791 "NODE_NAME": self.op.node_name,
4794 def BuildHooksNodes(self):
4795 """Build hooks nodes.
4798 all_nodes = self.cfg.GetNodeList()
4800 all_nodes.remove(self.op.node_name)
4802 logging.warning("Node '%s', which is about to be removed, was not found"
4803 " in the list of all nodes", self.op.node_name)
4804 return (all_nodes, all_nodes)
4806 def CheckPrereq(self):
4807 """Check prerequisites.
4810 - the node exists in the configuration
4811 - it does not have primary or secondary instances
4812 - it's not the master
4814 Any errors are signaled by raising errors.OpPrereqError.
4817 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4818 node = self.cfg.GetNodeInfo(self.op.node_name)
4819 assert node is not None
4821 masternode = self.cfg.GetMasterNode()
4822 if node.name == masternode:
4823 raise errors.OpPrereqError("Node is the master node, failover to another"
4824 " node is required", errors.ECODE_INVAL)
4826 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4827 if node.name in instance.all_nodes:
4828 raise errors.OpPrereqError("Instance %s is still running on the node,"
4829 " please remove it first" % instance_name,
4831 self.op.node_name = node.name
4834 def Exec(self, feedback_fn):
4835 """Removes the node from the cluster.
4839 logging.info("Stopping the node daemon and removing configs from node %s",
4842 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4844 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4847 # Promote nodes to master candidate as needed
4848 _AdjustCandidatePool(self, exceptions=[node.name])
4849 self.context.RemoveNode(node.name)
4851 # Run post hooks on the node before it's removed
4852 _RunPostHook(self, node.name)
4854 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4855 msg = result.fail_msg
4857 self.LogWarning("Errors encountered on the remote node while leaving"
4858 " the cluster: %s", msg)
4860 # Remove node from our /etc/hosts
4861 if self.cfg.GetClusterInfo().modify_etc_hosts:
4862 master_node = self.cfg.GetMasterNode()
4863 result = self.rpc.call_etc_hosts_modify(master_node,
4864 constants.ETC_HOSTS_REMOVE,
4866 result.Raise("Can't update hosts file with new host data")
4867 _RedistributeAncillaryFiles(self)
4870 class _NodeQuery(_QueryBase):
4871 FIELDS = query.NODE_FIELDS
4873 def ExpandNames(self, lu):
4874 lu.needed_locks = {}
4875 lu.share_locks = _ShareAll()
4878 self.wanted = _GetWantedNodes(lu, self.names)
4880 self.wanted = locking.ALL_SET
4882 self.do_locking = (self.use_locking and
4883 query.NQ_LIVE in self.requested_data)
4886 # If any non-static field is requested we need to lock the nodes
4887 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4889 def DeclareLocks(self, lu, level):
4892 def _GetQueryData(self, lu):
4893 """Computes the list of nodes and their attributes.
4896 all_info = lu.cfg.GetAllNodesInfo()
4898 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4900 # Gather data as requested
4901 if query.NQ_LIVE in self.requested_data:
4902 # filter out non-vm_capable nodes
4903 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4905 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4906 [lu.cfg.GetHypervisorType()])
4907 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4908 for (name, nresult) in node_data.items()
4909 if not nresult.fail_msg and nresult.payload)
4913 if query.NQ_INST in self.requested_data:
4914 node_to_primary = dict([(name, set()) for name in nodenames])
4915 node_to_secondary = dict([(name, set()) for name in nodenames])
4917 inst_data = lu.cfg.GetAllInstancesInfo()
4919 for inst in inst_data.values():
4920 if inst.primary_node in node_to_primary:
4921 node_to_primary[inst.primary_node].add(inst.name)
4922 for secnode in inst.secondary_nodes:
4923 if secnode in node_to_secondary:
4924 node_to_secondary[secnode].add(inst.name)
4926 node_to_primary = None
4927 node_to_secondary = None
4929 if query.NQ_OOB in self.requested_data:
4930 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4931 for name, node in all_info.iteritems())
4935 if query.NQ_GROUP in self.requested_data:
4936 groups = lu.cfg.GetAllNodeGroupsInfo()
4940 return query.NodeQueryData([all_info[name] for name in nodenames],
4941 live_data, lu.cfg.GetMasterNode(),
4942 node_to_primary, node_to_secondary, groups,
4943 oob_support, lu.cfg.GetClusterInfo())
4946 class LUNodeQuery(NoHooksLU):
4947 """Logical unit for querying nodes.
4950 # pylint: disable=W0142
4953 def CheckArguments(self):
4954 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4955 self.op.output_fields, self.op.use_locking)
4957 def ExpandNames(self):
4958 self.nq.ExpandNames(self)
4960 def DeclareLocks(self, level):
4961 self.nq.DeclareLocks(self, level)
4963 def Exec(self, feedback_fn):
4964 return self.nq.OldStyleQuery(self)
4967 class LUNodeQueryvols(NoHooksLU):
4968 """Logical unit for getting volumes on node(s).
4972 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4973 _FIELDS_STATIC = utils.FieldSet("node")
4975 def CheckArguments(self):
4976 _CheckOutputFields(static=self._FIELDS_STATIC,
4977 dynamic=self._FIELDS_DYNAMIC,
4978 selected=self.op.output_fields)
4980 def ExpandNames(self):
4981 self.share_locks = _ShareAll()
4982 self.needed_locks = {}
4984 if not self.op.nodes:
4985 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4987 self.needed_locks[locking.LEVEL_NODE] = \
4988 _GetWantedNodes(self, self.op.nodes)
4990 def Exec(self, feedback_fn):
4991 """Computes the list of nodes and their attributes.
4994 nodenames = self.owned_locks(locking.LEVEL_NODE)
4995 volumes = self.rpc.call_node_volumes(nodenames)
4997 ilist = self.cfg.GetAllInstancesInfo()
4998 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5001 for node in nodenames:
5002 nresult = volumes[node]
5005 msg = nresult.fail_msg
5007 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5010 node_vols = sorted(nresult.payload,
5011 key=operator.itemgetter("dev"))
5013 for vol in node_vols:
5015 for field in self.op.output_fields:
5018 elif field == "phys":
5022 elif field == "name":
5024 elif field == "size":
5025 val = int(float(vol["size"]))
5026 elif field == "instance":
5027 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5029 raise errors.ParameterError(field)
5030 node_output.append(str(val))
5032 output.append(node_output)
5037 class LUNodeQueryStorage(NoHooksLU):
5038 """Logical unit for getting information on storage units on node(s).
5041 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5044 def CheckArguments(self):
5045 _CheckOutputFields(static=self._FIELDS_STATIC,
5046 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5047 selected=self.op.output_fields)
5049 def ExpandNames(self):
5050 self.share_locks = _ShareAll()
5051 self.needed_locks = {}
5054 self.needed_locks[locking.LEVEL_NODE] = \
5055 _GetWantedNodes(self, self.op.nodes)
5057 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5059 def Exec(self, feedback_fn):
5060 """Computes the list of nodes and their attributes.
5063 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5065 # Always get name to sort by
5066 if constants.SF_NAME in self.op.output_fields:
5067 fields = self.op.output_fields[:]
5069 fields = [constants.SF_NAME] + self.op.output_fields
5071 # Never ask for node or type as it's only known to the LU
5072 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5073 while extra in fields:
5074 fields.remove(extra)
5076 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5077 name_idx = field_idx[constants.SF_NAME]
5079 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5080 data = self.rpc.call_storage_list(self.nodes,
5081 self.op.storage_type, st_args,
5082 self.op.name, fields)
5086 for node in utils.NiceSort(self.nodes):
5087 nresult = data[node]
5091 msg = nresult.fail_msg
5093 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5096 rows = dict([(row[name_idx], row) for row in nresult.payload])
5098 for name in utils.NiceSort(rows.keys()):
5103 for field in self.op.output_fields:
5104 if field == constants.SF_NODE:
5106 elif field == constants.SF_TYPE:
5107 val = self.op.storage_type
5108 elif field in field_idx:
5109 val = row[field_idx[field]]
5111 raise errors.ParameterError(field)
5120 class _InstanceQuery(_QueryBase):
5121 FIELDS = query.INSTANCE_FIELDS
5123 def ExpandNames(self, lu):
5124 lu.needed_locks = {}
5125 lu.share_locks = _ShareAll()
5128 self.wanted = _GetWantedInstances(lu, self.names)
5130 self.wanted = locking.ALL_SET
5132 self.do_locking = (self.use_locking and
5133 query.IQ_LIVE in self.requested_data)
5135 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5136 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5137 lu.needed_locks[locking.LEVEL_NODE] = []
5138 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5140 self.do_grouplocks = (self.do_locking and
5141 query.IQ_NODES in self.requested_data)
5143 def DeclareLocks(self, lu, level):
5145 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5146 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5148 # Lock all groups used by instances optimistically; this requires going
5149 # via the node before it's locked, requiring verification later on
5150 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5152 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5153 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5154 elif level == locking.LEVEL_NODE:
5155 lu._LockInstancesNodes() # pylint: disable=W0212
5158 def _CheckGroupLocks(lu):
5159 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5160 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5162 # Check if node groups for locked instances are still correct
5163 for instance_name in owned_instances:
5164 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5166 def _GetQueryData(self, lu):
5167 """Computes the list of instances and their attributes.
5170 if self.do_grouplocks:
5171 self._CheckGroupLocks(lu)
5173 cluster = lu.cfg.GetClusterInfo()
5174 all_info = lu.cfg.GetAllInstancesInfo()
5176 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5178 instance_list = [all_info[name] for name in instance_names]
5179 nodes = frozenset(itertools.chain(*(inst.all_nodes
5180 for inst in instance_list)))
5181 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5184 wrongnode_inst = set()
5186 # Gather data as requested
5187 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5189 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5191 result = node_data[name]
5193 # offline nodes will be in both lists
5194 assert result.fail_msg
5195 offline_nodes.append(name)
5197 bad_nodes.append(name)
5198 elif result.payload:
5199 for inst in result.payload:
5200 if inst in all_info:
5201 if all_info[inst].primary_node == name:
5202 live_data.update(result.payload)
5204 wrongnode_inst.add(inst)
5206 # orphan instance; we don't list it here as we don't
5207 # handle this case yet in the output of instance listing
5208 logging.warning("Orphan instance '%s' found on node %s",
5210 # else no instance is alive
5214 if query.IQ_DISKUSAGE in self.requested_data:
5215 disk_usage = dict((inst.name,
5216 _ComputeDiskSize(inst.disk_template,
5217 [{constants.IDISK_SIZE: disk.size}
5218 for disk in inst.disks]))
5219 for inst in instance_list)
5223 if query.IQ_CONSOLE in self.requested_data:
5225 for inst in instance_list:
5226 if inst.name in live_data:
5227 # Instance is running
5228 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5230 consinfo[inst.name] = None
5231 assert set(consinfo.keys()) == set(instance_names)
5235 if query.IQ_NODES in self.requested_data:
5236 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5238 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5239 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5240 for uuid in set(map(operator.attrgetter("group"),
5246 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5247 disk_usage, offline_nodes, bad_nodes,
5248 live_data, wrongnode_inst, consinfo,
5252 class LUQuery(NoHooksLU):
5253 """Query for resources/items of a certain kind.
5256 # pylint: disable=W0142
5259 def CheckArguments(self):
5260 qcls = _GetQueryImplementation(self.op.what)
5262 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5264 def ExpandNames(self):
5265 self.impl.ExpandNames(self)
5267 def DeclareLocks(self, level):
5268 self.impl.DeclareLocks(self, level)
5270 def Exec(self, feedback_fn):
5271 return self.impl.NewStyleQuery(self)
5274 class LUQueryFields(NoHooksLU):
5275 """Query for resources/items of a certain kind.
5278 # pylint: disable=W0142
5281 def CheckArguments(self):
5282 self.qcls = _GetQueryImplementation(self.op.what)
5284 def ExpandNames(self):
5285 self.needed_locks = {}
5287 def Exec(self, feedback_fn):
5288 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5291 class LUNodeModifyStorage(NoHooksLU):
5292 """Logical unit for modifying a storage volume on a node.
5297 def CheckArguments(self):
5298 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5300 storage_type = self.op.storage_type
5303 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5305 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5306 " modified" % storage_type,
5309 diff = set(self.op.changes.keys()) - modifiable
5311 raise errors.OpPrereqError("The following fields can not be modified for"
5312 " storage units of type '%s': %r" %
5313 (storage_type, list(diff)),
5316 def ExpandNames(self):
5317 self.needed_locks = {
5318 locking.LEVEL_NODE: self.op.node_name,
5321 def Exec(self, feedback_fn):
5322 """Computes the list of nodes and their attributes.
5325 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5326 result = self.rpc.call_storage_modify(self.op.node_name,
5327 self.op.storage_type, st_args,
5328 self.op.name, self.op.changes)
5329 result.Raise("Failed to modify storage unit '%s' on %s" %
5330 (self.op.name, self.op.node_name))
5333 class LUNodeAdd(LogicalUnit):
5334 """Logical unit for adding node to the cluster.
5338 HTYPE = constants.HTYPE_NODE
5339 _NFLAGS = ["master_capable", "vm_capable"]
5341 def CheckArguments(self):
5342 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5343 # validate/normalize the node name
5344 self.hostname = netutils.GetHostname(name=self.op.node_name,
5345 family=self.primary_ip_family)
5346 self.op.node_name = self.hostname.name
5348 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5349 raise errors.OpPrereqError("Cannot readd the master node",
5352 if self.op.readd and self.op.group:
5353 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5354 " being readded", errors.ECODE_INVAL)
5356 def BuildHooksEnv(self):
5359 This will run on all nodes before, and on all nodes + the new node after.
5363 "OP_TARGET": self.op.node_name,
5364 "NODE_NAME": self.op.node_name,
5365 "NODE_PIP": self.op.primary_ip,
5366 "NODE_SIP": self.op.secondary_ip,
5367 "MASTER_CAPABLE": str(self.op.master_capable),
5368 "VM_CAPABLE": str(self.op.vm_capable),
5371 def BuildHooksNodes(self):
5372 """Build hooks nodes.
5375 # Exclude added node
5376 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5377 post_nodes = pre_nodes + [self.op.node_name, ]
5379 return (pre_nodes, post_nodes)
5381 def CheckPrereq(self):
5382 """Check prerequisites.
5385 - the new node is not already in the config
5387 - its parameters (single/dual homed) match the cluster
5389 Any errors are signaled by raising errors.OpPrereqError.
5393 hostname = self.hostname
5394 node = hostname.name
5395 primary_ip = self.op.primary_ip = hostname.ip
5396 if self.op.secondary_ip is None:
5397 if self.primary_ip_family == netutils.IP6Address.family:
5398 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5399 " IPv4 address must be given as secondary",
5401 self.op.secondary_ip = primary_ip
5403 secondary_ip = self.op.secondary_ip
5404 if not netutils.IP4Address.IsValid(secondary_ip):
5405 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5406 " address" % secondary_ip, errors.ECODE_INVAL)
5408 node_list = cfg.GetNodeList()
5409 if not self.op.readd and node in node_list:
5410 raise errors.OpPrereqError("Node %s is already in the configuration" %
5411 node, errors.ECODE_EXISTS)
5412 elif self.op.readd and node not in node_list:
5413 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5416 self.changed_primary_ip = False
5418 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5419 if self.op.readd and node == existing_node_name:
5420 if existing_node.secondary_ip != secondary_ip:
5421 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5422 " address configuration as before",
5424 if existing_node.primary_ip != primary_ip:
5425 self.changed_primary_ip = True
5429 if (existing_node.primary_ip == primary_ip or
5430 existing_node.secondary_ip == primary_ip or
5431 existing_node.primary_ip == secondary_ip or
5432 existing_node.secondary_ip == secondary_ip):
5433 raise errors.OpPrereqError("New node ip address(es) conflict with"
5434 " existing node %s" % existing_node.name,
5435 errors.ECODE_NOTUNIQUE)
5437 # After this 'if' block, None is no longer a valid value for the
5438 # _capable op attributes
5440 old_node = self.cfg.GetNodeInfo(node)
5441 assert old_node is not None, "Can't retrieve locked node %s" % node
5442 for attr in self._NFLAGS:
5443 if getattr(self.op, attr) is None:
5444 setattr(self.op, attr, getattr(old_node, attr))
5446 for attr in self._NFLAGS:
5447 if getattr(self.op, attr) is None:
5448 setattr(self.op, attr, True)
5450 if self.op.readd and not self.op.vm_capable:
5451 pri, sec = cfg.GetNodeInstances(node)
5453 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5454 " flag set to false, but it already holds"
5455 " instances" % node,
5458 # check that the type of the node (single versus dual homed) is the
5459 # same as for the master
5460 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5461 master_singlehomed = myself.secondary_ip == myself.primary_ip
5462 newbie_singlehomed = secondary_ip == primary_ip
5463 if master_singlehomed != newbie_singlehomed:
5464 if master_singlehomed:
5465 raise errors.OpPrereqError("The master has no secondary ip but the"
5466 " new node has one",
5469 raise errors.OpPrereqError("The master has a secondary ip but the"
5470 " new node doesn't have one",
5473 # checks reachability
5474 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5475 raise errors.OpPrereqError("Node not reachable by ping",
5476 errors.ECODE_ENVIRON)
5478 if not newbie_singlehomed:
5479 # check reachability from my secondary ip to newbie's secondary ip
5480 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5481 source=myself.secondary_ip):
5482 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5483 " based ping to node daemon port",
5484 errors.ECODE_ENVIRON)
5491 if self.op.master_capable:
5492 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5494 self.master_candidate = False
5497 self.new_node = old_node
5499 node_group = cfg.LookupNodeGroup(self.op.group)
5500 self.new_node = objects.Node(name=node,
5501 primary_ip=primary_ip,
5502 secondary_ip=secondary_ip,
5503 master_candidate=self.master_candidate,
5504 offline=False, drained=False,
5507 if self.op.ndparams:
5508 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5510 if self.op.hv_state:
5511 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5513 if self.op.disk_state:
5514 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5516 def Exec(self, feedback_fn):
5517 """Adds the new node to the cluster.
5520 new_node = self.new_node
5521 node = new_node.name
5523 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5526 # We are adding a new node, so we assume it's powered
5527 new_node.powered = True
5529 # for re-adds, reset the offline/drained/master-candidate flags;
5530 # we need to reset here, otherwise offline would prevent RPC calls
5531 # later in the procedure; this also means that if the re-add
5532 # fails, we are left with a non-offlined, broken node
5534 new_node.drained = new_node.offline = False # pylint: disable=W0201
5535 self.LogInfo("Readding a node, the offline/drained flags were reset")
5536 # if we demote the node, we do cleanup later in the procedure
5537 new_node.master_candidate = self.master_candidate
5538 if self.changed_primary_ip:
5539 new_node.primary_ip = self.op.primary_ip
5541 # copy the master/vm_capable flags
5542 for attr in self._NFLAGS:
5543 setattr(new_node, attr, getattr(self.op, attr))
5545 # notify the user about any possible mc promotion
5546 if new_node.master_candidate:
5547 self.LogInfo("Node will be a master candidate")
5549 if self.op.ndparams:
5550 new_node.ndparams = self.op.ndparams
5552 new_node.ndparams = {}
5554 if self.op.hv_state:
5555 new_node.hv_state_static = self.new_hv_state
5557 if self.op.disk_state:
5558 new_node.disk_state_static = self.new_disk_state
5560 # check connectivity
5561 result = self.rpc.call_version([node])[node]
5562 result.Raise("Can't get version information from node %s" % node)
5563 if constants.PROTOCOL_VERSION == result.payload:
5564 logging.info("Communication to node %s fine, sw version %s match",
5565 node, result.payload)
5567 raise errors.OpExecError("Version mismatch: master version %s,"
5568 " node version %s" %
5569 (constants.PROTOCOL_VERSION, result.payload))
5571 # Add node to our /etc/hosts, and add key to known_hosts
5572 if self.cfg.GetClusterInfo().modify_etc_hosts:
5573 master_node = self.cfg.GetMasterNode()
5574 result = self.rpc.call_etc_hosts_modify(master_node,
5575 constants.ETC_HOSTS_ADD,
5578 result.Raise("Can't update hosts file with new host data")
5580 if new_node.secondary_ip != new_node.primary_ip:
5581 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5584 node_verify_list = [self.cfg.GetMasterNode()]
5585 node_verify_param = {
5586 constants.NV_NODELIST: ([node], {}),
5587 # TODO: do a node-net-test as well?
5590 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5591 self.cfg.GetClusterName())
5592 for verifier in node_verify_list:
5593 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5594 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5596 for failed in nl_payload:
5597 feedback_fn("ssh/hostname verification failed"
5598 " (checking from %s): %s" %
5599 (verifier, nl_payload[failed]))
5600 raise errors.OpExecError("ssh/hostname verification failed")
5603 _RedistributeAncillaryFiles(self)
5604 self.context.ReaddNode(new_node)
5605 # make sure we redistribute the config
5606 self.cfg.Update(new_node, feedback_fn)
5607 # and make sure the new node will not have old files around
5608 if not new_node.master_candidate:
5609 result = self.rpc.call_node_demote_from_mc(new_node.name)
5610 msg = result.fail_msg
5612 self.LogWarning("Node failed to demote itself from master"
5613 " candidate status: %s" % msg)
5615 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5616 additional_vm=self.op.vm_capable)
5617 self.context.AddNode(new_node, self.proc.GetECId())
5620 class LUNodeSetParams(LogicalUnit):
5621 """Modifies the parameters of a node.
5623 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5624 to the node role (as _ROLE_*)
5625 @cvar _R2F: a dictionary from node role to tuples of flags
5626 @cvar _FLAGS: a list of attribute names corresponding to the flags
5629 HPATH = "node-modify"
5630 HTYPE = constants.HTYPE_NODE
5632 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5634 (True, False, False): _ROLE_CANDIDATE,
5635 (False, True, False): _ROLE_DRAINED,
5636 (False, False, True): _ROLE_OFFLINE,
5637 (False, False, False): _ROLE_REGULAR,
5639 _R2F = dict((v, k) for k, v in _F2R.items())
5640 _FLAGS = ["master_candidate", "drained", "offline"]
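  # Editor's note (illustrative, not part of the original class): the flag
  # tuples above are ordered as (master_candidate, drained, offline), matching
  # _FLAGS, so for example _F2R[(True, False, False)] is _ROLE_CANDIDATE and
  # _R2F[_ROLE_OFFLINE] is (False, False, True).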
5642 def CheckArguments(self):
5643 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5644 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5645 self.op.master_capable, self.op.vm_capable,
5646 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5648 if all_mods.count(None) == len(all_mods):
5649 raise errors.OpPrereqError("Please pass at least one modification",
5651 if all_mods.count(True) > 1:
5652 raise errors.OpPrereqError("Can't set the node into more than one"
5653 " state at the same time",
5656 # Boolean value that tells us whether we might be demoting from MC
5657 self.might_demote = (self.op.master_candidate == False or
5658 self.op.offline == True or
5659 self.op.drained == True or
5660 self.op.master_capable == False)
5662 if self.op.secondary_ip:
5663 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5664 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5665 " address" % self.op.secondary_ip,
5668 self.lock_all = self.op.auto_promote and self.might_demote
5669 self.lock_instances = self.op.secondary_ip is not None
5671 def _InstanceFilter(self, instance):
5672 """Filter for getting affected instances.
5675 return (instance.disk_template in constants.DTS_INT_MIRROR and
5676 self.op.node_name in instance.all_nodes)
5678 def ExpandNames(self):
5679 if self.lock_all:
5680 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5681 else:
5682 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5684 # Since modifying a node can have severe effects on currently running
5685 # operations the resource lock is at least acquired in shared mode
5686 self.needed_locks[locking.LEVEL_NODE_RES] = \
5687 self.needed_locks[locking.LEVEL_NODE]
5689 # Get node resource and instance locks in shared mode; they are not used
5690 # for anything but read-only access
5691 self.share_locks[locking.LEVEL_NODE_RES] = 1
5692 self.share_locks[locking.LEVEL_INSTANCE] = 1
5694 if self.lock_instances:
5695 self.needed_locks[locking.LEVEL_INSTANCE] = \
5696 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5698 def BuildHooksEnv(self):
5701 This runs on the master node.
5705 "OP_TARGET": self.op.node_name,
5706 "MASTER_CANDIDATE": str(self.op.master_candidate),
5707 "OFFLINE": str(self.op.offline),
5708 "DRAINED": str(self.op.drained),
5709 "MASTER_CAPABLE": str(self.op.master_capable),
5710 "VM_CAPABLE": str(self.op.vm_capable),
5713 def BuildHooksNodes(self):
5714 """Build hooks nodes.
5717 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5720 def CheckPrereq(self):
5721 """Check prerequisites.
5723 This only checks the instance list against the existing names.
5726 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5728 if self.lock_instances:
5729 affected_instances = \
5730 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5732 # Verify instance locks
5733 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5734 wanted_instances = frozenset(affected_instances.keys())
5735 if wanted_instances - owned_instances:
5736 raise errors.OpPrereqError("Instances affected by changing node %s's"
5737 " secondary IP address have changed since"
5738 " locks were acquired, wanted '%s', have"
5739 " '%s'; retry the operation" %
5741 utils.CommaJoin(wanted_instances),
5742 utils.CommaJoin(owned_instances)),
5745 affected_instances = None
5747 if (self.op.master_candidate is not None or
5748 self.op.drained is not None or
5749 self.op.offline is not None):
5750 # we can't change the master's node flags
5751 if self.op.node_name == self.cfg.GetMasterNode():
5752 raise errors.OpPrereqError("The master role can be changed"
5753 " only via master-failover",
5756 if self.op.master_candidate and not node.master_capable:
5757 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5758 " it a master candidate" % node.name,
5761 if self.op.vm_capable == False:
5762 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5764 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5765 " the vm_capable flag" % node.name,
5768 if node.master_candidate and self.might_demote and not self.lock_all:
5769 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5770 # check if after removing the current node, we're missing master
5772 (mc_remaining, mc_should, _) = \
5773 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5774 if mc_remaining < mc_should:
5775 raise errors.OpPrereqError("Not enough master candidates, please"
5776 " pass auto promote option to allow"
5777 " promotion", errors.ECODE_STATE)
5779 self.old_flags = old_flags = (node.master_candidate,
5780 node.drained, node.offline)
5781 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5782 self.old_role = old_role = self._F2R[old_flags]
5784 # Check for ineffective changes
5785 for attr in self._FLAGS:
5786 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5787 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5788 setattr(self.op, attr, None)
5790 # Past this point, any flag change to False means a transition
5791 # away from the respective state, as only real changes are kept
5793 # TODO: We might query the real power state if it supports OOB
5794 if _SupportsOob(self.cfg, node):
5795 if self.op.offline is False and not (node.powered or
5796 self.op.powered == True):
5797 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5798 " offline status can be reset") %
5800 elif self.op.powered is not None:
5801 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5802 " as it does not support out-of-band"
5803 " handling") % self.op.node_name)
5805 # If we're being deofflined/drained, we'll MC ourself if needed
5806 if (self.op.drained == False or self.op.offline == False or
5807 (self.op.master_capable and not node.master_capable)):
5808 if _DecideSelfPromotion(self):
5809 self.op.master_candidate = True
5810 self.LogInfo("Auto-promoting node to master candidate")
5812 # If we're no longer master capable, we'll demote ourselves from MC
5813 if self.op.master_capable == False and node.master_candidate:
5814 self.LogInfo("Demoting from master candidate")
5815 self.op.master_candidate = False
5818 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5819 if self.op.master_candidate:
5820 new_role = self._ROLE_CANDIDATE
5821 elif self.op.drained:
5822 new_role = self._ROLE_DRAINED
5823 elif self.op.offline:
5824 new_role = self._ROLE_OFFLINE
5825 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5826 # False is still in new flags, which means we're un-setting (the
5828 new_role = self._ROLE_REGULAR
5829 else: # no new flags, nothing, keep old role
5830 new_role = old_role
5832 self.new_role = new_role
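# Worked example (sketch): for a node currently in _ROLE_CANDIDATE, an opcode
# with drained=True yields new_role == _ROLE_DRAINED; an opcode that only sets
# master_candidate=False yields _ROLE_REGULAR; and with no flag changes at all
# new_role simply keeps the old role computed earlier.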
5834 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5835 # Trying to transition out of offline status
5836 # TODO: Use standard RPC runner, but make sure it works when the node is
5837 # still marked offline
5838 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5840 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5841 " to report its version: %s" %
5842 (node.name, result.fail_msg),
5845 self.LogWarning("Transitioning node from offline to online state"
5846 " without using re-add. Please make sure the node"
5849 if self.op.secondary_ip:
5850 # Ok even without locking, because this can't be changed by any LU
5851 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5852 master_singlehomed = master.secondary_ip == master.primary_ip
5853 if master_singlehomed and self.op.secondary_ip:
5854 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5855 " homed cluster", errors.ECODE_INVAL)
5857 assert not (frozenset(affected_instances) -
5858 self.owned_locks(locking.LEVEL_INSTANCE))
5860 if node.offline:
5861 if affected_instances:
5862 raise errors.OpPrereqError("Cannot change secondary IP address:"
5863 " offline node has instances (%s)"
5864 " configured to use it" %
5865 utils.CommaJoin(affected_instances.keys()))
5866 else:
5867 # On online nodes, check that no instances are running, and that
5868 # the node has the new ip and we can reach it.
5869 for instance in affected_instances.values():
5870 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5871 msg="cannot change secondary ip")
5873 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5874 if master.name != node.name:
5875 # check reachability from master secondary ip to new secondary ip
5876 if not netutils.TcpPing(self.op.secondary_ip,
5877 constants.DEFAULT_NODED_PORT,
5878 source=master.secondary_ip):
5879 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5880 " based ping to node daemon port",
5881 errors.ECODE_ENVIRON)
5883 if self.op.ndparams:
5884 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5885 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5886 self.new_ndparams = new_ndparams
5888 if self.op.hv_state:
5889 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5890 self.node.hv_state_static)
5892 if self.op.disk_state:
5893 self.new_disk_state = \
5894 _MergeAndVerifyDiskState(self.op.disk_state,
5895 self.node.disk_state_static)
5897 def Exec(self, feedback_fn):
5902 old_role = self.old_role
5903 new_role = self.new_role
5904 node = self.node
5905 result = []
5907 if self.op.ndparams:
5908 node.ndparams = self.new_ndparams
5910 if self.op.powered is not None:
5911 node.powered = self.op.powered
5913 if self.op.hv_state:
5914 node.hv_state_static = self.new_hv_state
5916 if self.op.disk_state:
5917 node.disk_state_static = self.new_disk_state
5919 for attr in ["master_capable", "vm_capable"]:
5920 val = getattr(self.op, attr)
5921 if val is not None:
5922 setattr(node, attr, val)
5923 result.append((attr, str(val)))
5925 if new_role != old_role:
5926 # Tell the node to demote itself, if no longer MC and not offline
5927 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5928 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5930 self.LogWarning("Node failed to demote itself: %s", msg)
5932 new_flags = self._R2F[new_role]
5933 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5934 if of != nf:
5935 result.append((desc, str(nf)))
5936 (node.master_candidate, node.drained, node.offline) = new_flags
5938 # we locked all nodes, we adjust the CP before updating this node
5939 if self.lock_all:
5940 _AdjustCandidatePool(self, [node.name])
5942 if self.op.secondary_ip:
5943 node.secondary_ip = self.op.secondary_ip
5944 result.append(("secondary_ip", self.op.secondary_ip))
5946 # this will trigger configuration file update, if needed
5947 self.cfg.Update(node, feedback_fn)
5949 # this will trigger job queue propagation or cleanup if the mc
5951 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5952 self.context.ReaddNode(node)
5954 return result
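# Illustrative return value (sketch; values are hypothetical): Exec returns the
# list of (attribute, new value) pairs accumulated above, for example
#   [("master_candidate", "False"), ("drained", "True"),
#    ("secondary_ip", "192.0.2.10")]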
5957 class LUNodePowercycle(NoHooksLU):
5958 """Powercycles a node.
5963 def CheckArguments(self):
5964 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5965 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5966 raise errors.OpPrereqError("The node is the master and the force"
5967 " parameter was not set",
5970 def ExpandNames(self):
5971 """Locking for PowercycleNode.
5973 This is a last-resort option and shouldn't block on other
5974 jobs. Therefore, we grab no locks.
5977 self.needed_locks = {}
5979 def Exec(self, feedback_fn):
5983 result = self.rpc.call_node_powercycle(self.op.node_name,
5984 self.cfg.GetHypervisorType())
5985 result.Raise("Failed to schedule the reboot")
5986 return result.payload
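# Minimal usage sketch (the node name is hypothetical): the LU is normally
# reached through its opcode, e.g.
#   op = opcodes.OpNodePowercycle(node_name="node1.example.com", force=True)
# and the value returned above is whatever the node daemon reports for the
# scheduled powercycle.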
5989 class LUClusterQuery(NoHooksLU):
5990 """Query cluster configuration.
5995 def ExpandNames(self):
5996 self.needed_locks = {}
5998 def Exec(self, feedback_fn):
5999 """Return cluster config.
6002 cluster = self.cfg.GetClusterInfo()
6004 os_hvp = {}
6005 # Filter just for enabled hypervisors
6006 for os_name, hv_dict in cluster.os_hvp.items():
6007 os_hvp[os_name] = {}
6008 for hv_name, hv_params in hv_dict.items():
6009 if hv_name in cluster.enabled_hypervisors:
6010 os_hvp[os_name][hv_name] = hv_params
6012 # Convert ip_family to ip_version
6013 primary_ip_version = constants.IP4_VERSION
6014 if cluster.primary_ip_family == netutils.IP6Address.family:
6015 primary_ip_version = constants.IP6_VERSION
6018 "software_version": constants.RELEASE_VERSION,
6019 "protocol_version": constants.PROTOCOL_VERSION,
6020 "config_version": constants.CONFIG_VERSION,
6021 "os_api_version": max(constants.OS_API_VERSIONS),
6022 "export_version": constants.EXPORT_VERSION,
6023 "architecture": (platform.architecture()[0], platform.machine()),
6024 "name": cluster.cluster_name,
6025 "master": cluster.master_node,
6026 "default_hypervisor": cluster.primary_hypervisor,
6027 "enabled_hypervisors": cluster.enabled_hypervisors,
6028 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6029 for hypervisor_name in cluster.enabled_hypervisors]),
6030 "os_hvp": os_hvp,
6031 "beparams": cluster.beparams,
6032 "osparams": cluster.osparams,
6033 "ipolicy": cluster.ipolicy,
6034 "nicparams": cluster.nicparams,
6035 "ndparams": cluster.ndparams,
6036 "candidate_pool_size": cluster.candidate_pool_size,
6037 "master_netdev": cluster.master_netdev,
6038 "master_netmask": cluster.master_netmask,
6039 "use_external_mip_script": cluster.use_external_mip_script,
6040 "volume_group_name": cluster.volume_group_name,
6041 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6042 "file_storage_dir": cluster.file_storage_dir,
6043 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6044 "maintain_node_health": cluster.maintain_node_health,
6045 "ctime": cluster.ctime,
6046 "mtime": cluster.mtime,
6047 "uuid": cluster.uuid,
6048 "tags": list(cluster.GetTags()),
6049 "uid_pool": cluster.uid_pool,
6050 "default_iallocator": cluster.default_iallocator,
6051 "reserved_lvs": cluster.reserved_lvs,
6052 "primary_ip_version": primary_ip_version,
6053 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6054 "hidden_os": cluster.hidden_os,
6055 "blacklisted_os": cluster.blacklisted_os,
6061 class LUClusterConfigQuery(NoHooksLU):
6062 """Return configuration values.
6066 _FIELDS_DYNAMIC = utils.FieldSet()
6067 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6068 "watcher_pause", "volume_group_name")
6070 def CheckArguments(self):
6071 _CheckOutputFields(static=self._FIELDS_STATIC,
6072 dynamic=self._FIELDS_DYNAMIC,
6073 selected=self.op.output_fields)
6075 def ExpandNames(self):
6076 self.needed_locks = {}
6078 def Exec(self, feedback_fn):
6079 """Dump a representation of the cluster config to the standard output.
6082 values = []
6083 for field in self.op.output_fields:
6084 if field == "cluster_name":
6085 entry = self.cfg.GetClusterName()
6086 elif field == "master_node":
6087 entry = self.cfg.GetMasterNode()
6088 elif field == "drain_flag":
6089 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6090 elif field == "watcher_pause":
6091 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6092 elif field == "volume_group_name":
6093 entry = self.cfg.GetVGName()
6094 else:
6095 raise errors.ParameterError(field)
6096 values.append(entry)
6098 return values
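# Illustrative sketch: the returned list follows the order of
# self.op.output_fields, so a query for ["cluster_name", "master_node"] might
# yield (hypothetical names) ["cluster1.example.com", "node1.example.com"].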
6100 class LUInstanceActivateDisks(NoHooksLU):
6101 """Bring up an instance's disks.
6106 def ExpandNames(self):
6107 self._ExpandAndLockInstance()
6108 self.needed_locks[locking.LEVEL_NODE] = []
6109 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6111 def DeclareLocks(self, level):
6112 if level == locking.LEVEL_NODE:
6113 self._LockInstancesNodes()
6115 def CheckPrereq(self):
6116 """Check prerequisites.
6118 This checks that the instance is in the cluster.
6121 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6122 assert self.instance is not None, \
6123 "Cannot retrieve locked instance %s" % self.op.instance_name
6124 _CheckNodeOnline(self, self.instance.primary_node)
6126 def Exec(self, feedback_fn):
6127 """Activate the disks.
6130 disks_ok, disks_info = \
6131 _AssembleInstanceDisks(self, self.instance,
6132 ignore_size=self.op.ignore_size)
6134 raise errors.OpExecError("Cannot activate block devices")
6139 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6140                            ignore_size=False):
6141 """Prepare the block devices for an instance.
6143 This sets up the block devices on all nodes.
6145 @type lu: L{LogicalUnit}
6146 @param lu: the logical unit on whose behalf we execute
6147 @type instance: L{objects.Instance}
6148 @param instance: the instance for whose disks we assemble
6149 @type disks: list of L{objects.Disk} or None
6150 @param disks: which disks to assemble (or all, if None)
6151 @type ignore_secondaries: boolean
6152 @param ignore_secondaries: if true, errors on secondary nodes
6153 won't result in an error return from the function
6154 @type ignore_size: boolean
6155 @param ignore_size: if true, the current known size of the disk
6156 will not be used during the disk activation, useful for cases
6157 when the size is wrong
6158 @return: False if the operation failed, otherwise a list of
6159 (host, instance_visible_name, node_visible_name)
6160 with the mapping from node devices to instance devices
6163 device_info = []
6164 disks_ok = True
6165 iname = instance.name
6166 disks = _ExpandCheckDisks(instance, disks)
6168 # With the two passes mechanism we try to reduce the window of
6169 # opportunity for the race condition of switching DRBD to primary
6170 # before handshaking occurred, but we do not eliminate it
6172 # The proper fix would be to wait (with some limits) until the
6173 # connection has been made and drbd transitions from WFConnection
6174 # into any other network-connected state (Connected, SyncTarget,
6177 # 1st pass, assemble on all nodes in secondary mode
6178 for idx, inst_disk in enumerate(disks):
6179 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6180 if ignore_size:
6181 node_disk = node_disk.Copy()
6182 node_disk.UnsetSize()
6183 lu.cfg.SetDiskID(node_disk, node)
6184 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6185 msg = result.fail_msg
6186 if msg:
6187 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6188                    " (is_primary=False, pass=1): %s",
6189                    inst_disk.iv_name, node, msg)
6190 if not ignore_secondaries:
6191 disks_ok = False
6193 # FIXME: race condition on drbd migration to primary
6195 # 2nd pass, do only the primary node
6196 for idx, inst_disk in enumerate(disks):
6197 dev_path = None
6199 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6200 if node != instance.primary_node:
6201 continue
6202 if ignore_size:
6203 node_disk = node_disk.Copy()
6204 node_disk.UnsetSize()
6205 lu.cfg.SetDiskID(node_disk, node)
6206 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6207 msg = result.fail_msg
6208 if msg:
6209 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6210                    " (is_primary=True, pass=2): %s",
6211                    inst_disk.iv_name, node, msg)
6212 disks_ok = False
6213 else:
6214 dev_path = result.payload
6216 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6218 # leave the disks configured for the primary node
6219 # this is a workaround that would be fixed better by
6220 # improving the logical/physical id handling
6221 for disk in disks:
6222 lu.cfg.SetDiskID(disk, instance.primary_node)
6224 return disks_ok, device_info
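# Minimal usage sketch (assuming "lu" is a LogicalUnit and "instance" an
# objects.Instance already read from the configuration):
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     lu.LogInfo("%s: %s assembled as %s", node, iv_name, dev_path)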
6227 def _StartInstanceDisks(lu, instance, force):
6228 """Start the disks of an instance.
6231 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6232 ignore_secondaries=force)
6234 _ShutdownInstanceDisks(lu, instance)
6235 if force is not None and not force:
6236 lu.proc.LogWarning("", hint="If the message above refers to a"
6238 " you can retry the operation using '--force'.")
6239 raise errors.OpExecError("Disk consistency error")
6242 class LUInstanceDeactivateDisks(NoHooksLU):
6243 """Shutdown an instance's disks.
6248 def ExpandNames(self):
6249 self._ExpandAndLockInstance()
6250 self.needed_locks[locking.LEVEL_NODE] = []
6251 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6253 def DeclareLocks(self, level):
6254 if level == locking.LEVEL_NODE:
6255 self._LockInstancesNodes()
6257 def CheckPrereq(self):
6258 """Check prerequisites.
6260 This checks that the instance is in the cluster.
6263 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6264 assert self.instance is not None, \
6265 "Cannot retrieve locked instance %s" % self.op.instance_name
6267 def Exec(self, feedback_fn):
6268 """Deactivate the disks
6271 instance = self.instance
6273 _ShutdownInstanceDisks(self, instance)
6275 _SafeShutdownInstanceDisks(self, instance)
6278 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6279 """Shutdown block devices of an instance.
6281 This function checks if an instance is running, before calling
6282 _ShutdownInstanceDisks.
6285 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6286 _ShutdownInstanceDisks(lu, instance, disks=disks)
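# Illustrative contrast (sketch): the "safe" variant refuses to act on a
# running instance, while the plain helper below performs no such check:
#   _SafeShutdownInstanceDisks(lu, instance)          # raises if instance is up
#   _ShutdownInstanceDisks(lu, instance, disks=None)  # shuts the disks down regardless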
6289 def _ExpandCheckDisks(instance, disks):
6290 """Return the instance disks selected by the disks list
6292 @type disks: list of L{objects.Disk} or None
6293 @param disks: selected disks
6294 @rtype: list of L{objects.Disk}
6295 @return: selected instance disks to act on
6298 if disks is None:
6299 return instance.disks
6300 else:
6301 if not set(disks).issubset(instance.disks):
6302 raise errors.ProgrammerError("Can only act on disks belonging to the"
6303                              " target instance")
6304 return disks
6307 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6308 """Shutdown block devices of an instance.
6310 This does the shutdown on all nodes of the instance.
6312 If the ignore_primary is false, errors on the primary node are
6317 disks = _ExpandCheckDisks(instance, disks)
6319 for disk in disks:
6320 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6321 lu.cfg.SetDiskID(top_disk, node)
6322 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6323 msg = result.fail_msg
6325 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6326 disk.iv_name, node, msg)
6327 if ((node == instance.primary_node and not ignore_primary) or
6328 (node != instance.primary_node and not result.offline)):
6333 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6334 """Checks if a node has enough free memory.
6336 This function checks if a given node has the needed amount of free
6337 memory. In case the node has less memory or we cannot get the
6338 information from the node, this function raises an OpPrereqError
6341 @type lu: C{LogicalUnit}
6342 @param lu: a logical unit from which we get configuration data
6344 @param node: the node to check
6345 @type reason: C{str}
6346 @param reason: string to use in the error message
6347 @type requested: C{int}
6348 @param requested: the amount of memory in MiB to check for
6349 @type hypervisor_name: C{str}
6350 @param hypervisor_name: the hypervisor to ask for memory stats
6351 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6352 we cannot check the node
6355 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6356 nodeinfo[node].Raise("Can't get data from node %s" % node,
6357 prereq=True, ecode=errors.ECODE_ENVIRON)
6358 (_, _, (hv_info, )) = nodeinfo[node].payload
6360 free_mem = hv_info.get("memory_free", None)
6361 if not isinstance(free_mem, int):
6362 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6363 " was '%s'" % (node, free_mem),
6364 errors.ECODE_ENVIRON)
6365 if requested > free_mem:
6366 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6367 " needed %s MiB, available %s MiB" %
6368 (node, reason, requested, free_mem),
6369 errors.ECODE_NORES)
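# Illustrative call (sketch, mirroring how LUInstanceStartup uses it below):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)
# raises OpPrereqError unless the node reports at least that much free memory.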
6372 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6373 """Checks if nodes have enough free disk space in the all VGs.
6375 This function checks if all given nodes have the needed amount of
6376 free disk. In case any node has less disk or we cannot get the
6377 information from the node, this function raises an OpPrereqError
6380 @type lu: C{LogicalUnit}
6381 @param lu: a logical unit from which we get configuration data
6382 @type nodenames: C{list}
6383 @param nodenames: the list of node names to check
6384 @type req_sizes: C{dict}
6385 @param req_sizes: the hash of vg and corresponding amount of disk in
6387 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6388 or we cannot check the node
6391 for vg, req_size in req_sizes.items():
6392 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
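# Illustrative req_sizes value (sketch; the VG name is hypothetical): a
# request for 12 GiB of space on one volume group would look like
#   {"xenvg": 12288}
# and every (vg, size) pair is checked on each node in nodenames.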
6395 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6396 """Checks if nodes have enough free disk space in the specified VG.
6398 This function checks if all given nodes have the needed amount of
6399 free disk. In case any node has less disk or we cannot get the
6400 information from the node, this function raises an OpPrereqError
6403 @type lu: C{LogicalUnit}
6404 @param lu: a logical unit from which we get configuration data
6405 @type nodenames: C{list}
6406 @param nodenames: the list of node names to check
6408 @param vg: the volume group to check
6409 @type requested: C{int}
6410 @param requested: the amount of disk in MiB to check for
6411 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6412 or we cannot check the node
6415 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6416 for node in nodenames:
6417 info = nodeinfo[node]
6418 info.Raise("Cannot get current information from node %s" % node,
6419 prereq=True, ecode=errors.ECODE_ENVIRON)
6420 (_, (vg_info, ), _) = info.payload
6421 vg_free = vg_info.get("vg_free", None)
6422 if not isinstance(vg_free, int):
6423 raise errors.OpPrereqError("Can't compute free disk space on node"
6424 " %s for vg %s, result was '%s'" %
6425 (node, vg, vg_free), errors.ECODE_ENVIRON)
6426 if requested > vg_free:
6427 raise errors.OpPrereqError("Not enough disk space on target node %s"
6428 " vg %s: required %d MiB, available %d MiB" %
6429 (node, vg, requested, vg_free),
6433 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6434 """Checks if nodes have enough physical CPUs
6436 This function checks if all given nodes have the needed number of
6437 physical CPUs. In case any node has less CPUs or we cannot get the
6438 information from the node, this function raises an OpPrereqError
6441 @type lu: C{LogicalUnit}
6442 @param lu: a logical unit from which we get configuration data
6443 @type nodenames: C{list}
6444 @param nodenames: the list of node names to check
6445 @type requested: C{int}
6446 @param requested: the minimum acceptable number of physical CPUs
6447 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6448 or we cannot check the node
6451 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6452 for node in nodenames:
6453 info = nodeinfo[node]
6454 info.Raise("Cannot get current information from node %s" % node,
6455 prereq=True, ecode=errors.ECODE_ENVIRON)
6456 (_, _, (hv_info, )) = info.payload
6457 num_cpus = hv_info.get("cpu_total", None)
6458 if not isinstance(num_cpus, int):
6459 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6460 " on node %s, result was '%s'" %
6461 (node, num_cpus), errors.ECODE_ENVIRON)
6462 if requested > num_cpus:
6463 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6464 "required" % (node, num_cpus, requested),
6468 class LUInstanceStartup(LogicalUnit):
6469 """Starts an instance.
6472 HPATH = "instance-start"
6473 HTYPE = constants.HTYPE_INSTANCE
6476 def CheckArguments(self):
6478 if self.op.beparams:
6479 # fill the beparams dict
6480 objects.UpgradeBeParams(self.op.beparams)
6481 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6483 def ExpandNames(self):
6484 self._ExpandAndLockInstance()
6485 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6487 def DeclareLocks(self, level):
6488 if level == locking.LEVEL_NODE_RES:
6489 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6491 def BuildHooksEnv(self):
6494 This runs on master, primary and secondary nodes of the instance.
6498 "FORCE": self.op.force,
6501 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6505 def BuildHooksNodes(self):
6506 """Build hooks nodes.
6509 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6512 def CheckPrereq(self):
6513 """Check prerequisites.
6515 This checks that the instance is in the cluster.
6518 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6519 assert self.instance is not None, \
6520 "Cannot retrieve locked instance %s" % self.op.instance_name
6523 if self.op.hvparams:
6524 # check hypervisor parameter syntax (locally)
6525 cluster = self.cfg.GetClusterInfo()
6526 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6527 filled_hvp = cluster.FillHV(instance)
6528 filled_hvp.update(self.op.hvparams)
6529 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6530 hv_type.CheckParameterSyntax(filled_hvp)
6531 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6533 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6535 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6537 if self.primary_offline and self.op.ignore_offline_nodes:
6538 self.proc.LogWarning("Ignoring offline primary node")
6540 if self.op.hvparams or self.op.beparams:
6541 self.proc.LogWarning("Overridden parameters are ignored")
6543 _CheckNodeOnline(self, instance.primary_node)
6545 bep = self.cfg.GetClusterInfo().FillBE(instance)
6546 bep.update(self.op.beparams)
6548 # check bridges existence
6549 _CheckInstanceBridgesExist(self, instance)
6551 remote_info = self.rpc.call_instance_info(instance.primary_node,
6553 instance.hypervisor)
6554 remote_info.Raise("Error checking node %s" % instance.primary_node,
6555 prereq=True, ecode=errors.ECODE_ENVIRON)
6556 if not remote_info.payload: # not running already
6557 _CheckNodeFreeMemory(self, instance.primary_node,
6558 "starting instance %s" % instance.name,
6559 bep[constants.BE_MINMEM], instance.hypervisor)
6561 def Exec(self, feedback_fn):
6562 """Start the instance.
6565 instance = self.instance
6566 force = self.op.force
6568 if not self.op.no_remember:
6569 self.cfg.MarkInstanceUp(instance.name)
6571 if self.primary_offline:
6572 assert self.op.ignore_offline_nodes
6573 self.proc.LogInfo("Primary node offline, marked instance as started")
6574 else:
6575 node_current = instance.primary_node
6577 _StartInstanceDisks(self, instance, force)
6579 result = \
6580 self.rpc.call_instance_start(node_current,
6581 (instance, self.op.hvparams,
6582 self.op.beparams),
6583 self.op.startup_paused)
6584 msg = result.fail_msg
6585 if msg:
6586 _ShutdownInstanceDisks(self, instance)
6587 raise errors.OpExecError("Could not start instance: %s" % msg)
6590 class LUInstanceReboot(LogicalUnit):
6591 """Reboot an instance.
6594 HPATH = "instance-reboot"
6595 HTYPE = constants.HTYPE_INSTANCE
6598 def ExpandNames(self):
6599 self._ExpandAndLockInstance()
6601 def BuildHooksEnv(self):
6604 This runs on master, primary and secondary nodes of the instance.
6608 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6609 "REBOOT_TYPE": self.op.reboot_type,
6610 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6613 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6617 def BuildHooksNodes(self):
6618 """Build hooks nodes.
6621 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6624 def CheckPrereq(self):
6625 """Check prerequisites.
6627 This checks that the instance is in the cluster.
6630 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6631 assert self.instance is not None, \
6632 "Cannot retrieve locked instance %s" % self.op.instance_name
6633 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6634 _CheckNodeOnline(self, instance.primary_node)
6636 # check bridges existence
6637 _CheckInstanceBridgesExist(self, instance)
6639 def Exec(self, feedback_fn):
6640 """Reboot the instance.
6643 instance = self.instance
6644 ignore_secondaries = self.op.ignore_secondaries
6645 reboot_type = self.op.reboot_type
6647 remote_info = self.rpc.call_instance_info(instance.primary_node,
6649 instance.hypervisor)
6650 remote_info.Raise("Error checking node %s" % instance.primary_node)
6651 instance_running = bool(remote_info.payload)
6653 node_current = instance.primary_node
6655 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6656 constants.INSTANCE_REBOOT_HARD]:
6657 for disk in instance.disks:
6658 self.cfg.SetDiskID(disk, node_current)
6659 result = self.rpc.call_instance_reboot(node_current, instance,
6661 self.op.shutdown_timeout)
6662 result.Raise("Could not reboot instance")
6663 else:
6664 if instance_running:
6665 result = self.rpc.call_instance_shutdown(node_current, instance,
6666 self.op.shutdown_timeout)
6667 result.Raise("Could not shutdown instance for full reboot")
6668 _ShutdownInstanceDisks(self, instance)
6669 else:
6670 self.LogInfo("Instance %s was already stopped, starting now",
6671              instance.name)
6672 _StartInstanceDisks(self, instance, ignore_secondaries)
6673 result = self.rpc.call_instance_start(node_current,
6674 (instance, None, None), False)
6675 msg = result.fail_msg
6676 if msg:
6677 _ShutdownInstanceDisks(self, instance)
6678 raise errors.OpExecError("Could not start instance for"
6679 " full reboot: %s" % msg)
6681 self.cfg.MarkInstanceUp(instance.name)
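# Behaviour sketch of the reboot types handled above:
#   constants.INSTANCE_REBOOT_SOFT / _HARD: delegated to call_instance_reboot
#     on the primary node while the instance stays under the hypervisor.
#   any other requested type (full reboot): the instance is shut down, its
#     disks are deactivated and reactivated, and it is started again.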
6684 class LUInstanceShutdown(LogicalUnit):
6685 """Shutdown an instance.
6688 HPATH = "instance-stop"
6689 HTYPE = constants.HTYPE_INSTANCE
6692 def ExpandNames(self):
6693 self._ExpandAndLockInstance()
6695 def BuildHooksEnv(self):
6698 This runs on master, primary and secondary nodes of the instance.
6701 env = _BuildInstanceHookEnvByObject(self, self.instance)
6702 env["TIMEOUT"] = self.op.timeout
6705 def BuildHooksNodes(self):
6706 """Build hooks nodes.
6709 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6712 def CheckPrereq(self):
6713 """Check prerequisites.
6715 This checks that the instance is in the cluster.
6718 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6719 assert self.instance is not None, \
6720 "Cannot retrieve locked instance %s" % self.op.instance_name
6722 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6724 self.primary_offline = \
6725 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6727 if self.primary_offline and self.op.ignore_offline_nodes:
6728 self.proc.LogWarning("Ignoring offline primary node")
6730 _CheckNodeOnline(self, self.instance.primary_node)
6732 def Exec(self, feedback_fn):
6733 """Shutdown the instance.
6736 instance = self.instance
6737 node_current = instance.primary_node
6738 timeout = self.op.timeout
6740 if not self.op.no_remember:
6741 self.cfg.MarkInstanceDown(instance.name)
6743 if self.primary_offline:
6744 assert self.op.ignore_offline_nodes
6745 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6746 else:
6747 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6748 msg = result.fail_msg
6749 if msg:
6750 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6752 _ShutdownInstanceDisks(self, instance)
6755 class LUInstanceReinstall(LogicalUnit):
6756 """Reinstall an instance.
6759 HPATH = "instance-reinstall"
6760 HTYPE = constants.HTYPE_INSTANCE
6763 def ExpandNames(self):
6764 self._ExpandAndLockInstance()
6766 def BuildHooksEnv(self):
6769 This runs on master, primary and secondary nodes of the instance.
6772 return _BuildInstanceHookEnvByObject(self, self.instance)
6774 def BuildHooksNodes(self):
6775 """Build hooks nodes.
6778 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6781 def CheckPrereq(self):
6782 """Check prerequisites.
6784 This checks that the instance is in the cluster and is not running.
6787 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6788 assert instance is not None, \
6789 "Cannot retrieve locked instance %s" % self.op.instance_name
6790 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6791 " offline, cannot reinstall")
6792 for node in instance.secondary_nodes:
6793 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6794 " cannot reinstall")
6796 if instance.disk_template == constants.DT_DISKLESS:
6797 raise errors.OpPrereqError("Instance '%s' has no disks" %
6798 self.op.instance_name,
6800 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6802 if self.op.os_type is not None:
6804 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6805 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6806 instance_os = self.op.os_type
6807 else:
6808 instance_os = instance.os
6810 nodelist = list(instance.all_nodes)
6812 if self.op.osparams:
6813 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6814 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6815 self.os_inst = i_osdict # the new dict (without defaults)
6816 else:
6817 self.os_inst = {}
6819 self.instance = instance
6821 def Exec(self, feedback_fn):
6822 """Reinstall the instance.
6825 inst = self.instance
6827 if self.op.os_type is not None:
6828 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6829 inst.os = self.op.os_type
6830 # Write to configuration
6831 self.cfg.Update(inst, feedback_fn)
6833 _StartInstanceDisks(self, inst, None)
6835 feedback_fn("Running the instance OS create scripts...")
6836 # FIXME: pass debug option from opcode to backend
6837 result = self.rpc.call_instance_os_add(inst.primary_node,
6838 (inst, self.os_inst), True,
6839 self.op.debug_level)
6840 result.Raise("Could not install OS for instance %s on node %s" %
6841 (inst.name, inst.primary_node))
6843 _ShutdownInstanceDisks(self, inst)
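# Minimal usage sketch (names are hypothetical): reinstalling with a new OS
# and per-OS parameters is normally requested as
#   op = opcodes.OpInstanceReinstall(instance_name="inst1.example.com",
#                                    os_type="debootstrap+default",
#                                    osparams={})
# the instance must be stopped (checked in CheckPrereq) before Exec runs the
# OS create scripts on its primary node.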
6846 class LUInstanceRecreateDisks(LogicalUnit):
6847 """Recreate an instance's missing disks.
6850 HPATH = "instance-recreate-disks"
6851 HTYPE = constants.HTYPE_INSTANCE
6854 def CheckArguments(self):
6855 # normalise the disk list
6856 self.op.disks = sorted(frozenset(self.op.disks))
6858 def ExpandNames(self):
6859 self._ExpandAndLockInstance()
6860 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6861 if self.op.nodes:
6862 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6863 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6864 else:
6865 self.needed_locks[locking.LEVEL_NODE] = []
6866 self.needed_locks[locking.LEVEL_NODE_RES] = []
6868 def DeclareLocks(self, level):
6869 if level == locking.LEVEL_NODE:
6870 # if we replace the nodes, we only need to lock the old primary,
6871 # otherwise we need to lock all nodes for disk re-creation
6872 primary_only = bool(self.op.nodes)
6873 self._LockInstancesNodes(primary_only=primary_only)
6874 elif level == locking.LEVEL_NODE_RES:
6876 self.needed_locks[locking.LEVEL_NODE_RES] = \
6877 self.needed_locks[locking.LEVEL_NODE][:]
6879 def BuildHooksEnv(self):
6882 This runs on master, primary and secondary nodes of the instance.
6885 return _BuildInstanceHookEnvByObject(self, self.instance)
6887 def BuildHooksNodes(self):
6888 """Build hooks nodes.
6891 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6894 def CheckPrereq(self):
6895 """Check prerequisites.
6897 This checks that the instance is in the cluster and is not running.
6900 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6901 assert instance is not None, \
6902 "Cannot retrieve locked instance %s" % self.op.instance_name
6903 if self.op.nodes:
6904 if len(self.op.nodes) != len(instance.all_nodes):
6905 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6906 " %d replacement nodes were specified" %
6907 (instance.name, len(instance.all_nodes),
6908 len(self.op.nodes)),
6910 assert instance.disk_template != constants.DT_DRBD8 or \
6911 len(self.op.nodes) == 2
6912 assert instance.disk_template != constants.DT_PLAIN or \
6913 len(self.op.nodes) == 1
6914 primary_node = self.op.nodes[0]
6915 else:
6916 primary_node = instance.primary_node
6917 _CheckNodeOnline(self, primary_node)
6919 if instance.disk_template == constants.DT_DISKLESS:
6920 raise errors.OpPrereqError("Instance '%s' has no disks" %
6921 self.op.instance_name, errors.ECODE_INVAL)
6922 # if we replace nodes *and* the old primary is offline, we don't
6924 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6925 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6926 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6927 if not (self.op.nodes and old_pnode.offline):
6928 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6929 msg="cannot recreate disks")
6931 if not self.op.disks:
6932 self.op.disks = range(len(instance.disks))
6933 else:
6934 for idx in self.op.disks:
6935 if idx >= len(instance.disks):
6936 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6938 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6939 raise errors.OpPrereqError("Can't recreate disks partially and"
6940 " change the nodes at the same time",
6942 self.instance = instance
6944 def Exec(self, feedback_fn):
6945 """Recreate the disks.
6948 instance = self.instance
6950 assert (self.owned_locks(locking.LEVEL_NODE) ==
6951 self.owned_locks(locking.LEVEL_NODE_RES))
6953 to_skip = []
6954 mods = [] # keeps track of needed logical_id changes
6956 for idx, disk in enumerate(instance.disks):
6957 if idx not in self.op.disks: # disk idx has not been passed in
6958 to_skip.append(idx)
6959 continue
6960 # update secondaries for disks, if needed
6961 if self.op.nodes:
6962 if disk.dev_type == constants.LD_DRBD8:
6963 # need to update the nodes and minors
6964 assert len(self.op.nodes) == 2
6965 assert len(disk.logical_id) == 6 # otherwise disk internals
6967 (_, _, old_port, _, _, old_secret) = disk.logical_id
6968 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6969 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6970 new_minors[0], new_minors[1], old_secret)
6971 assert len(disk.logical_id) == len(new_id)
6972 mods.append((idx, new_id))
6974 # now that we have passed all asserts above, we can apply the mods
6975 # in a single run (to avoid partial changes)
6976 for idx, new_id in mods:
6977 instance.disks[idx].logical_id = new_id
6979 # change primary node, if needed
6980 if self.op.nodes:
6981 instance.primary_node = self.op.nodes[0]
6982 self.LogWarning("Changing the instance's nodes, you will have to"
6983 " remove any disks left on the older nodes manually")
6986 self.cfg.Update(instance, feedback_fn)
6988 _CreateDisks(self, instance, to_skip=to_skip)
6991 class LUInstanceRename(LogicalUnit):
6992 """Rename an instance.
6995 HPATH = "instance-rename"
6996 HTYPE = constants.HTYPE_INSTANCE
6998 def CheckArguments(self):
7002 if self.op.ip_check and not self.op.name_check:
7003 # TODO: make the ip check more flexible and not depend on the name check
7004 raise errors.OpPrereqError("IP address check requires a name check",
7007 def BuildHooksEnv(self):
7010 This runs on master, primary and secondary nodes of the instance.
7013 env = _BuildInstanceHookEnvByObject(self, self.instance)
7014 env["INSTANCE_NEW_NAME"] = self.op.new_name
7017 def BuildHooksNodes(self):
7018 """Build hooks nodes.
7021 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7024 def CheckPrereq(self):
7025 """Check prerequisites.
7027 This checks that the instance is in the cluster and is not running.
7030 self.op.instance_name = _ExpandInstanceName(self.cfg,
7031 self.op.instance_name)
7032 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7033 assert instance is not None
7034 _CheckNodeOnline(self, instance.primary_node)
7035 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7036 msg="cannot rename")
7037 self.instance = instance
7039 new_name = self.op.new_name
7040 if self.op.name_check:
7041 hostname = netutils.GetHostname(name=new_name)
7042 if hostname.name != new_name:
7043 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7045 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7046 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7047 " same as given hostname '%s'") %
7048 (hostname.name, self.op.new_name),
7050 new_name = self.op.new_name = hostname.name
7051 if (self.op.ip_check and
7052 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7053 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7054 (hostname.ip, new_name),
7055 errors.ECODE_NOTUNIQUE)
7057 instance_list = self.cfg.GetInstanceList()
7058 if new_name in instance_list and new_name != instance.name:
7059 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7060 new_name, errors.ECODE_EXISTS)
7062 def Exec(self, feedback_fn):
7063 """Rename the instance.
7066 inst = self.instance
7067 old_name = inst.name
7069 rename_file_storage = False
7070 if (inst.disk_template in constants.DTS_FILEBASED and
7071 self.op.new_name != inst.name):
7072 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7073 rename_file_storage = True
7075 self.cfg.RenameInstance(inst.name, self.op.new_name)
7076 # Change the instance lock. This is definitely safe while we hold the BGL.
7077 # Otherwise the new lock would have to be added in acquired mode.
7079 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7080 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7082 # re-read the instance from the configuration after rename
7083 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7085 if rename_file_storage:
7086 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7087 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7088 old_file_storage_dir,
7089 new_file_storage_dir)
7090 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7091 " (but the instance has been renamed in Ganeti)" %
7092 (inst.primary_node, old_file_storage_dir,
7093 new_file_storage_dir))
7095 _StartInstanceDisks(self, inst, None)
7097 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7098 old_name, self.op.debug_level)
7099 msg = result.fail_msg
7101 msg = ("Could not run OS rename script for instance %s on node %s"
7102 " (but the instance has been renamed in Ganeti): %s" %
7103 (inst.name, inst.primary_node, msg))
7104 self.proc.LogWarning(msg)
7106 _ShutdownInstanceDisks(self, inst)
7111 class LUInstanceRemove(LogicalUnit):
7112 """Remove an instance.
7115 HPATH = "instance-remove"
7116 HTYPE = constants.HTYPE_INSTANCE
7119 def ExpandNames(self):
7120 self._ExpandAndLockInstance()
7121 self.needed_locks[locking.LEVEL_NODE] = []
7122 self.needed_locks[locking.LEVEL_NODE_RES] = []
7123 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7125 def DeclareLocks(self, level):
7126 if level == locking.LEVEL_NODE:
7127 self._LockInstancesNodes()
7128 elif level == locking.LEVEL_NODE_RES:
7130 self.needed_locks[locking.LEVEL_NODE_RES] = \
7131 self.needed_locks[locking.LEVEL_NODE][:]
7133 def BuildHooksEnv(self):
7136 This runs on master, primary and secondary nodes of the instance.
7139 env = _BuildInstanceHookEnvByObject(self, self.instance)
7140 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7143 def BuildHooksNodes(self):
7144 """Build hooks nodes.
7147 nl = [self.cfg.GetMasterNode()]
7148 nl_post = list(self.instance.all_nodes) + nl
7149 return (nl, nl_post)
7151 def CheckPrereq(self):
7152 """Check prerequisites.
7154 This checks that the instance is in the cluster.
7157 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7158 assert self.instance is not None, \
7159 "Cannot retrieve locked instance %s" % self.op.instance_name
7161 def Exec(self, feedback_fn):
7162 """Remove the instance.
7165 instance = self.instance
7166 logging.info("Shutting down instance %s on node %s",
7167 instance.name, instance.primary_node)
7169 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7170 self.op.shutdown_timeout)
7171 msg = result.fail_msg
7172 if msg:
7173 if self.op.ignore_failures:
7174 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7175 else:
7176 raise errors.OpExecError("Could not shutdown instance %s on"
7177                          " node %s: %s" %
7178                          (instance.name, instance.primary_node, msg))
7180 assert (self.owned_locks(locking.LEVEL_NODE) ==
7181 self.owned_locks(locking.LEVEL_NODE_RES))
7182 assert not (set(instance.all_nodes) -
7183 self.owned_locks(locking.LEVEL_NODE)), \
7184 "Not owning correct locks"
7186 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7189 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7190 """Utility function to remove an instance.
7193 logging.info("Removing block devices for instance %s", instance.name)
7195 if not _RemoveDisks(lu, instance):
7196 if not ignore_failures:
7197 raise errors.OpExecError("Can't remove instance's disks")
7198 feedback_fn("Warning: can't remove instance's disks")
7200 logging.info("Removing instance %s out of cluster config", instance.name)
7202 lu.cfg.RemoveInstance(instance.name)
7204 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7205 "Instance lock removal conflict"
7207 # Remove lock for the instance
7208 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7211 class LUInstanceQuery(NoHooksLU):
7212 """Logical unit for querying instances.
7215 # pylint: disable=W0142
7218 def CheckArguments(self):
7219 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7220 self.op.output_fields, self.op.use_locking)
7222 def ExpandNames(self):
7223 self.iq.ExpandNames(self)
7225 def DeclareLocks(self, level):
7226 self.iq.DeclareLocks(self, level)
7228 def Exec(self, feedback_fn):
7229 return self.iq.OldStyleQuery(self)
7232 class LUInstanceFailover(LogicalUnit):
7233 """Failover an instance.
7236 HPATH = "instance-failover"
7237 HTYPE = constants.HTYPE_INSTANCE
7240 def CheckArguments(self):
7241 """Check the arguments.
7244 self.iallocator = getattr(self.op, "iallocator", None)
7245 self.target_node = getattr(self.op, "target_node", None)
7247 def ExpandNames(self):
7248 self._ExpandAndLockInstance()
7250 if self.op.target_node is not None:
7251 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7253 self.needed_locks[locking.LEVEL_NODE] = []
7254 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7256 ignore_consistency = self.op.ignore_consistency
7257 shutdown_timeout = self.op.shutdown_timeout
7258 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7261 ignore_consistency=ignore_consistency,
7262 shutdown_timeout=shutdown_timeout,
7263 ignore_ipolicy=self.op.ignore_ipolicy)
7264 self.tasklets = [self._migrater]
7266 def DeclareLocks(self, level):
7267 if level == locking.LEVEL_NODE:
7268 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7269 if instance.disk_template in constants.DTS_EXT_MIRROR:
7270 if self.op.target_node is None:
7271 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7273 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7274 self.op.target_node]
7275 del self.recalculate_locks[locking.LEVEL_NODE]
7277 self._LockInstancesNodes()
7279 def BuildHooksEnv(self):
7282 This runs on master, primary and secondary nodes of the instance.
7285 instance = self._migrater.instance
7286 source_node = instance.primary_node
7287 target_node = self.op.target_node
7289 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7290 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7291 "OLD_PRIMARY": source_node,
7292 "NEW_PRIMARY": target_node,
7295 if instance.disk_template in constants.DTS_INT_MIRROR:
7296 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7297 env["NEW_SECONDARY"] = source_node
7299 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7301 env.update(_BuildInstanceHookEnvByObject(self, instance))
7305 def BuildHooksNodes(self):
7306 """Build hooks nodes.
7309 instance = self._migrater.instance
7310 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7311 return (nl, nl + [instance.primary_node])
7314 class LUInstanceMigrate(LogicalUnit):
7315 """Migrate an instance.
7317 This is migration without shutting down, compared to the failover,
7318 which is done with shutdown.
7321 HPATH = "instance-migrate"
7322 HTYPE = constants.HTYPE_INSTANCE
7325 def ExpandNames(self):
7326 self._ExpandAndLockInstance()
7328 if self.op.target_node is not None:
7329 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7331 self.needed_locks[locking.LEVEL_NODE] = []
7332 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7334 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7335 cleanup=self.op.cleanup,
7337 fallback=self.op.allow_failover,
7338 ignore_ipolicy=self.op.ignore_ipolicy)
7339 self.tasklets = [self._migrater]
7341 def DeclareLocks(self, level):
7342 if level == locking.LEVEL_NODE:
7343 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7344 if instance.disk_template in constants.DTS_EXT_MIRROR:
7345 if self.op.target_node is None:
7346 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7348 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7349 self.op.target_node]
7350 del self.recalculate_locks[locking.LEVEL_NODE]
7352 self._LockInstancesNodes()
7354 def BuildHooksEnv(self):
7357 This runs on master, primary and secondary nodes of the instance.
7360 instance = self._migrater.instance
7361 source_node = instance.primary_node
7362 target_node = self.op.target_node
7363 env = _BuildInstanceHookEnvByObject(self, instance)
7365 "MIGRATE_LIVE": self._migrater.live,
7366 "MIGRATE_CLEANUP": self.op.cleanup,
7367 "OLD_PRIMARY": source_node,
7368 "NEW_PRIMARY": target_node,
7371 if instance.disk_template in constants.DTS_INT_MIRROR:
7372 env["OLD_SECONDARY"] = target_node
7373 env["NEW_SECONDARY"] = source_node
7375 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7379 def BuildHooksNodes(self):
7380 """Build hooks nodes.
7383 instance = self._migrater.instance
7384 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7385 return (nl, nl + [instance.primary_node])
7388 class LUInstanceMove(LogicalUnit):
7389 """Move an instance by data-copying.
7392 HPATH = "instance-move"
7393 HTYPE = constants.HTYPE_INSTANCE
7396 def ExpandNames(self):
7397 self._ExpandAndLockInstance()
7398 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7399 self.op.target_node = target_node
7400 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7401 self.needed_locks[locking.LEVEL_NODE_RES] = []
7402 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7404 def DeclareLocks(self, level):
7405 if level == locking.LEVEL_NODE:
7406 self._LockInstancesNodes(primary_only=True)
7407 elif level == locking.LEVEL_NODE_RES:
7409 self.needed_locks[locking.LEVEL_NODE_RES] = \
7410 self.needed_locks[locking.LEVEL_NODE][:]
7412 def BuildHooksEnv(self):
7415 This runs on master, primary and secondary nodes of the instance.
7419 "TARGET_NODE": self.op.target_node,
7420 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7422 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7425 def BuildHooksNodes(self):
7426 """Build hooks nodes.
7430 self.cfg.GetMasterNode(),
7431 self.instance.primary_node,
7432 self.op.target_node,
7436 def CheckPrereq(self):
7437 """Check prerequisites.
7439 This checks that the instance is in the cluster.
7442 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7443 assert self.instance is not None, \
7444 "Cannot retrieve locked instance %s" % self.op.instance_name
7446 node = self.cfg.GetNodeInfo(self.op.target_node)
7447 assert node is not None, \
7448 "Cannot retrieve locked node %s" % self.op.target_node
7450 self.target_node = target_node = node.name
7452 if target_node == instance.primary_node:
7453 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7454 (instance.name, target_node),
7457 bep = self.cfg.GetClusterInfo().FillBE(instance)
7459 for idx, dsk in enumerate(instance.disks):
7460 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7461 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7462 " cannot copy" % idx, errors.ECODE_STATE)
7464 _CheckNodeOnline(self, target_node)
7465 _CheckNodeNotDrained(self, target_node)
7466 _CheckNodeVmCapable(self, target_node)
7467 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7468 self.cfg.GetNodeGroup(node.group))
7469 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7470 ignore=self.op.ignore_ipolicy)
7472 if instance.admin_state == constants.ADMINST_UP:
7473 # check memory requirements on the secondary node
7474 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7475 instance.name, bep[constants.BE_MAXMEM],
7476 instance.hypervisor)
7478 self.LogInfo("Not checking memory on the secondary node as"
7479 " instance will not be started")
7481 # check bridge existence
7482 _CheckInstanceBridgesExist(self, instance, node=target_node)
7484 def Exec(self, feedback_fn):
7485 """Move an instance.
7487 The move is done by shutting it down on its present node, copying
7488 the data over (slow) and starting it on the new node.
7491 instance = self.instance
7493 source_node = instance.primary_node
7494 target_node = self.target_node
7496 self.LogInfo("Shutting down instance %s on source node %s",
7497 instance.name, source_node)
7499 assert (self.owned_locks(locking.LEVEL_NODE) ==
7500 self.owned_locks(locking.LEVEL_NODE_RES))
7502 result = self.rpc.call_instance_shutdown(source_node, instance,
7503 self.op.shutdown_timeout)
7504 msg = result.fail_msg
7505 if msg:
7506 if self.op.ignore_consistency:
7507 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7508                      " Proceeding anyway. Please make sure node"
7509                      " %s is down. Error details: %s",
7510                      instance.name, source_node, source_node, msg)
7511 else:
7512 raise errors.OpExecError("Could not shutdown instance %s on"
7513                          " node %s: %s" %
7514                          (instance.name, source_node, msg))
7516 # create the target disks
7517 try:
7518 _CreateDisks(self, instance, target_node=target_node)
7519 except errors.OpExecError:
7520 self.LogWarning("Device creation failed, reverting...")
7521 try:
7522 _RemoveDisks(self, instance, target_node=target_node)
7523 finally:
7524 self.cfg.ReleaseDRBDMinors(instance.name)
7525 raise
7527 cluster_name = self.cfg.GetClusterInfo().cluster_name
7530 # activate, get path, copy the data over
7531 for idx, disk in enumerate(instance.disks):
7532 self.LogInfo("Copying data for disk %d", idx)
7533 result = self.rpc.call_blockdev_assemble(target_node, disk,
7534 instance.name, True, idx)
7536 self.LogWarning("Can't assemble newly created disk %d: %s",
7537 idx, result.fail_msg)
7538 errs.append(result.fail_msg)
7540 dev_path = result.payload
7541 result = self.rpc.call_blockdev_export(source_node, disk,
7542 target_node, dev_path,
7545 self.LogWarning("Can't copy data over for disk %d: %s",
7546 idx, result.fail_msg)
7547 errs.append(result.fail_msg)
7551 self.LogWarning("Some disks failed to copy, aborting")
7553 _RemoveDisks(self, instance, target_node=target_node)
7555 self.cfg.ReleaseDRBDMinors(instance.name)
7556 raise errors.OpExecError("Errors during disk copy: %s" %
7559 instance.primary_node = target_node
7560 self.cfg.Update(instance, feedback_fn)
7562 self.LogInfo("Removing the disks on the original node")
7563 _RemoveDisks(self, instance, target_node=source_node)
7565 # Only start the instance if it's marked as up
7566 if instance.admin_state == constants.ADMINST_UP:
7567 self.LogInfo("Starting instance %s on node %s",
7568 instance.name, target_node)
7570 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7571 ignore_secondaries=True)
7573 _ShutdownInstanceDisks(self, instance)
7574 raise errors.OpExecError("Can't activate the instance's disks")
7576 result = self.rpc.call_instance_start(target_node,
7577 (instance, None, None), False)
7578 msg = result.fail_msg
7580 _ShutdownInstanceDisks(self, instance)
7581 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7582 (instance.name, target_node, msg))
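# Illustrative sketch (not part of the LU above): the per-disk copy loop in
# LUInstanceMove.Exec accumulates failures instead of aborting on the first
# one, then rolls back in a single place.  A minimal standalone model of that
# pattern; copy_disk and remove_disks are hypothetical callables.
def _ExampleCopyAllOrRollback(disks, copy_disk, remove_disks):
  errs = []
  for idx, disk in enumerate(disks):
    err = copy_disk(idx, disk)  # expected to return None on success
    if err:
      errs.append(err)
  if errs:
    remove_disks()  # best-effort cleanup of the half-created target disks
    raise errors.OpExecError("Errors during disk copy: %s" % ", ".join(errs))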
7585 class LUNodeMigrate(LogicalUnit):
7586 """Migrate all instances from a node.
7589 HPATH = "node-migrate"
7590 HTYPE = constants.HTYPE_NODE
7593 def CheckArguments(self):
7596 def ExpandNames(self):
7597 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7599 self.share_locks = _ShareAll()
7600 self.needed_locks = {
7601 locking.LEVEL_NODE: [self.op.node_name],
7604 def BuildHooksEnv(self):
7607 This runs on the master, the primary and all the secondaries.
7611 "NODE_NAME": self.op.node_name,
7614 def BuildHooksNodes(self):
7615 """Build hooks nodes.
7618 nl = [self.cfg.GetMasterNode()]
7621 def CheckPrereq(self):
7624 def Exec(self, feedback_fn):
7625 # Prepare jobs for migration instances
7627 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7630 iallocator=self.op.iallocator,
7631 target_node=self.op.target_node,
7632 ignore_ipolicy=self.op.ignore_ipolicy)]
7633 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7636 # TODO: Run iallocator in this opcode and pass correct placement options to
7637 # OpInstanceMigrate. Since other jobs can modify the cluster between
7638 # running the iallocator and the actual migration, a good consistency model
7639 # will have to be found.
7641 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7642 frozenset([self.op.node_name]))
7644 return ResultWithJobs(jobs)
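# Illustrative sketch: LUNodeMigrate.Exec above returns one single-opcode job
# per primary instance, so each migration is scheduled and can fail
# independently.  A minimal model of that shape; make_opcode stands in for
# opcodes.OpInstanceMigrate and is a hypothetical callable.
def _ExampleJobsPerInstance(instance_names, make_opcode):
  # [[op1], [op2], ...]: a list of jobs, each job being a list of opcodes
  return [[make_opcode(name)] for name in instance_names]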
7647 class TLMigrateInstance(Tasklet):
7648 """Tasklet class for instance migration.
7651 @ivar live: whether the migration will be done live or non-live;
7652 this variable is initialized only after CheckPrereq has run
7653 @type cleanup: boolean
7654 @ivar cleanup: Whether we clean up from a failed migration
7655 @type iallocator: string
7656 @ivar iallocator: The iallocator used to determine target_node
7657 @type target_node: string
7658 @ivar target_node: If given, the target_node to reallocate the instance to
7659 @type failover: boolean
7660 @ivar failover: Whether operation results in failover or migration
7661 @type fallback: boolean
7662 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7664 @type ignore_consistency: boolean
7665 @ivar ignore_consistency: Whether we should ignore consistency between the source and target node
7667 @type shutdown_timeout: int
7668 @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
7669 @type ignore_ipolicy: bool
7670 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7675 _MIGRATION_POLL_INTERVAL = 1 # seconds
7676 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7678 def __init__(self, lu, instance_name, cleanup=False,
7679 failover=False, fallback=False,
7680 ignore_consistency=False,
7681 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7682 ignore_ipolicy=False):
7683 """Initializes this class.
7686 Tasklet.__init__(self, lu)
7689 self.instance_name = instance_name
7690 self.cleanup = cleanup
7691 self.live = False # will be overridden later
7692 self.failover = failover
7693 self.fallback = fallback
7694 self.ignore_consistency = ignore_consistency
7695 self.shutdown_timeout = shutdown_timeout
7696 self.ignore_ipolicy = ignore_ipolicy
7698 def CheckPrereq(self):
7699 """Check prerequisites.
7701 This checks that the instance is in the cluster.
7704 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7705 instance = self.cfg.GetInstanceInfo(instance_name)
7706 assert instance is not None
7707 self.instance = instance
7708 cluster = self.cfg.GetClusterInfo()
7710 if (not self.cleanup and
7711 not instance.admin_state == constants.ADMINST_UP and
7712 not self.failover and self.fallback):
7713 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7714 " switching to failover")
7715 self.failover = True
7717 if instance.disk_template not in constants.DTS_MIRRORED:
7722 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7723 " %s" % (instance.disk_template, text),
7726 if instance.disk_template in constants.DTS_EXT_MIRROR:
7727 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7729 if self.lu.op.iallocator:
7730 self._RunAllocator()
7732 # We set self.target_node as it is required by
7734 self.target_node = self.lu.op.target_node
7736 # Check that the target node is correct in terms of instance policy
7737 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7738 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7739 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7740 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7741 ignore=self.ignore_ipolicy)
7743 # self.target_node is already populated, either directly or by the iallocator run
7745 target_node = self.target_node
7746 if self.target_node == instance.primary_node:
7747 raise errors.OpPrereqError("Cannot migrate instance %s"
7748 " to its primary (%s)" %
7749 (instance.name, instance.primary_node))
7751 if len(self.lu.tasklets) == 1:
7752 # It is safe to release locks only when we're the only tasklet
7754 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7755 keep=[instance.primary_node, self.target_node])
7758 secondary_nodes = instance.secondary_nodes
7759 if not secondary_nodes:
7760 raise errors.ConfigurationError("No secondary node but using"
7761 " %s disk template" %
7762 instance.disk_template)
7763 target_node = secondary_nodes[0]
7764 if self.lu.op.iallocator or (self.lu.op.target_node and
7765 self.lu.op.target_node != target_node):
7767 text = "failed over"
7770 raise errors.OpPrereqError("Instances with disk template %s cannot"
7771 " be %s to arbitrary nodes"
7772 " (neither an iallocator nor a target"
7773 " node can be passed)" %
7774 (instance.disk_template, text),
7776 nodeinfo = self.cfg.GetNodeInfo(target_node)
7777 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7778 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7779 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7780 ignore=self.ignore_ipolicy)
7782 i_be = cluster.FillBE(instance)
7784 # check memory requirements on the secondary node
7785 if (not self.cleanup and
7786 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7787 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7788 instance.name, i_be[constants.BE_MAXMEM],
7789 instance.hypervisor)
7791 self.lu.LogInfo("Not checking memory on the secondary node as"
7792 " instance will not be started")
7794 # check if failover must be forced instead of migration
7795 if (not self.cleanup and not self.failover and
7796 i_be[constants.BE_ALWAYS_FAILOVER]):
7798 self.lu.LogInfo("Instance configured to always failover; fallback"
7800 self.failover = True
7802 raise errors.OpPrereqError("This instance has been configured to"
7803 " always failover, please allow failover",
7806 # check bridge existence
7807 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7809 if not self.cleanup:
7810 _CheckNodeNotDrained(self.lu, target_node)
7811 if not self.failover:
7812 result = self.rpc.call_instance_migratable(instance.primary_node,
7814 if result.fail_msg and self.fallback:
7815 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7817 self.failover = True
7819 result.Raise("Can't migrate, please use failover",
7820 prereq=True, ecode=errors.ECODE_STATE)
7822 assert not (self.failover and self.cleanup)
7824 if not self.failover:
7825 if self.lu.op.live is not None and self.lu.op.mode is not None:
7826 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7827 " parameters are accepted",
7829 if self.lu.op.live is not None:
7831 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7833 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7834 # reset the 'live' parameter to None so that repeated
7835 # invocations of CheckPrereq do not raise an exception
7836 self.lu.op.live = None
7837 elif self.lu.op.mode is None:
7838 # read the default value from the hypervisor
7839 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7840 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7842 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7844 # Failover is never live
7847 def _RunAllocator(self):
7848 """Run the allocator based on input opcode.
7851 # FIXME: add a self.ignore_ipolicy option
7852 ial = IAllocator(self.cfg, self.rpc,
7853 mode=constants.IALLOCATOR_MODE_RELOC,
7854 name=self.instance_name,
7855 # TODO See why hail breaks with a single node below
7856 relocate_from=[self.instance.primary_node,
7857 self.instance.primary_node],
7860 ial.Run(self.lu.op.iallocator)
7863 raise errors.OpPrereqError("Can't compute nodes using"
7864 " iallocator '%s': %s" %
7865 (self.lu.op.iallocator, ial.info),
7867 if len(ial.result) != ial.required_nodes:
7868 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7869 " of nodes (%s), required %s" %
7870 (self.lu.op.iallocator, len(ial.result),
7871 ial.required_nodes), errors.ECODE_FAULT)
7872 self.target_node = ial.result[0]
7873 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7874 self.instance_name, self.lu.op.iallocator,
7875 utils.CommaJoin(ial.result))
7877 def _WaitUntilSync(self):
7878 """Poll with custom rpc for disk sync.
7880 This uses our own step-based rpc call.
7883 self.feedback_fn("* wait until resync is done")
7887 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7889 self.instance.disks)
7891 for node, nres in result.items():
7892 nres.Raise("Cannot resync disks on node %s" % node)
7893 node_done, node_percent = nres.payload
7894 all_done = all_done and node_done
7895 if node_percent is not None:
7896 min_percent = min(min_percent, node_percent)
7898 if min_percent < 100:
7899 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7902 def _EnsureSecondary(self, node):
7903 """Demote a node to secondary.
7906 self.feedback_fn("* switching node %s to secondary mode" % node)
7908 for dev in self.instance.disks:
7909 self.cfg.SetDiskID(dev, node)
7911 result = self.rpc.call_blockdev_close(node, self.instance.name,
7912 self.instance.disks)
7913 result.Raise("Cannot change disk to secondary on node %s" % node)
7915 def _GoStandalone(self):
7916 """Disconnect from the network.
7919 self.feedback_fn("* changing into standalone mode")
7920 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7921 self.instance.disks)
7922 for node, nres in result.items():
7923 nres.Raise("Cannot disconnect disks node %s" % node)
7925 def _GoReconnect(self, multimaster):
7926 """Reconnect to the network.
7932 msg = "single-master"
7933 self.feedback_fn("* changing disks into %s mode" % msg)
7934 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7935 self.instance.disks,
7936 self.instance.name, multimaster)
7937 for node, nres in result.items():
7938 nres.Raise("Cannot change disks config on node %s" % node)
7940 def _ExecCleanup(self):
7941 """Try to cleanup after a failed migration.
7943 The cleanup is done by:
7944 - check that the instance is running only on one node
7945 (and update the config if needed)
7946 - change disks on its secondary node to secondary
7947 - wait until disks are fully synchronized
7948 - disconnect from the network
7949 - change disks into single-master mode
7950 - wait again until disks are fully synchronized
7953 instance = self.instance
7954 target_node = self.target_node
7955 source_node = self.source_node
7957 # check running on only one node
7958 self.feedback_fn("* checking where the instance actually runs"
7959 " (if this hangs, the hypervisor might be in"
7961 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7962 for node, result in ins_l.items():
7963 result.Raise("Can't contact node %s" % node)
7965 runningon_source = instance.name in ins_l[source_node].payload
7966 runningon_target = instance.name in ins_l[target_node].payload
7968 if runningon_source and runningon_target:
7969 raise errors.OpExecError("Instance seems to be running on two nodes,"
7970 " or the hypervisor is confused; you will have"
7971 " to ensure manually that it runs only on one"
7972 " and restart this operation")
7974 if not (runningon_source or runningon_target):
7975 raise errors.OpExecError("Instance does not seem to be running at all;"
7976 " in this case it's safer to repair by"
7977 " running 'gnt-instance stop' to ensure disk"
7978 " shutdown, and then restarting it")
7980 if runningon_target:
7981 # the migration has actually succeeded, we need to update the config
7982 self.feedback_fn("* instance running on secondary node (%s),"
7983 " updating config" % target_node)
7984 instance.primary_node = target_node
7985 self.cfg.Update(instance, self.feedback_fn)
7986 demoted_node = source_node
7988 self.feedback_fn("* instance confirmed to be running on its"
7989 " primary node (%s)" % source_node)
7990 demoted_node = target_node
7992 if instance.disk_template in constants.DTS_INT_MIRROR:
7993 self._EnsureSecondary(demoted_node)
7995 self._WaitUntilSync()
7996 except errors.OpExecError:
7997 # we ignore errors here, since if the device is standalone, it
7998 # won't be able to sync
8000 self._GoStandalone()
8001 self._GoReconnect(False)
8002 self._WaitUntilSync()
8004 self.feedback_fn("* done")
8006 def _RevertDiskStatus(self):
8007 """Try to revert the disk status after a failed migration.
8010 target_node = self.target_node
8011 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8015 self._EnsureSecondary(target_node)
8016 self._GoStandalone()
8017 self._GoReconnect(False)
8018 self._WaitUntilSync()
8019 except errors.OpExecError, err:
8020 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8021 " please try to recover the instance manually;"
8022 " error '%s'" % str(err))
8024 def _AbortMigration(self):
8025 """Call the hypervisor code to abort a started migration.
8028 instance = self.instance
8029 target_node = self.target_node
8030 source_node = self.source_node
8031 migration_info = self.migration_info
8033 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8037 abort_msg = abort_result.fail_msg
8039 logging.error("Aborting migration failed on target node %s: %s",
8040 target_node, abort_msg)
8041 # Don't raise an exception here, as we still have to try to revert the
8042 # disk status, even if this step failed.
8044 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8045 instance, False, self.live)
8046 abort_msg = abort_result.fail_msg
8048 logging.error("Aborting migration failed on source node %s: %s",
8049 source_node, abort_msg)
8051 def _ExecMigration(self):
8052 """Migrate an instance.
8054 The migrate is done by:
8055 - change the disks into dual-master mode
8056 - wait until disks are fully synchronized again
8057 - migrate the instance
8058 - change disks on the new secondary node (the old primary) to secondary
8059 - wait until disks are fully synchronized
8060 - change disks into single-master mode
8063 instance = self.instance
8064 target_node = self.target_node
8065 source_node = self.source_node
8067 # Check for hypervisor version mismatch and warn the user.
8068 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8069 None, [self.instance.hypervisor])
8070 for ninfo in nodeinfo.values():
8071 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8073 (_, _, (src_info, )) = nodeinfo[source_node].payload
8074 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8076 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8077 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8078 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8079 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8080 if src_version != dst_version:
8081 self.feedback_fn("* warning: hypervisor version mismatch between"
8082 " source (%s) and target (%s) node" %
8083 (src_version, dst_version))
8085 self.feedback_fn("* checking disk consistency between source and target")
8086 for dev in instance.disks:
8087 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8088 raise errors.OpExecError("Disk %s is degraded or not fully"
8089 " synchronized on target node,"
8090 " aborting migration" % dev.iv_name)
8092 # First get the migration information from the remote node
8093 result = self.rpc.call_migration_info(source_node, instance)
8094 msg = result.fail_msg
8096 log_err = ("Failed fetching source migration information from %s: %s" %
8098 logging.error(log_err)
8099 raise errors.OpExecError(log_err)
8101 self.migration_info = migration_info = result.payload
8103 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8104 # Then switch the disks to master/master mode
8105 self._EnsureSecondary(target_node)
8106 self._GoStandalone()
8107 self._GoReconnect(True)
8108 self._WaitUntilSync()
8110 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8111 result = self.rpc.call_accept_instance(target_node,
8114 self.nodes_ip[target_node])
8116 msg = result.fail_msg
8118 logging.error("Instance pre-migration failed, trying to revert"
8119 " disk status: %s", msg)
8120 self.feedback_fn("Pre-migration failed, aborting")
8121 self._AbortMigration()
8122 self._RevertDiskStatus()
8123 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8124 (instance.name, msg))
8126 self.feedback_fn("* migrating instance to %s" % target_node)
8127 result = self.rpc.call_instance_migrate(source_node, instance,
8128 self.nodes_ip[target_node],
8130 msg = result.fail_msg
8132 logging.error("Instance migration failed, trying to revert"
8133 " disk status: %s", msg)
8134 self.feedback_fn("Migration failed, aborting")
8135 self._AbortMigration()
8136 self._RevertDiskStatus()
8137 raise errors.OpExecError("Could not migrate instance %s: %s" %
8138 (instance.name, msg))
8140 self.feedback_fn("* starting memory transfer")
8141 last_feedback = time.time()
8143 result = self.rpc.call_instance_get_migration_status(source_node,
8145 msg = result.fail_msg
8146 ms = result.payload # MigrationStatus instance
8147 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8148 logging.error("Instance migration failed, trying to revert"
8149 " disk status: %s", msg)
8150 self.feedback_fn("Migration failed, aborting")
8151 self._AbortMigration()
8152 self._RevertDiskStatus()
8153 raise errors.OpExecError("Could not migrate instance %s: %s" %
8154 (instance.name, msg))
8156 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8157 self.feedback_fn("* memory transfer complete")
8160 if (utils.TimeoutExpired(last_feedback,
8161 self._MIGRATION_FEEDBACK_INTERVAL) and
8162 ms.transferred_ram is not None):
8163 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8164 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8165 last_feedback = time.time()
8167 time.sleep(self._MIGRATION_POLL_INTERVAL)
8169 result = self.rpc.call_instance_finalize_migration_src(source_node,
8173 msg = result.fail_msg
8175 logging.error("Instance migration succeeded, but finalization failed"
8176 " on the source node: %s", msg)
8177 raise errors.OpExecError("Could not finalize instance migration: %s" %
8180 instance.primary_node = target_node
8182 # distribute new instance config to the other nodes
8183 self.cfg.Update(instance, self.feedback_fn)
8185 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8189 msg = result.fail_msg
8191 logging.error("Instance migration succeeded, but finalization failed"
8192 " on the target node: %s", msg)
8193 raise errors.OpExecError("Could not finalize instance migration: %s" %
8196 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8197 self._EnsureSecondary(source_node)
8198 self._WaitUntilSync()
8199 self._GoStandalone()
8200 self._GoReconnect(False)
8201 self._WaitUntilSync()
8203 self.feedback_fn("* done")
8205 def _ExecFailover(self):
8206 """Failover an instance.
8208 The failover is done by shutting it down on its present node and
8209 starting it on the secondary.
8212 instance = self.instance
8213 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8215 source_node = instance.primary_node
8216 target_node = self.target_node
8218 if instance.admin_state == constants.ADMINST_UP:
8219 self.feedback_fn("* checking disk consistency between source and target")
8220 for dev in instance.disks:
8221 # for drbd, these are drbd over lvm
8222 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8223 if primary_node.offline:
8224 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8226 (primary_node.name, dev.iv_name, target_node))
8227 elif not self.ignore_consistency:
8228 raise errors.OpExecError("Disk %s is degraded on target node,"
8229 " aborting failover" % dev.iv_name)
8231 self.feedback_fn("* not checking disk consistency as instance is not"
8234 self.feedback_fn("* shutting down instance on source node")
8235 logging.info("Shutting down instance %s on node %s",
8236 instance.name, source_node)
8238 result = self.rpc.call_instance_shutdown(source_node, instance,
8239 self.shutdown_timeout)
8240 msg = result.fail_msg
8242 if self.ignore_consistency or primary_node.offline:
8243 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8244 " proceeding anyway; please make sure node"
8245 " %s is down; error details: %s",
8246 instance.name, source_node, source_node, msg)
8248 raise errors.OpExecError("Could not shutdown instance %s on"
8250 (instance.name, source_node, msg))
8252 self.feedback_fn("* deactivating the instance's disks on source node")
8253 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8254 raise errors.OpExecError("Can't shut down the instance's disks")
8256 instance.primary_node = target_node
8257 # distribute new instance config to the other nodes
8258 self.cfg.Update(instance, self.feedback_fn)
8260 # Only start the instance if it's marked as up
8261 if instance.admin_state == constants.ADMINST_UP:
8262 self.feedback_fn("* activating the instance's disks on target node %s" %
8264 logging.info("Starting instance %s on node %s",
8265 instance.name, target_node)
8267 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8268 ignore_secondaries=True)
8270 _ShutdownInstanceDisks(self.lu, instance)
8271 raise errors.OpExecError("Can't activate the instance's disks")
8273 self.feedback_fn("* starting the instance on the target node %s" %
8275 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8277 msg = result.fail_msg
8279 _ShutdownInstanceDisks(self.lu, instance)
8280 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8281 (instance.name, target_node, msg))
8283 def Exec(self, feedback_fn):
8284 """Perform the migration.
8287 self.feedback_fn = feedback_fn
8288 self.source_node = self.instance.primary_node
8290 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8291 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8292 self.target_node = self.instance.secondary_nodes[0]
8293 # Otherwise self.target_node has been populated either
8294 # directly, or through an iallocator.
8296 self.all_nodes = [self.source_node, self.target_node]
8297 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8298 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8301 feedback_fn("Failover instance %s" % self.instance.name)
8302 self._ExecFailover()
8304 feedback_fn("Migrating instance %s" % self.instance.name)
8307 return self._ExecCleanup()
8309 return self._ExecMigration()
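# Illustrative sketch (not used by the tasklet above): the memory-transfer loop
# in _ExecMigration polls the hypervisor every _MIGRATION_POLL_INTERVAL seconds
# but only reports progress every _MIGRATION_FEEDBACK_INTERVAL seconds.  A
# standalone model of that throttling; get_status is a hypothetical callable
# returning (done, transferred_ram, total_ram), and the module-level time
# import is reused.
def _ExamplePollWithThrottledFeedback(get_status, feedback_fn,
                                      poll_interval=1, feedback_interval=10):
  last_feedback = time.time()
  while True:
    (done, transferred, total) = get_status()
    if done:
      feedback_fn("memory transfer complete")
      return
    now = time.time()
    if transferred is not None and now - last_feedback >= feedback_interval:
      feedback_fn("memory transfer progress: %.2f %%" %
                  (100.0 * transferred / total))
      last_feedback = now
    time.sleep(poll_interval)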
8312 def _CreateBlockDev(lu, node, instance, device, force_create,
8314 """Create a tree of block devices on a given node.
8316 If this device type has to be created on secondaries, create it and
8319 If not, just recurse to children keeping the same 'force' value.
8321 @param lu: the lu on whose behalf we execute
8322 @param node: the node on which to create the device
8323 @type instance: L{objects.Instance}
8324 @param instance: the instance which owns the device
8325 @type device: L{objects.Disk}
8326 @param device: the device to create
8327 @type force_create: boolean
8328 @param force_create: whether to force creation of this device; this
8329 will be changed to True whenever we find a device which has the
8330 CreateOnSecondary() attribute
8331 @param info: the extra 'metadata' we should attach to the device
8332 (this will be represented as a LVM tag)
8333 @type force_open: boolean
8334 @param force_open: this parameter will be passed to the
8335 L{backend.BlockdevCreate} function where it specifies
8336 whether we run on primary or not, and it affects both
8337 the child assembly and the device's own Open() execution
8340 if device.CreateOnSecondary():
8344 for child in device.children:
8345 _CreateBlockDev(lu, node, instance, child, force_create,
8348 if not force_create:
8351 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
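# Illustrative sketch (simplified, not used above): _CreateBlockDev first flips
# force_create to True for devices that must exist on secondaries, recurses
# into the children with that flag, and only then creates the device itself if
# the flag ended up set.  A standalone model; the device argument is assumed to
# expose .children and .create_on_secondary, and create_fn is hypothetical.
def _ExampleCreateTree(device, force_create, create_fn):
  if device.create_on_secondary:
    force_create = True
  for child in device.children:
    _ExampleCreateTree(child, force_create, create_fn)
  if not force_create:
    return
  create_fn(device)  # children exist at this point (bottom-up creation)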
8354 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8355 """Create a single block device on a given node.
8357 This will not recurse over children of the device, so they must be created in advance.
8360 @param lu: the lu on whose behalf we execute
8361 @param node: the node on which to create the device
8362 @type instance: L{objects.Instance}
8363 @param instance: the instance which owns the device
8364 @type device: L{objects.Disk}
8365 @param device: the device to create
8366 @param info: the extra 'metadata' we should attach to the device
8367 (this will be represented as a LVM tag)
8368 @type force_open: boolean
8369 @param force_open: this parameter will be passed to the
8370 L{backend.BlockdevCreate} function where it specifies
8371 whether we run on primary or not, and it affects both
8372 the child assembly and the device's own Open() execution
8375 lu.cfg.SetDiskID(device, node)
8376 result = lu.rpc.call_blockdev_create(node, device, device.size,
8377 instance.name, force_open, info)
8378 result.Raise("Can't create block device %s on"
8379 " node %s for instance %s" % (device, node, instance.name))
8380 if device.physical_id is None:
8381 device.physical_id = result.payload
8384 def _GenerateUniqueNames(lu, exts):
8385 """Generate a suitable LV name.
8387 This will generate a logical volume name for the given instance.
8392 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8393 results.append("%s%s" % (new_id, val))
8397 def _ComputeLDParams(disk_template, disk_params):
8398 """Computes Logical Disk parameters from Disk Template parameters.
8400 @type disk_template: string
8401 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8402 @type disk_params: dict
8403 @param disk_params: disk template parameters; dict(template_name -> parameters)
8405 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8406 contains the LD parameters of the node. The tree is flattened in-order.
8409 if disk_template not in constants.DISK_TEMPLATES:
8410 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8413 dt_params = disk_params[disk_template]
8414 if disk_template == constants.DT_DRBD8:
8416 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8417 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8418 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8419 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8420 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8421 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8422 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8423 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8424 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8425 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8426 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8427 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8431 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8434 result.append(drbd_params)
8438 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8441 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8443 result.append(data_params)
8447 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8450 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8452 result.append(meta_params)
8454 elif (disk_template == constants.DT_FILE or
8455 disk_template == constants.DT_SHARED_FILE):
8456 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8458 elif disk_template == constants.DT_PLAIN:
8460 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8463 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8465 result.append(params)
8467 elif disk_template == constants.DT_BLOCK:
8468 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
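# Note on the value built above: the flattening is positional, so callers such
# as _GenerateDiskTemplate can unpack or index it directly -- for DT_DRBD8 it
# is [drbd_params, data_lv_params, meta_lv_params], for the single-device
# templates (plain, file, shared file, block) it is a one-element list, and
# DT_DISKLESS adds nothing.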
8473 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8474 iv_name, p_minor, s_minor, drbd_params, data_params,
8476 """Generate a drbd8 device complete with its children.
8479 assert len(vgnames) == len(names) == 2
8480 port = lu.cfg.AllocatePort()
8481 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8483 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8484 logical_id=(vgnames[0], names[0]),
8486 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8487 logical_id=(vgnames[1], names[1]),
8489 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8490 logical_id=(primary, secondary, port,
8493 children=[dev_data, dev_meta],
8494 iv_name=iv_name, params=drbd_params)
8498 def _GenerateDiskTemplate(lu, template_name,
8499 instance_name, primary_node,
8500 secondary_nodes, disk_info,
8501 file_storage_dir, file_driver,
8502 base_index, feedback_fn, disk_params):
8503 """Generate the entire disk layout for a given template type.
8506 #TODO: compute space requirements
8508 vgname = lu.cfg.GetVGName()
8509 disk_count = len(disk_info)
8511 ld_params = _ComputeLDParams(template_name, disk_params)
8512 if template_name == constants.DT_DISKLESS:
8514 elif template_name == constants.DT_PLAIN:
8516 raise errors.ProgrammerError("Wrong template configuration")
8518 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8519 for i in range(disk_count)])
8520 for idx, disk in enumerate(disk_info):
8521 disk_index = idx + base_index
8522 vg = disk.get(constants.IDISK_VG, vgname)
8523 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8524 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8525 size=disk[constants.IDISK_SIZE],
8526 logical_id=(vg, names[idx]),
8527 iv_name="disk/%d" % disk_index,
8528 mode=disk[constants.IDISK_MODE],
8529 params=ld_params[0])
8530 disks.append(disk_dev)
8531 elif template_name == constants.DT_DRBD8:
8532 drbd_params, data_params, meta_params = ld_params
8533 if len(secondary_nodes) != 1:
8534 raise errors.ProgrammerError("Wrong template configuration")
8535 remote_node = secondary_nodes[0]
8536 minors = lu.cfg.AllocateDRBDMinor(
8537 [primary_node, remote_node] * len(disk_info), instance_name)
8540 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8541 for i in range(disk_count)]):
8542 names.append(lv_prefix + "_data")
8543 names.append(lv_prefix + "_meta")
8544 for idx, disk in enumerate(disk_info):
8545 disk_index = idx + base_index
8546 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8547 data_vg = disk.get(constants.IDISK_VG, vgname)
8548 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8549 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8550 disk[constants.IDISK_SIZE],
8552 names[idx * 2:idx * 2 + 2],
8553 "disk/%d" % disk_index,
8554 minors[idx * 2], minors[idx * 2 + 1],
8555 drbd_params, data_params, meta_params)
8556 disk_dev.mode = disk[constants.IDISK_MODE]
8557 disks.append(disk_dev)
8558 elif template_name == constants.DT_FILE:
8560 raise errors.ProgrammerError("Wrong template configuration")
8562 opcodes.RequireFileStorage()
8564 for idx, disk in enumerate(disk_info):
8565 disk_index = idx + base_index
8566 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8567 size=disk[constants.IDISK_SIZE],
8568 iv_name="disk/%d" % disk_index,
8569 logical_id=(file_driver,
8570 "%s/disk%d" % (file_storage_dir,
8572 mode=disk[constants.IDISK_MODE],
8573 params=ld_params[0])
8574 disks.append(disk_dev)
8575 elif template_name == constants.DT_SHARED_FILE:
8577 raise errors.ProgrammerError("Wrong template configuration")
8579 opcodes.RequireSharedFileStorage()
8581 for idx, disk in enumerate(disk_info):
8582 disk_index = idx + base_index
8583 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8584 size=disk[constants.IDISK_SIZE],
8585 iv_name="disk/%d" % disk_index,
8586 logical_id=(file_driver,
8587 "%s/disk%d" % (file_storage_dir,
8589 mode=disk[constants.IDISK_MODE],
8590 params=ld_params[0])
8591 disks.append(disk_dev)
8592 elif template_name == constants.DT_BLOCK:
8594 raise errors.ProgrammerError("Wrong template configuration")
8596 for idx, disk in enumerate(disk_info):
8597 disk_index = idx + base_index
8598 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8599 size=disk[constants.IDISK_SIZE],
8600 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8601 disk[constants.IDISK_ADOPT]),
8602 iv_name="disk/%d" % disk_index,
8603 mode=disk[constants.IDISK_MODE],
8604 params=ld_params[0])
8605 disks.append(disk_dev)
8608 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
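# Example of the naming scheme above (values illustrative): with base_index=2
# and two requested disks, the new disks get iv_name "disk/2" and "disk/3";
# LVM-backed templates additionally get unique LV names ending in ".disk2" and
# ".disk3", with "_data"/"_meta" suffixes appended for DRBD8.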
8612 def _GetInstanceInfoText(instance):
8613 """Compute that text that should be added to the disk's metadata.
8616 return "originstname+%s" % instance.name
8619 def _CalcEta(time_taken, written, total_size):
8620 """Calculates the ETA based on size written and total size.
8622 @param time_taken: The time taken so far
8623 @param written: amount written so far
8624 @param total_size: The total size of data to be written
8625 @return: The remaining time in seconds
8628 avg_time = time_taken / float(written)
8629 return (total_size - written) * avg_time
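# Worked example for _CalcEta: 512 MiB written in 8 s out of 2048 MiB gives an
# average of 8/512 s per MiB, so the remaining 1536 MiB are estimated at
# 1536 * 8/512 = 24 s; _CalcEta(8, 512, 2048) == 24.0.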
8632 def _WipeDisks(lu, instance):
8633 """Wipes instance disks.
8635 @type lu: L{LogicalUnit}
8636 @param lu: the logical unit on whose behalf we execute
8637 @type instance: L{objects.Instance}
8638 @param instance: the instance whose disks we should create
8639 @return: the success of the wipe
8642 node = instance.primary_node
8644 for device in instance.disks:
8645 lu.cfg.SetDiskID(device, node)
8647 logging.info("Pause sync of instance %s disks", instance.name)
8648 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8650 for idx, success in enumerate(result.payload):
8652 logging.warn("pause-sync of instance %s for disks %d failed",
8656 for idx, device in enumerate(instance.disks):
8657 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8658 # but at most MAX_WIPE_CHUNK
8659 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8660 constants.MIN_WIPE_CHUNK_PERCENT)
8661 # we _must_ make this an int, otherwise rounding errors will occur
8663 wipe_chunk_size = int(wipe_chunk_size)
8665 lu.LogInfo("* Wiping disk %d", idx)
8666 logging.info("Wiping disk %d for instance %s, node %s using"
8667 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8672 start_time = time.time()
8674 while offset < size:
8675 wipe_size = min(wipe_chunk_size, size - offset)
8676 logging.debug("Wiping disk %d, offset %s, chunk %s",
8677 idx, offset, wipe_size)
8678 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8679 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8680 (idx, offset, wipe_size))
8683 if now - last_output >= 60:
8684 eta = _CalcEta(now - start_time, offset, size)
8685 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8686 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8689 logging.info("Resume sync of instance %s disks", instance.name)
8691 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8693 for idx, success in enumerate(result.payload):
8695 lu.LogWarning("Resume sync of disk %d failed, please have a"
8696 " look at the status and troubleshoot the issue", idx)
8697 logging.warn("resume-sync of instance %s for disks %d failed",
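# Illustrative sketch (standalone, not used above): the wipe loop walks each
# disk in chunks of min(MAX_WIPE_CHUNK, MIN_WIPE_CHUNK_PERCENT percent of the
# disk size).  A minimal generator that only computes the (offset, length)
# pairs; max_chunk and chunk_percent are hypothetical stand-ins for the
# constants.
def _ExampleWipeChunks(disk_size, max_chunk, chunk_percent):
  chunk = int(min(max_chunk, disk_size / 100.0 * chunk_percent))
  offset = 0
  while offset < disk_size:
    length = min(chunk, disk_size - offset)
    yield (offset, length)
    offset += length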
8701 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8702 """Create all disks for an instance.
8704 This abstracts away some work from AddInstance.
8706 @type lu: L{LogicalUnit}
8707 @param lu: the logical unit on whose behalf we execute
8708 @type instance: L{objects.Instance}
8709 @param instance: the instance whose disks we should create
8711 @param to_skip: list of indices to skip
8712 @type target_node: string
8713 @param target_node: if passed, overrides the target node for creation
8715 @return: the success of the creation
8718 info = _GetInstanceInfoText(instance)
8719 if target_node is None:
8720 pnode = instance.primary_node
8721 all_nodes = instance.all_nodes
8726 if instance.disk_template in constants.DTS_FILEBASED:
8727 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8728 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8730 result.Raise("Failed to create directory '%s' on"
8731 " node %s" % (file_storage_dir, pnode))
8733 # Note: this needs to be kept in sync with adding of disks in
8734 # LUInstanceSetParams
8735 for idx, device in enumerate(instance.disks):
8736 if to_skip and idx in to_skip:
8738 logging.info("Creating volume %s for instance %s",
8739 device.iv_name, instance.name)
8741 for node in all_nodes:
8742 f_create = node == pnode
8743 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
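# Note on the loop above: f_create is True only on the primary node, so device
# trees that do not request creation on secondaries (e.g. plain LVs) are only
# created there, while DRBD8 trees are created on both nodes through the
# force_create propagation sketched above after _CreateBlockDev.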
8746 def _RemoveDisks(lu, instance, target_node=None):
8747 """Remove all disks for an instance.
8749 This abstracts away some work from `AddInstance()` and
8750 `RemoveInstance()`. Note that in case some of the devices couldn't
8751 be removed, the removal will continue with the other ones (compare
8752 with `_CreateDisks()`).
8754 @type lu: L{LogicalUnit}
8755 @param lu: the logical unit on whose behalf we execute
8756 @type instance: L{objects.Instance}
8757 @param instance: the instance whose disks we should remove
8758 @type target_node: string
8759 @param target_node: used to override the node on which to remove the disks
8761 @return: the success of the removal
8764 logging.info("Removing block devices for instance %s", instance.name)
8767 for device in instance.disks:
8769 edata = [(target_node, device)]
8771 edata = device.ComputeNodeTree(instance.primary_node)
8772 for node, disk in edata:
8773 lu.cfg.SetDiskID(disk, node)
8774 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8776 lu.LogWarning("Could not remove block device %s on node %s,"
8777 " continuing anyway: %s", device.iv_name, node, msg)
8780 # if this is a DRBD disk, return its port to the pool
8781 if device.dev_type in constants.LDS_DRBD:
8782 tcp_port = device.logical_id[2]
8783 lu.cfg.AddTcpUdpPort(tcp_port)
8785 if instance.disk_template == constants.DT_FILE:
8786 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8790 tgt = instance.primary_node
8791 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8793 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8794 file_storage_dir, instance.primary_node, result.fail_msg)
8800 def _ComputeDiskSizePerVG(disk_template, disks):
8801 """Compute disk size requirements in the volume group
8804 def _compute(disks, payload):
8805 """Universal algorithm.
8810 vgs[disk[constants.IDISK_VG]] = \
8811 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8815 # Required free disk space as a function of disk and swap space
8817 constants.DT_DISKLESS: {},
8818 constants.DT_PLAIN: _compute(disks, 0),
8819 # 128 MB are added for drbd metadata for each disk
8820 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8821 constants.DT_FILE: {},
8822 constants.DT_SHARED_FILE: {},
8825 if disk_template not in req_size_dict:
8826 raise errors.ProgrammerError("Disk template '%s' size requirement"
8827 " is unknown" % disk_template)
8829 return req_size_dict[disk_template]
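# Worked example for _ComputeDiskSizePerVG with DT_PLAIN: disks of 1024 MiB and
# 512 MiB in "xenvg" plus 256 MiB in "fastvg" yield {"xenvg": 1536,
# "fastvg": 256}; with DT_DRBD8 each disk additionally contributes
# DRBD_META_SIZE to its volume group.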
8832 def _ComputeDiskSize(disk_template, disks):
8833 """Compute disk size requirements in the volume group
8836 # Required free disk space as a function of disk and swap space
8838 constants.DT_DISKLESS: None,
8839 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8840 # 128 MB are added for drbd metadata for each disk
8842 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8843 constants.DT_FILE: None,
8844 constants.DT_SHARED_FILE: 0,
8845 constants.DT_BLOCK: 0,
8848 if disk_template not in req_size_dict:
8849 raise errors.ProgrammerError("Disk template '%s' size requirement"
8850 " is unknown" % disk_template)
8852 return req_size_dict[disk_template]
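# Worked example for _ComputeDiskSize with DT_DRBD8: two disks of 1024 MiB and
# 2048 MiB require 1024 + 2048 + 2 * DRBD_META_SIZE MiB of free space; with
# the 128 MiB metadata size mentioned above that is 3328 MiB.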
8855 def _FilterVmNodes(lu, nodenames):
8856 """Filters out non-vm_capable nodes from a list.
8858 @type lu: L{LogicalUnit}
8859 @param lu: the logical unit for which we check
8860 @type nodenames: list
8861 @param nodenames: the list of nodes on which we should check
8863 @return: the list of vm-capable nodes
8866 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8867 return [name for name in nodenames if name not in vm_nodes]
8870 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8871 """Hypervisor parameter validation.
8873 This function abstracts the hypervisor parameter validation to be
8874 used in both instance create and instance modify.
8876 @type lu: L{LogicalUnit}
8877 @param lu: the logical unit for which we check
8878 @type nodenames: list
8879 @param nodenames: the list of nodes on which we should check
8880 @type hvname: string
8881 @param hvname: the name of the hypervisor we should use
8882 @type hvparams: dict
8883 @param hvparams: the parameters which we need to check
8884 @raise errors.OpPrereqError: if the parameters are not valid
8887 nodenames = _FilterVmNodes(lu, nodenames)
8889 cluster = lu.cfg.GetClusterInfo()
8890 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8892 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8893 for node in nodenames:
8897 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8900 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8901 """OS parameters validation.
8903 @type lu: L{LogicalUnit}
8904 @param lu: the logical unit for which we check
8905 @type required: boolean
8906 @param required: whether the validation should fail if the OS is not found
8908 @type nodenames: list
8909 @param nodenames: the list of nodes on which we should check
8910 @type osname: string
8911 @param osname: the name of the OS we should use
8912 @type osparams: dict
8913 @param osparams: the parameters which we need to check
8914 @raise errors.OpPrereqError: if the parameters are not valid
8917 nodenames = _FilterVmNodes(lu, nodenames)
8918 result = lu.rpc.call_os_validate(nodenames, required, osname,
8919 [constants.OS_VALIDATE_PARAMETERS],
8921 for node, nres in result.items():
8922 # we don't check for offline cases since this should be run only
8923 # against the master node and/or an instance's nodes
8924 nres.Raise("OS Parameters validation failed on node %s" % node)
8925 if not nres.payload:
8926 lu.LogInfo("OS %s not found on node %s, validation skipped",
8930 class LUInstanceCreate(LogicalUnit):
8931 """Create an instance.
8934 HPATH = "instance-add"
8935 HTYPE = constants.HTYPE_INSTANCE
8938 def CheckArguments(self):
8942 # do not require name_check to ease forward/backward compatibility
8944 if self.op.no_install and self.op.start:
8945 self.LogInfo("No-installation mode selected, disabling startup")
8946 self.op.start = False
8947 # validate/normalize the instance name
8948 self.op.instance_name = \
8949 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8951 if self.op.ip_check and not self.op.name_check:
8952 # TODO: make the ip check more flexible and not depend on the name check
8953 raise errors.OpPrereqError("Cannot do IP address check without a name"
8954 " check", errors.ECODE_INVAL)
8956 # check nics' parameter names
8957 for nic in self.op.nics:
8958 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8960 # check disks. parameter names and consistent adopt/no-adopt strategy
8961 has_adopt = has_no_adopt = False
8962 for disk in self.op.disks:
8963 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8964 if constants.IDISK_ADOPT in disk:
8968 if has_adopt and has_no_adopt:
8969 raise errors.OpPrereqError("Either all disks are adopted or none is",
8972 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8973 raise errors.OpPrereqError("Disk adoption is not supported for the"
8974 " '%s' disk template" %
8975 self.op.disk_template,
8977 if self.op.iallocator is not None:
8978 raise errors.OpPrereqError("Disk adoption not allowed with an"
8979 " iallocator script", errors.ECODE_INVAL)
8980 if self.op.mode == constants.INSTANCE_IMPORT:
8981 raise errors.OpPrereqError("Disk adoption not allowed for"
8982 " instance import", errors.ECODE_INVAL)
8984 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8985 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8986 " but no 'adopt' parameter given" %
8987 self.op.disk_template,
8990 self.adopt_disks = has_adopt
8992 # instance name verification
8993 if self.op.name_check:
8994 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8995 self.op.instance_name = self.hostname1.name
8996 # used in CheckPrereq for ip ping check
8997 self.check_ip = self.hostname1.ip
8999 self.check_ip = None
9001 # file storage checks
9002 if (self.op.file_driver and
9003 self.op.file_driver not in constants.FILE_DRIVER):
9004 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9005 self.op.file_driver, errors.ECODE_INVAL)
9007 if self.op.disk_template == constants.DT_FILE:
9008 opcodes.RequireFileStorage()
9009 elif self.op.disk_template == constants.DT_SHARED_FILE:
9010 opcodes.RequireSharedFileStorage()
9012 ### Node/iallocator related checks
9013 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9015 if self.op.pnode is not None:
9016 if self.op.disk_template in constants.DTS_INT_MIRROR:
9017 if self.op.snode is None:
9018 raise errors.OpPrereqError("The networked disk templates need"
9019 " a mirror node", errors.ECODE_INVAL)
9021 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9023 self.op.snode = None
9025 self._cds = _GetClusterDomainSecret()
9027 if self.op.mode == constants.INSTANCE_IMPORT:
9028 # On import force_variant must be True, because if we forced it at
9029 # initial install, our only chance when importing it back is that it
9031 self.op.force_variant = True
9033 if self.op.no_install:
9034 self.LogInfo("No-installation mode has no effect during import")
9036 elif self.op.mode == constants.INSTANCE_CREATE:
9037 if self.op.os_type is None:
9038 raise errors.OpPrereqError("No guest OS specified",
9040 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9041 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9042 " installation" % self.op.os_type,
9044 if self.op.disk_template is None:
9045 raise errors.OpPrereqError("No disk template specified",
9048 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9049 # Check handshake to ensure both clusters have the same domain secret
9050 src_handshake = self.op.source_handshake
9051 if not src_handshake:
9052 raise errors.OpPrereqError("Missing source handshake",
9055 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9058 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9061 # Load and check source CA
9062 self.source_x509_ca_pem = self.op.source_x509_ca
9063 if not self.source_x509_ca_pem:
9064 raise errors.OpPrereqError("Missing source X509 CA",
9068 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9070 except OpenSSL.crypto.Error, err:
9071 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9072 (err, ), errors.ECODE_INVAL)
9074 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9075 if errcode is not None:
9076 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9079 self.source_x509_ca = cert
9081 src_instance_name = self.op.source_instance_name
9082 if not src_instance_name:
9083 raise errors.OpPrereqError("Missing source instance name",
9086 self.source_instance_name = \
9087 netutils.GetHostname(name=src_instance_name).name
9090 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9091 self.op.mode, errors.ECODE_INVAL)
9093 def ExpandNames(self):
9094 """ExpandNames for CreateInstance.
9096 Figure out the right locks for instance creation.
9099 self.needed_locks = {}
9101 instance_name = self.op.instance_name
9102 # this is just a preventive check, but someone might still add this
9103 # instance in the meantime, and creation will fail at lock-add time
9104 if instance_name in self.cfg.GetInstanceList():
9105 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9106 instance_name, errors.ECODE_EXISTS)
9108 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9110 if self.op.iallocator:
9111 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9112 # specifying a group on instance creation and then selecting nodes from
9114 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9115 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9117 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9118 nodelist = [self.op.pnode]
9119 if self.op.snode is not None:
9120 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9121 nodelist.append(self.op.snode)
9122 self.needed_locks[locking.LEVEL_NODE] = nodelist
9123 # Lock resources of instance's primary and secondary nodes (copy to
9124 # prevent accidental modification)
9125 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9127 # in case of import lock the source node too
9128 if self.op.mode == constants.INSTANCE_IMPORT:
9129 src_node = self.op.src_node
9130 src_path = self.op.src_path
9132 if src_path is None:
9133 self.op.src_path = src_path = self.op.instance_name
9135 if src_node is None:
9136 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9137 self.op.src_node = None
9138 if os.path.isabs(src_path):
9139 raise errors.OpPrereqError("Importing an instance from a path"
9140 " requires a source node option",
9143 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9144 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9145 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9146 if not os.path.isabs(src_path):
9147 self.op.src_path = src_path = \
9148 utils.PathJoin(constants.EXPORT_DIR, src_path)
9150 def _RunAllocator(self):
9151 """Run the allocator based on input opcode.
9154 nics = [n.ToDict() for n in self.nics]
9155 ial = IAllocator(self.cfg, self.rpc,
9156 mode=constants.IALLOCATOR_MODE_ALLOC,
9157 name=self.op.instance_name,
9158 disk_template=self.op.disk_template,
9161 vcpus=self.be_full[constants.BE_VCPUS],
9162 memory=self.be_full[constants.BE_MAXMEM],
9165 hypervisor=self.op.hypervisor,
9168 ial.Run(self.op.iallocator)
9171 raise errors.OpPrereqError("Can't compute nodes using"
9172 " iallocator '%s': %s" %
9173 (self.op.iallocator, ial.info),
9175 if len(ial.result) != ial.required_nodes:
9176 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9177 " of nodes (%s), required %s" %
9178 (self.op.iallocator, len(ial.result),
9179 ial.required_nodes), errors.ECODE_FAULT)
9180 self.op.pnode = ial.result[0]
9181 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9182 self.op.instance_name, self.op.iallocator,
9183 utils.CommaJoin(ial.result))
9184 if ial.required_nodes == 2:
9185 self.op.snode = ial.result[1]
9187 def BuildHooksEnv(self):
9190 This runs on master, primary and secondary nodes of the instance.
9194 "ADD_MODE": self.op.mode,
9196 if self.op.mode == constants.INSTANCE_IMPORT:
9197 env["SRC_NODE"] = self.op.src_node
9198 env["SRC_PATH"] = self.op.src_path
9199 env["SRC_IMAGES"] = self.src_images
9201 env.update(_BuildInstanceHookEnv(
9202 name=self.op.instance_name,
9203 primary_node=self.op.pnode,
9204 secondary_nodes=self.secondaries,
9205 status=self.op.start,
9206 os_type=self.op.os_type,
9207 minmem=self.be_full[constants.BE_MINMEM],
9208 maxmem=self.be_full[constants.BE_MAXMEM],
9209 vcpus=self.be_full[constants.BE_VCPUS],
9210 nics=_NICListToTuple(self, self.nics),
9211 disk_template=self.op.disk_template,
9212 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9213 for d in self.disks],
9216 hypervisor_name=self.op.hypervisor,
9222 def BuildHooksNodes(self):
9223 """Build hooks nodes.
9226 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9229 def _ReadExportInfo(self):
9230 """Reads the export information from disk.
9232 It will override the opcode source node and path with the actual
9233 information, if these two were not specified before.
9235 @return: the export information
9238 assert self.op.mode == constants.INSTANCE_IMPORT
9240 src_node = self.op.src_node
9241 src_path = self.op.src_path
9243 if src_node is None:
9244 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9245 exp_list = self.rpc.call_export_list(locked_nodes)
9247 for node in exp_list:
9248 if exp_list[node].fail_msg:
9250 if src_path in exp_list[node].payload:
9252 self.op.src_node = src_node = node
9253 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9257 raise errors.OpPrereqError("No export found for relative path %s" %
9258 src_path, errors.ECODE_INVAL)
9260 _CheckNodeOnline(self, src_node)
9261 result = self.rpc.call_export_info(src_node, src_path)
9262 result.Raise("No export or invalid export found in dir %s" % src_path)
9264 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9265 if not export_info.has_section(constants.INISECT_EXP):
9266 raise errors.ProgrammerError("Corrupted export config",
9267 errors.ECODE_ENVIRON)
9269 ei_version = export_info.get(constants.INISECT_EXP, "version")
9270 if (int(ei_version) != constants.EXPORT_VERSION):
9271 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9272 (ei_version, constants.EXPORT_VERSION),
9273 errors.ECODE_ENVIRON)
9276 def _ReadExportParams(self, einfo):
9277 """Use export parameters as defaults.
9279 In case the opcode doesn't specify (as in override) some instance
9280 parameters, then try to use them from the export information, if
9284 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9286 if self.op.disk_template is None:
9287 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9288 self.op.disk_template = einfo.get(constants.INISECT_INS,
9290 if self.op.disk_template not in constants.DISK_TEMPLATES:
9291 raise errors.OpPrereqError("Disk template specified in configuration"
9292 " file is not one of the allowed values:"
9293 " %s" % " ".join(constants.DISK_TEMPLATES))
9295 raise errors.OpPrereqError("No disk template specified and the export"
9296 " is missing the disk_template information",
9299 if not self.op.disks:
9301 # TODO: import the disk iv_name too
9302 for idx in range(constants.MAX_DISKS):
9303 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9304 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9305 disks.append({constants.IDISK_SIZE: disk_sz})
9306 self.op.disks = disks
9307 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9308 raise errors.OpPrereqError("No disk info specified and the export"
9309 " is missing the disk information",
9312 if not self.op.nics:
9314 for idx in range(constants.MAX_NICS):
9315 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9317 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9318 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9325 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9326 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9328 if (self.op.hypervisor is None and
9329 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9330 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9332 if einfo.has_section(constants.INISECT_HYP):
9333 # use the export parameters but do not override the ones
9334 # specified by the user
9335 for name, value in einfo.items(constants.INISECT_HYP):
9336 if name not in self.op.hvparams:
9337 self.op.hvparams[name] = value
9339 if einfo.has_section(constants.INISECT_BEP):
9340 # use the parameters, without overriding
9341 for name, value in einfo.items(constants.INISECT_BEP):
9342 if name not in self.op.beparams:
9343 self.op.beparams[name] = value
9344 # Compatibility for the old "memory" be param
9345 if name == constants.BE_MEMORY:
9346 if constants.BE_MAXMEM not in self.op.beparams:
9347 self.op.beparams[constants.BE_MAXMEM] = value
9348 if constants.BE_MINMEM not in self.op.beparams:
9349 self.op.beparams[constants.BE_MINMEM] = value
9351 # try to read the parameters old style, from the main section
9352 for name in constants.BES_PARAMETERS:
9353 if (name not in self.op.beparams and
9354 einfo.has_option(constants.INISECT_INS, name)):
9355 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9357 if einfo.has_section(constants.INISECT_OSP):
9358 # use the parameters, without overriding
9359 for name, value in einfo.items(constants.INISECT_OSP):
9360 if name not in self.op.osparams:
9361 self.op.osparams[name] = value
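# Illustrative sketch (hypothetical values, not part of the original code):
# parameters given in the opcode always win, the export only fills in what
# was left unspecified. For example, with
#
#   self.op.beparams == {constants.BE_VCPUS: 4}
#
# and an export whose backend section carries vcpus=2 and memory=512, this
# method keeps vcpus at 4, imports memory=512 and, for backwards
# compatibility, also copies that value to BE_MAXMEM and BE_MINMEM, since
# neither was given in the opcode.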
9363 def _RevertToDefaults(self, cluster):
9364 """Revert the instance parameters to the default values.
9368 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9369 for name in self.op.hvparams.keys():
9370 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9371 del self.op.hvparams[name]
9373 be_defs = cluster.SimpleFillBE({})
9374 for name in self.op.beparams.keys():
9375 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9376 del self.op.beparams[name]
9378 nic_defs = cluster.SimpleFillNIC({})
9379 for nic in self.op.nics:
9380 for name in constants.NICS_PARAMETERS:
9381 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9384 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9385 for name in self.op.osparams.keys():
9386 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9387 del self.op.osparams[name]
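# Illustrative sketch (hypothetical values, not part of the original code):
# with identify_defaults set, any value identical to the current cluster
# default is dropped again so the new instance keeps tracking future default
# changes. E.g. if the cluster default for the chosen hypervisor already says
# kernel_path="/boot/vmlinuz" and the export carried exactly that value, it
# is removed from self.op.hvparams; a value that differs from the default is
# kept as an explicit override.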
9389 def _CalculateFileStorageDir(self):
9390 """Calculate final instance file storage dir.
9393 # file storage dir calculation/check
9394 self.instance_file_storage_dir = None
9395 if self.op.disk_template in constants.DTS_FILEBASED:
9396 # build the full file storage dir path
9399 if self.op.disk_template == constants.DT_SHARED_FILE:
9400 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9402 get_fsd_fn = self.cfg.GetFileStorageDir
9404 cfg_storagedir = get_fsd_fn()
9405 if not cfg_storagedir:
9406 raise errors.OpPrereqError("Cluster file storage dir not defined")
9407 joinargs.append(cfg_storagedir)
9409 if self.op.file_storage_dir is not None:
9410 joinargs.append(self.op.file_storage_dir)
9412 joinargs.append(self.op.instance_name)
9414 # pylint: disable=W0142
9415 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
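# Illustrative sketch (hypothetical paths, not part of the original code):
# for file-based templates the final directory is the cluster storage dir,
# the optional opcode override and the instance name joined together, e.g.
#
#   utils.PathJoin("/srv/ganeti/file-storage", "mydir", "inst1.example.com")
#   -> "/srv/ganeti/file-storage/mydir/inst1.example.com"
#
# where "/srv/ganeti/file-storage" stands for whatever cfg.GetFileStorageDir()
# (or the shared-file variant) returns.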
9417 def CheckPrereq(self): # pylint: disable=R0914
9418 """Check prerequisites.
9421 self._CalculateFileStorageDir()
9423 if self.op.mode == constants.INSTANCE_IMPORT:
9424 export_info = self._ReadExportInfo()
9425 self._ReadExportParams(export_info)
9427 if (not self.cfg.GetVGName() and
9428 self.op.disk_template not in constants.DTS_NOT_LVM):
9429 raise errors.OpPrereqError("Cluster does not support lvm-based"
9430 " instances", errors.ECODE_STATE)
9432 if (self.op.hypervisor is None or
9433 self.op.hypervisor == constants.VALUE_AUTO):
9434 self.op.hypervisor = self.cfg.GetHypervisorType()
9436 cluster = self.cfg.GetClusterInfo()
9437 enabled_hvs = cluster.enabled_hypervisors
9438 if self.op.hypervisor not in enabled_hvs:
9439 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9440 " cluster (%s)" % (self.op.hypervisor,
9441 ",".join(enabled_hvs)),
9444 # Check tag validity
9445 for tag in self.op.tags:
9446 objects.TaggableObject.ValidateTag(tag)
9448 # check hypervisor parameter syntax (locally)
9449 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9450 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9452 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9453 hv_type.CheckParameterSyntax(filled_hvp)
9454 self.hv_full = filled_hvp
9455 # check that we don't specify global parameters on an instance
9456 _CheckGlobalHvParams(self.op.hvparams)
9458 # fill and remember the beparams dict
9459 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9460 for param, value in self.op.beparams.iteritems():
9461 if value == constants.VALUE_AUTO:
9462 self.op.beparams[param] = default_beparams[param]
9463 objects.UpgradeBeParams(self.op.beparams)
9464 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9465 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9467 # build os parameters
9468 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9470 # now that hvp/bep are in final format, let's reset to defaults,
9472 if self.op.identify_defaults:
9473 self._RevertToDefaults(cluster)
9477 for idx, nic in enumerate(self.op.nics):
9478 nic_mode_req = nic.get(constants.INIC_MODE, None)
9479 nic_mode = nic_mode_req
9480 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9481 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9483 # in routed mode, for the first nic, the default ip is 'auto'
9484 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9485 default_ip_mode = constants.VALUE_AUTO
9487 default_ip_mode = constants.VALUE_NONE
9489 # ip validity checks
9490 ip = nic.get(constants.INIC_IP, default_ip_mode)
9491 if ip is None or ip.lower() == constants.VALUE_NONE:
9493 elif ip.lower() == constants.VALUE_AUTO:
9494 if not self.op.name_check:
9495 raise errors.OpPrereqError("IP address set to auto but name checks"
9496 " have been skipped",
9498 nic_ip = self.hostname1.ip
9500 if not netutils.IPAddress.IsValid(ip):
9501 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9505 # TODO: check the ip address for uniqueness
9506 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9507 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9510 # MAC address verification
9511 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9512 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9513 mac = utils.NormalizeAndValidateMac(mac)
9516 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9517 except errors.ReservationError:
9518 raise errors.OpPrereqError("MAC address %s already in use"
9519 " in cluster" % mac,
9520 errors.ECODE_NOTUNIQUE)
9522 # Build nic parameters
9523 link = nic.get(constants.INIC_LINK, None)
9524 if link == constants.VALUE_AUTO:
9525 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9528 nicparams[constants.NIC_MODE] = nic_mode
9530 nicparams[constants.NIC_LINK] = link
9532 check_params = cluster.SimpleFillNIC(nicparams)
9533 objects.NIC.CheckParameterSyntax(check_params)
9534 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9536 # disk checks/pre-build
9537 default_vg = self.cfg.GetVGName()
9539 for disk in self.op.disks:
9540 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9541 if mode not in constants.DISK_ACCESS_SET:
9542 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9543 mode, errors.ECODE_INVAL)
9544 size = disk.get(constants.IDISK_SIZE, None)
9546 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9549 except (TypeError, ValueError):
9550 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9553 data_vg = disk.get(constants.IDISK_VG, default_vg)
9555 constants.IDISK_SIZE: size,
9556 constants.IDISK_MODE: mode,
9557 constants.IDISK_VG: data_vg,
9559 if constants.IDISK_METAVG in disk:
9560 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9561 if constants.IDISK_ADOPT in disk:
9562 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9563 self.disks.append(new_disk)
9565 if self.op.mode == constants.INSTANCE_IMPORT:
9567 for idx in range(len(self.disks)):
9568 option = "disk%d_dump" % idx
9569 if export_info.has_option(constants.INISECT_INS, option):
9570 # FIXME: are the old os-es, disk sizes, etc. useful?
9571 export_name = export_info.get(constants.INISECT_INS, option)
9572 image = utils.PathJoin(self.op.src_path, export_name)
9573 disk_images.append(image)
9575 disk_images.append(False)
9577 self.src_images = disk_images
9579 old_name = export_info.get(constants.INISECT_INS, "name")
9580 if self.op.instance_name == old_name:
9581 for idx, nic in enumerate(self.nics):
9582 if nic.mac == constants.VALUE_AUTO:
9583 nic_mac_ini = "nic%d_mac" % idx
9584 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9586 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9588 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9589 if self.op.ip_check:
9590 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9591 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9592 (self.check_ip, self.op.instance_name),
9593 errors.ECODE_NOTUNIQUE)
9595 #### mac address generation
9596 # By generating here the mac address both the allocator and the hooks get
9597 # the real final mac address rather than the 'auto' or 'generate' value.
9598 # There is a race condition between the generation and the instance object
9599 # creation, which means that we know the mac is valid now, but we're not
9600 # sure it will be when we actually add the instance. If things go bad
9601 # adding the instance will abort because of a duplicate mac, and the
9602 # creation job will fail.
9603 for nic in self.nics:
9604 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9605 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
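# Illustrative usage sketch (not part of the original code): both calls key
# their reservation on the execution context id, e.g.
#
#   ec_id = self.proc.GetECId()
#   mac = self.cfg.GenerateMAC(ec_id)                # generate and reserve
#   self.cfg.ReserveMAC("aa:00:00:11:22:33", ec_id)  # or reserve a fixed one
#
# so the address picked here is already reserved for this job; as the comment
# above notes, a clash can still surface when the instance is finally added
# to the configuration.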
9609 if self.op.iallocator is not None:
9610 self._RunAllocator()
9612 # Release all unneeded node locks
9613 _ReleaseLocks(self, locking.LEVEL_NODE,
9614 keep=filter(None, [self.op.pnode, self.op.snode,
9616 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9617 keep=filter(None, [self.op.pnode, self.op.snode,
9620 #### node related checks
9622 # check primary node
9623 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9624 assert self.pnode is not None, \
9625 "Cannot retrieve locked node %s" % self.op.pnode
9627 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9628 pnode.name, errors.ECODE_STATE)
9630 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9631 pnode.name, errors.ECODE_STATE)
9632 if not pnode.vm_capable:
9633 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9634 " '%s'" % pnode.name, errors.ECODE_STATE)
9636 self.secondaries = []
9638 # mirror node verification
9639 if self.op.disk_template in constants.DTS_INT_MIRROR:
9640 if self.op.snode == pnode.name:
9641 raise errors.OpPrereqError("The secondary node cannot be the"
9642 " primary node", errors.ECODE_INVAL)
9643 _CheckNodeOnline(self, self.op.snode)
9644 _CheckNodeNotDrained(self, self.op.snode)
9645 _CheckNodeVmCapable(self, self.op.snode)
9646 self.secondaries.append(self.op.snode)
9648 snode = self.cfg.GetNodeInfo(self.op.snode)
9649 if pnode.group != snode.group:
9650 self.LogWarning("The primary and secondary nodes are in two"
9651 " different node groups; the disk parameters"
9652 " from the first disk's node group will be"
9655 nodenames = [pnode.name] + self.secondaries
9657 # Verify instance specs
9659 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9660 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9661 constants.ISPEC_DISK_COUNT: len(self.disks),
9662 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9663 constants.ISPEC_NIC_COUNT: len(self.nics),
9666 group_info = self.cfg.GetNodeGroup(pnode.group)
9667 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9668 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9669 if not self.op.ignore_ipolicy and res:
9670 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9671 " policy: %s") % (pnode.group,
9672 utils.CommaJoin(res)),
9675 # disk parameters (not customizable at instance or node level)
9676 # just use the primary node parameters, ignoring the secondary.
9677 self.diskparams = group_info.diskparams
9679 if not self.adopt_disks:
9680 # Check lv size requirements, if not adopting
9681 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9682 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9684 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9685 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9686 disk[constants.IDISK_ADOPT])
9687 for disk in self.disks])
9688 if len(all_lvs) != len(self.disks):
9689 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9691 for lv_name in all_lvs:
9693 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9694 # to ReserveLV uses the same syntax
9695 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9696 except errors.ReservationError:
9697 raise errors.OpPrereqError("LV named %s used by another instance" %
9698 lv_name, errors.ECODE_NOTUNIQUE)
9700 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9701 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9703 node_lvs = self.rpc.call_lv_list([pnode.name],
9704 vg_names.payload.keys())[pnode.name]
9705 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9706 node_lvs = node_lvs.payload
9708 delta = all_lvs.difference(node_lvs.keys())
9710 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9711 utils.CommaJoin(delta),
9713 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9715 raise errors.OpPrereqError("Online logical volumes found, cannot"
9716 " adopt: %s" % utils.CommaJoin(online_lvs),
9718 # update the size of disk based on what is found
9719 for dsk in self.disks:
9720 dsk[constants.IDISK_SIZE] = \
9721 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9722 dsk[constants.IDISK_ADOPT])][0]))
9724 elif self.op.disk_template == constants.DT_BLOCK:
9725 # Normalize and de-duplicate device paths
9726 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9727 for disk in self.disks])
9728 if len(all_disks) != len(self.disks):
9729 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9731 baddisks = [d for d in all_disks
9732 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9734 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9735 " cannot be adopted" %
9736 (", ".join(baddisks),
9737 constants.ADOPTABLE_BLOCKDEV_ROOT),
9740 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9741 list(all_disks))[pnode.name]
9742 node_disks.Raise("Cannot get block device information from node %s" %
9744 node_disks = node_disks.payload
9745 delta = all_disks.difference(node_disks.keys())
9747 raise errors.OpPrereqError("Missing block device(s): %s" %
9748 utils.CommaJoin(delta),
9750 for dsk in self.disks:
9751 dsk[constants.IDISK_SIZE] = \
9752 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9754 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9756 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9757 # check OS parameters (remotely)
9758 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9760 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9762 # memory check on primary node
9763 #TODO(dynmem): use MINMEM for checking
9765 _CheckNodeFreeMemory(self, self.pnode.name,
9766 "creating instance %s" % self.op.instance_name,
9767 self.be_full[constants.BE_MAXMEM],
9770 self.dry_run_result = list(nodenames)
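# Illustrative sketch (hypothetical numbers, not part of the original code):
# the ispec dict built earlier in this method is checked against the target
# group's instance policy, e.g.
#
#   ispec = {constants.ISPEC_MEM_SIZE: 1024,
#            constants.ISPEC_CPU_COUNT: 2,
#            constants.ISPEC_DISK_COUNT: 1,
#            constants.ISPEC_DISK_SIZE: [10240],
#            constants.ISPEC_NIC_COUNT: 1}
#
# _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec) returns a list of
# human-readable violations (empty if the spec fits); unless ignore_ipolicy
# is set, a non-empty list aborts the creation.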
9772 def Exec(self, feedback_fn):
9773 """Create and add the instance to the cluster.
9776 instance = self.op.instance_name
9777 pnode_name = self.pnode.name
9779 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9780 self.owned_locks(locking.LEVEL_NODE)), \
9781 "Node locks differ from node resource locks"
9783 ht_kind = self.op.hypervisor
9784 if ht_kind in constants.HTS_REQ_PORT:
9785 network_port = self.cfg.AllocatePort()
9789 disks = _GenerateDiskTemplate(self,
9790 self.op.disk_template,
9791 instance, pnode_name,
9794 self.instance_file_storage_dir,
9795 self.op.file_driver,
9800 iobj = objects.Instance(name=instance, os=self.op.os_type,
9801 primary_node=pnode_name,
9802 nics=self.nics, disks=disks,
9803 disk_template=self.op.disk_template,
9804 admin_state=constants.ADMINST_DOWN,
9805 network_port=network_port,
9806 beparams=self.op.beparams,
9807 hvparams=self.op.hvparams,
9808 hypervisor=self.op.hypervisor,
9809 osparams=self.op.osparams,
9813 for tag in self.op.tags:
9816 if self.adopt_disks:
9817 if self.op.disk_template == constants.DT_PLAIN:
9818 # rename LVs to the newly-generated names; we need to construct
9819 # 'fake' LV disks with the old data, plus the new unique_id
9820 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9822 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9823 rename_to.append(t_dsk.logical_id)
9824 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9825 self.cfg.SetDiskID(t_dsk, pnode_name)
9826 result = self.rpc.call_blockdev_rename(pnode_name,
9827 zip(tmp_disks, rename_to))
9828 result.Raise("Failed to rename adopted LVs")
9830 feedback_fn("* creating instance disks...")
9832 _CreateDisks(self, iobj)
9833 except errors.OpExecError:
9834 self.LogWarning("Device creation failed, reverting...")
9836 _RemoveDisks(self, iobj)
9838 self.cfg.ReleaseDRBDMinors(instance)
9841 feedback_fn("adding instance %s to cluster config" % instance)
9843 self.cfg.AddInstance(iobj, self.proc.GetECId())
9845 # Declare that we don't want to remove the instance lock anymore, as we've
9846 # added the instance to the config
9847 del self.remove_locks[locking.LEVEL_INSTANCE]
9849 if self.op.mode == constants.INSTANCE_IMPORT:
9850 # Release unused nodes
9851 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9854 _ReleaseLocks(self, locking.LEVEL_NODE)
9857 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9858 feedback_fn("* wiping instance disks...")
9860 _WipeDisks(self, iobj)
9861 except errors.OpExecError, err:
9862 logging.exception("Wiping disks failed")
9863 self.LogWarning("Wiping instance disks failed (%s)", err)
9867 # Something is already wrong with the disks, don't do anything else
9869 elif self.op.wait_for_sync:
9870 disk_abort = not _WaitForSync(self, iobj)
9871 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9872 # make sure the disks are not degraded (still sync-ing is ok)
9873 feedback_fn("* checking mirrors status")
9874 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9879 _RemoveDisks(self, iobj)
9880 self.cfg.RemoveInstance(iobj.name)
9881 # Make sure the instance lock gets removed
9882 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9883 raise errors.OpExecError("There are some degraded disks for"
9886 # Release all node resource locks
9887 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9889 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9890 if self.op.mode == constants.INSTANCE_CREATE:
9891 if not self.op.no_install:
9892 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9893 not self.op.wait_for_sync)
9895 feedback_fn("* pausing disk sync to install instance OS")
9896 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9898 for idx, success in enumerate(result.payload):
9900 logging.warn("pause-sync of instance %s for disk %d failed",
9903 feedback_fn("* running the instance OS create scripts...")
9904 # FIXME: pass debug option from opcode to backend
9905 os_add_result = \
9906 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9907 self.op.debug_level)
9909 feedback_fn("* resuming disk sync")
9910 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9912 for idx, success in enumerate(result.payload):
9914 logging.warn("resume-sync of instance %s for disk %d failed",
9917 os_add_result.Raise("Could not add os for instance %s"
9918 " on node %s" % (instance, pnode_name))
9920 elif self.op.mode == constants.INSTANCE_IMPORT:
9921 feedback_fn("* running the instance OS import scripts...")
9925 for idx, image in enumerate(self.src_images):
9929 # FIXME: pass debug option from opcode to backend
9930 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9931 constants.IEIO_FILE, (image, ),
9932 constants.IEIO_SCRIPT,
9933 (iobj.disks[idx], idx),
9935 transfers.append(dt)
9937 import_result = \
9938 masterd.instance.TransferInstanceData(self, feedback_fn,
9939 self.op.src_node, pnode_name,
9940 self.pnode.secondary_ip,
9942 if not compat.all(import_result):
9943 self.LogWarning("Some disks for instance %s on node %s were not"
9944 " imported successfully" % (instance, pnode_name))
9946 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9947 feedback_fn("* preparing remote import...")
9948 # The source cluster will stop the instance before attempting to make a
9949 # connection. In some cases stopping an instance can take a long time,
9950 # hence the shutdown timeout is added to the connection timeout.
9951 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9952 self.op.source_shutdown_timeout)
9953 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9955 assert iobj.primary_node == self.pnode.name
9956 disk_results = \
9957 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9958 self.source_x509_ca,
9959 self._cds, timeouts)
9960 if not compat.all(disk_results):
9961 # TODO: Should the instance still be started, even if some disks
9962 # failed to import (valid for local imports, too)?
9963 self.LogWarning("Some disks for instance %s on node %s were not"
9964 " imported successfully" % (instance, pnode_name))
9966 # Run rename script on newly imported instance
9967 assert iobj.name == instance
9968 feedback_fn("Running rename script for %s" % instance)
9969 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9970 self.source_instance_name,
9971 self.op.debug_level)
9973 self.LogWarning("Failed to run rename script for %s on node"
9974 " %s: %s" % (instance, pnode_name, result.fail_msg))
9977 # also checked in the prereq part
9978 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9981 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9984 iobj.admin_state = constants.ADMINST_UP
9985 self.cfg.Update(iobj, feedback_fn)
9986 logging.info("Starting instance %s on node %s", instance, pnode_name)
9987 feedback_fn("* starting instance...")
9988 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9990 result.Raise("Could not start instance")
9992 return list(iobj.all_nodes)
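# Illustrative sketch (hypothetical values, parameter list abridged; not part
# of the original code): callers reach LUInstanceCreate by submitting an
# OpInstanceCreate opcode whose fields mirror the self.op attributes used
# above, roughly:
#
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{constants.IDISK_SIZE: 10240}],
#                                 nics=[{}],
#                                 os_type="debootstrap",
#                                 pnode="node1", snode="node2",
#                                 start=True, wait_for_sync=True)
#
# Validation and defaults for these fields live in the opcodes module; the
# iallocator field can be used instead of pnode/snode.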
9995 class LUInstanceConsole(NoHooksLU):
9996 """Connect to an instance's console.
9998 This is somewhat special in that it returns the command line that
9999 you need to run on the master node in order to connect to the console.
10005 def ExpandNames(self):
10006 self.share_locks = _ShareAll()
10007 self._ExpandAndLockInstance()
10009 def CheckPrereq(self):
10010 """Check prerequisites.
10012 This checks that the instance is in the cluster.
10015 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10016 assert self.instance is not None, \
10017 "Cannot retrieve locked instance %s" % self.op.instance_name
10018 _CheckNodeOnline(self, self.instance.primary_node)
10020 def Exec(self, feedback_fn):
10021 """Connect to the console of an instance
10024 instance = self.instance
10025 node = instance.primary_node
10027 node_insts = self.rpc.call_instance_list([node],
10028 [instance.hypervisor])[node]
10029 node_insts.Raise("Can't get node information from %s" % node)
10031 if instance.name not in node_insts.payload:
10032 if instance.admin_state == constants.ADMINST_UP:
10033 state = constants.INSTST_ERRORDOWN
10034 elif instance.admin_state == constants.ADMINST_DOWN:
10035 state = constants.INSTST_ADMINDOWN
10037 state = constants.INSTST_ADMINOFFLINE
10038 raise errors.OpExecError("Instance %s is not running (state %s)" %
10039 (instance.name, state))
10041 logging.debug("Connecting to console of %s on %s", instance.name, node)
10043 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10046 def _GetInstanceConsole(cluster, instance):
10047 """Returns console information for an instance.
10049 @type cluster: L{objects.Cluster}
10050 @type instance: L{objects.Instance}
10054 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10055 # beparams and hvparams are passed separately, to avoid editing the
10056 # instance and then saving the defaults in the instance itself.
10057 hvparams = cluster.FillHV(instance)
10058 beparams = cluster.FillBE(instance)
10059 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10061 assert console.instance == instance.name
10062 assert console.Validate()
10064 return console.ToDict()
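# Illustrative note (not part of the original code): LUInstanceConsole.Exec()
# returns the dict produced here unchanged, e.g.
#
#   console = _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
#
# The keys of that dict depend on the hypervisor's GetInstanceConsole()
# implementation (for example an SSH command line for Xen or a VNC endpoint
# for KVM); clients such as "gnt-instance console" interpret it and run or
# connect to whatever it describes.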
10067 class LUInstanceReplaceDisks(LogicalUnit):
10068 """Replace the disks of an instance.
10071 HPATH = "mirrors-replace"
10072 HTYPE = constants.HTYPE_INSTANCE
10075 def CheckArguments(self):
10076 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10077 self.op.iallocator)
10079 def ExpandNames(self):
10080 self._ExpandAndLockInstance()
10082 assert locking.LEVEL_NODE not in self.needed_locks
10083 assert locking.LEVEL_NODE_RES not in self.needed_locks
10084 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10086 assert self.op.iallocator is None or self.op.remote_node is None, \
10087 "Conflicting options"
10089 if self.op.remote_node is not None:
10090 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10092 # Warning: do not remove the locking of the new secondary here
10093 # unless DRBD8.AddChildren is changed to work in parallel;
10094 # currently it doesn't since parallel invocations of
10095 # FindUnusedMinor will conflict
10096 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10097 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10099 self.needed_locks[locking.LEVEL_NODE] = []
10100 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10102 if self.op.iallocator is not None:
10103 # iallocator will select a new node in the same group
10104 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10106 self.needed_locks[locking.LEVEL_NODE_RES] = []
10108 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10109 self.op.iallocator, self.op.remote_node,
10110 self.op.disks, False, self.op.early_release,
10111 self.op.ignore_ipolicy)
10113 self.tasklets = [self.replacer]
10115 def DeclareLocks(self, level):
10116 if level == locking.LEVEL_NODEGROUP:
10117 assert self.op.remote_node is None
10118 assert self.op.iallocator is not None
10119 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10121 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10122 # Lock all groups used by instance optimistically; this requires going
10123 # via the node before it's locked, requiring verification later on
10124 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10125 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10127 elif level == locking.LEVEL_NODE:
10128 if self.op.iallocator is not None:
10129 assert self.op.remote_node is None
10130 assert not self.needed_locks[locking.LEVEL_NODE]
10132 # Lock member nodes of all locked groups
10133 self.needed_locks[locking.LEVEL_NODE] = [node_name
10134 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10135 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10137 self._LockInstancesNodes()
10138 elif level == locking.LEVEL_NODE_RES:
10140 self.needed_locks[locking.LEVEL_NODE_RES] = \
10141 self.needed_locks[locking.LEVEL_NODE]
10143 def BuildHooksEnv(self):
10144 """Build hooks env.
10146 This runs on the master, the primary and all the secondaries.
10149 instance = self.replacer.instance
10151 "MODE": self.op.mode,
10152 "NEW_SECONDARY": self.op.remote_node,
10153 "OLD_SECONDARY": instance.secondary_nodes[0],
10155 env.update(_BuildInstanceHookEnvByObject(self, instance))
10158 def BuildHooksNodes(self):
10159 """Build hooks nodes.
10162 instance = self.replacer.instance
10164 self.cfg.GetMasterNode(),
10165 instance.primary_node,
10167 if self.op.remote_node is not None:
10168 nl.append(self.op.remote_node)
10171 def CheckPrereq(self):
10172 """Check prerequisites.
10175 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10176 self.op.iallocator is None)
10178 # Verify if node group locks are still correct
10179 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10181 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10183 return LogicalUnit.CheckPrereq(self)
10186 class TLReplaceDisks(Tasklet):
10187 """Replaces disks for an instance.
10189 Note: Locking is not within the scope of this class.
10192 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10193 disks, delay_iallocator, early_release, ignore_ipolicy):
10194 """Initializes this class.
10197 Tasklet.__init__(self, lu)
10200 self.instance_name = instance_name
10202 self.iallocator_name = iallocator_name
10203 self.remote_node = remote_node
10205 self.delay_iallocator = delay_iallocator
10206 self.early_release = early_release
10207 self.ignore_ipolicy = ignore_ipolicy
10210 self.instance = None
10211 self.new_node = None
10212 self.target_node = None
10213 self.other_node = None
10214 self.remote_node_info = None
10215 self.node_secondary_ip = None
10218 def CheckArguments(mode, remote_node, iallocator):
10219 """Helper function for users of this class.
10222 # check for valid parameter combination
10223 if mode == constants.REPLACE_DISK_CHG:
10224 if remote_node is None and iallocator is None:
10225 raise errors.OpPrereqError("When changing the secondary either an"
10226 " iallocator script must be used or the"
10227 " new node given", errors.ECODE_INVAL)
10229 if remote_node is not None and iallocator is not None:
10230 raise errors.OpPrereqError("Give either the iallocator or the new"
10231 " secondary, not both", errors.ECODE_INVAL)
10233 elif remote_node is not None or iallocator is not None:
10234 # Not replacing the secondary
10235 raise errors.OpPrereqError("The iallocator and new node options can"
10236 " only be used when changing the"
10237 " secondary node", errors.ECODE_INVAL)
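# Illustrative summary (not part of the original code) of the combinations
# accepted by CheckArguments above, as (mode, remote_node, iallocator):
#
#   (REPLACE_DISK_PRI, None,    None)    -> ok, replace on the primary
#   (REPLACE_DISK_SEC, None,    None)    -> ok, replace on the secondary
#   (REPLACE_DISK_CHG, "node3", None)    -> ok, explicit new secondary
#   (REPLACE_DISK_CHG, None,    "hail")  -> ok, iallocator picks the node
#   (REPLACE_DISK_CHG, None,    None)    -> OpPrereqError (need one of them)
#   (REPLACE_DISK_CHG, "node3", "hail")  -> OpPrereqError (not both)
#   (REPLACE_DISK_SEC, "node3", None)    -> OpPrereqError (only allowed when
#                                           changing the secondary)
#
# "node3" and "hail" are just example values.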
10240 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10241 """Compute a new secondary node using an IAllocator.
10244 ial = IAllocator(lu.cfg, lu.rpc,
10245 mode=constants.IALLOCATOR_MODE_RELOC,
10246 name=instance_name,
10247 relocate_from=list(relocate_from))
10249 ial.Run(iallocator_name)
10251 if not ial.success:
10252 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10253 " %s" % (iallocator_name, ial.info),
10254 errors.ECODE_NORES)
10256 if len(ial.result) != ial.required_nodes:
10257 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10258 " of nodes (%s), required %s" %
10260 len(ial.result), ial.required_nodes),
10261 errors.ECODE_FAULT)
10263 remote_node_name = ial.result[0]
10265 lu.LogInfo("Selected new secondary for instance '%s': %s",
10266 instance_name, remote_node_name)
10268 return remote_node_name
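# Illustrative sketch (hypothetical names, not part of the original code):
# for an IALLOCATOR_MODE_RELOC request the allocator must return exactly one
# node, so a successful run looks like
#
#   ial.success == True, ial.required_nodes == 1, ial.result == ["node3"]
#
# and "node3" becomes the new secondary; a failure, or a result list whose
# length differs from required_nodes, raises OpPrereqError as above.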
10270 def _FindFaultyDisks(self, node_name):
10271 """Wrapper for L{_FindFaultyInstanceDisks}.
10274 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10277 def _CheckDisksActivated(self, instance):
10278 """Checks if the instance disks are activated.
10280 @param instance: The instance to check disks
10281 @return: True if they are activated, False otherwise
10284 nodes = instance.all_nodes
10286 for idx, dev in enumerate(instance.disks):
10288 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10289 self.cfg.SetDiskID(dev, node)
10291 result = self.rpc.call_blockdev_find(node, dev)
10295 elif result.fail_msg or not result.payload:
10300 def CheckPrereq(self):
10301 """Check prerequisites.
10303 This checks that the instance is in the cluster.
10306 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10307 assert instance is not None, \
10308 "Cannot retrieve locked instance %s" % self.instance_name
10310 if instance.disk_template != constants.DT_DRBD8:
10311 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10312 " instances", errors.ECODE_INVAL)
10314 if len(instance.secondary_nodes) != 1:
10315 raise errors.OpPrereqError("The instance has a strange layout,"
10316 " expected one secondary but found %d" %
10317 len(instance.secondary_nodes),
10318 errors.ECODE_FAULT)
10320 if not self.delay_iallocator:
10321 self._CheckPrereq2()
10323 def _CheckPrereq2(self):
10324 """Check prerequisites, second part.
10326 This function should always be part of CheckPrereq. It was separated and is
10327 now called from Exec because during node evacuation iallocator was only
10328 called with an unmodified cluster model, not taking planned changes into
10329 account.
10331 """
10332 instance = self.instance
10333 secondary_node = instance.secondary_nodes[0]
10335 if self.iallocator_name is None:
10336 remote_node = self.remote_node
10338 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10339 instance.name, instance.secondary_nodes)
10341 if remote_node is None:
10342 self.remote_node_info = None
10344 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10345 "Remote node '%s' is not locked" % remote_node
10347 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10348 assert self.remote_node_info is not None, \
10349 "Cannot retrieve locked node %s" % remote_node
10351 if remote_node == self.instance.primary_node:
10352 raise errors.OpPrereqError("The specified node is the primary node of"
10353 " the instance", errors.ECODE_INVAL)
10355 if remote_node == secondary_node:
10356 raise errors.OpPrereqError("The specified node is already the"
10357 " secondary node of the instance",
10358 errors.ECODE_INVAL)
10360 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10361 constants.REPLACE_DISK_CHG):
10362 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10363 errors.ECODE_INVAL)
10365 if self.mode == constants.REPLACE_DISK_AUTO:
10366 if not self._CheckDisksActivated(instance):
10367 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10368 " first" % self.instance_name,
10369 errors.ECODE_STATE)
10370 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10371 faulty_secondary = self._FindFaultyDisks(secondary_node)
10373 if faulty_primary and faulty_secondary:
10374 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10375 " one node and can not be repaired"
10376 " automatically" % self.instance_name,
10377 errors.ECODE_STATE)
10380 self.disks = faulty_primary
10381 self.target_node = instance.primary_node
10382 self.other_node = secondary_node
10383 check_nodes = [self.target_node, self.other_node]
10384 elif faulty_secondary:
10385 self.disks = faulty_secondary
10386 self.target_node = secondary_node
10387 self.other_node = instance.primary_node
10388 check_nodes = [self.target_node, self.other_node]
10394 # Non-automatic modes
10395 if self.mode == constants.REPLACE_DISK_PRI:
10396 self.target_node = instance.primary_node
10397 self.other_node = secondary_node
10398 check_nodes = [self.target_node, self.other_node]
10400 elif self.mode == constants.REPLACE_DISK_SEC:
10401 self.target_node = secondary_node
10402 self.other_node = instance.primary_node
10403 check_nodes = [self.target_node, self.other_node]
10405 elif self.mode == constants.REPLACE_DISK_CHG:
10406 self.new_node = remote_node
10407 self.other_node = instance.primary_node
10408 self.target_node = secondary_node
10409 check_nodes = [self.new_node, self.other_node]
10411 _CheckNodeNotDrained(self.lu, remote_node)
10412 _CheckNodeVmCapable(self.lu, remote_node)
10414 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10415 assert old_node_info is not None
10416 if old_node_info.offline and not self.early_release:
10417 # doesn't make sense to delay the release
10418 self.early_release = True
10419 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10420 " early-release mode", secondary_node)
10423 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10426 # If not specified all disks should be replaced
10428 self.disks = range(len(self.instance.disks))
10430 # TODO: This is ugly, but right now we can't distinguish between an
10431 # internally submitted opcode and an external one. We should fix that.
10432 if self.remote_node_info:
10433 # We change the node, lets verify it still meets instance policy
10434 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10435 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10437 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10438 ignore=self.ignore_ipolicy)
10440 # TODO: compute disk parameters
10441 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10442 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10443 if primary_node_info.group != secondary_node_info.group:
10444 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10445 " different node groups; the disk parameters of the"
10446 " primary node's group will be applied.")
10448 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10450 for node in check_nodes:
10451 _CheckNodeOnline(self.lu, node)
10453 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10456 if node_name is not None)
10458 # Release unneeded node and node resource locks
10459 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10460 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10462 # Release any owned node group
10463 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10464 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10466 # Check whether disks are valid
10467 for disk_idx in self.disks:
10468 instance.FindDisk(disk_idx)
10470 # Get secondary node IP addresses
10471 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10472 in self.cfg.GetMultiNodeInfo(touched_nodes))
10474 def Exec(self, feedback_fn):
10475 """Execute disk replacement.
10477 This dispatches the disk replacement to the appropriate handler.
10480 if self.delay_iallocator:
10481 self._CheckPrereq2()
10484 # Verify owned locks before starting operation
10485 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10486 assert set(owned_nodes) == set(self.node_secondary_ip), \
10487 ("Incorrect node locks, owning %s, expected %s" %
10488 (owned_nodes, self.node_secondary_ip.keys()))
10489 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10490 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10492 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10493 assert list(owned_instances) == [self.instance_name], \
10494 "Instance '%s' not locked" % self.instance_name
10496 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10497 "Should not own any node group lock at this point"
10500 feedback_fn("No disks need replacement")
10503 feedback_fn("Replacing disk(s) %s for %s" %
10504 (utils.CommaJoin(self.disks), self.instance.name))
10506 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10508 # Activate the instance disks if we're replacing them on a down instance
10510 _StartInstanceDisks(self.lu, self.instance, True)
10513 # Should we replace the secondary node?
10514 if self.new_node is not None:
10515 fn = self._ExecDrbd8Secondary
10517 fn = self._ExecDrbd8DiskOnly
10519 result = fn(feedback_fn)
10521 # Deactivate the instance disks if we're replacing them on a
10524 _SafeShutdownInstanceDisks(self.lu, self.instance)
10526 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10529 # Verify owned locks
10530 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10531 nodes = frozenset(self.node_secondary_ip)
10532 assert ((self.early_release and not owned_nodes) or
10533 (not self.early_release and not (set(owned_nodes) - nodes))), \
10534 ("Not owning the correct locks, early_release=%s, owned=%r,"
10535 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10539 def _CheckVolumeGroup(self, nodes):
10540 self.lu.LogInfo("Checking volume groups")
10542 vgname = self.cfg.GetVGName()
10544 # Make sure volume group exists on all involved nodes
10545 results = self.rpc.call_vg_list(nodes)
10547 raise errors.OpExecError("Can't list volume groups on the nodes")
10550 res = results[node]
10551 res.Raise("Error checking node %s" % node)
10552 if vgname not in res.payload:
10553 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10556 def _CheckDisksExistence(self, nodes):
10557 # Check disk existence
10558 for idx, dev in enumerate(self.instance.disks):
10559 if idx not in self.disks:
10563 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10564 self.cfg.SetDiskID(dev, node)
10566 result = self.rpc.call_blockdev_find(node, dev)
10568 msg = result.fail_msg
10569 if msg or not result.payload:
10571 msg = "disk not found"
10572 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10575 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10576 for idx, dev in enumerate(self.instance.disks):
10577 if idx not in self.disks:
10580 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10583 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10585 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10586 " replace disks for instance %s" %
10587 (node_name, self.instance.name))
10589 def _CreateNewStorage(self, node_name):
10590 """Create new storage on the primary or secondary node.
10592 This is only used for same-node replaces, not for changing the
10593 secondary node, hence we don't want to modify the existing disk.
10598 for idx, dev in enumerate(self.instance.disks):
10599 if idx not in self.disks:
10602 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10604 self.cfg.SetDiskID(dev, node_name)
10606 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10607 names = _GenerateUniqueNames(self.lu, lv_names)
10609 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10611 vg_data = dev.children[0].logical_id[0]
10612 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10613 logical_id=(vg_data, names[0]), params=data_p)
10614 vg_meta = dev.children[1].logical_id[0]
10615 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10616 logical_id=(vg_meta, names[1]), params=meta_p)
10618 new_lvs = [lv_data, lv_meta]
10619 old_lvs = [child.Copy() for child in dev.children]
10620 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10622 # we pass force_create=True to force the LVM creation
10623 for new_lv in new_lvs:
10624 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10625 _GetInstanceInfoText(self.instance), False)
10627 return iv_names
10629 def _CheckDevices(self, node_name, iv_names):
10630 for name, (dev, _, _) in iv_names.iteritems():
10631 self.cfg.SetDiskID(dev, node_name)
10633 result = self.rpc.call_blockdev_find(node_name, dev)
10635 msg = result.fail_msg
10636 if msg or not result.payload:
10638 msg = "disk not found"
10639 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10642 if result.payload.is_degraded:
10643 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10645 def _RemoveOldStorage(self, node_name, iv_names):
10646 for name, (_, old_lvs, _) in iv_names.iteritems():
10647 self.lu.LogInfo("Remove logical volumes for %s" % name)
10650 self.cfg.SetDiskID(lv, node_name)
10652 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10654 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10655 hint="remove unused LVs manually")
10657 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10658 """Replace a disk on the primary or secondary for DRBD 8.
10660 The algorithm for replace is quite complicated:
10662 1. for each disk to be replaced:
10664 1. create new LVs on the target node with unique names
10665 1. detach old LVs from the drbd device
10666 1. rename old LVs to name_replaced.<time_t>
10667 1. rename new LVs to old LVs
10668 1. attach the new LVs (with the old names now) to the drbd device
10670 1. wait for sync across all devices
10672 1. for each modified disk:
10674 1. remove old LVs (which have the name name_replaces.<time_t>)
10676 Failures are not very well handled.
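# Illustrative sketch (hypothetical LV names, not part of the original code)
# of the detach/rename/attach dance performed below for one disk backed by a
# data and a meta LV:
#
#   old LVs: xenvg/aaaa.disk0_data  xenvg/aaaa.disk0_meta
#   new LVs: xenvg/bbbb.disk0_data  xenvg/bbbb.disk0_meta
#
#   1. detach the old LVs from the drbd device
#   2. rename the old LVs to <name>_replaced-<time_t>
#   3. rename the new LVs to the original old names
#   4. re-attach them to the drbd device and wait for sync
#
# The drbd device ends up on fresh LVs carrying the original names, while the
# old data stays around under the *_replaced-* names until
# _RemoveOldStorage() deletes it.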
10681 # Step: check device activation
10682 self.lu.LogStep(1, steps_total, "Check device existence")
10683 self._CheckDisksExistence([self.other_node, self.target_node])
10684 self._CheckVolumeGroup([self.target_node, self.other_node])
10686 # Step: check other node consistency
10687 self.lu.LogStep(2, steps_total, "Check peer consistency")
10688 self._CheckDisksConsistency(self.other_node,
10689 self.other_node == self.instance.primary_node,
10692 # Step: create new storage
10693 self.lu.LogStep(3, steps_total, "Allocate new storage")
10694 iv_names = self._CreateNewStorage(self.target_node)
10696 # Step: for each lv, detach+rename*2+attach
10697 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10698 for dev, old_lvs, new_lvs in iv_names.itervalues():
10699 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10701 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10703 result.Raise("Can't detach drbd from local storage on node"
10704 " %s for device %s" % (self.target_node, dev.iv_name))
10706 #cfg.Update(instance)
10708 # ok, we created the new LVs, so now we know we have the needed
10709 # storage; as such, we proceed on the target node to rename
10710 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10711 # using the assumption that logical_id == physical_id (which in
10712 # turn is the unique_id on that node)
10714 # FIXME(iustin): use a better name for the replaced LVs
10715 temp_suffix = int(time.time())
10716 ren_fn = lambda d, suff: (d.physical_id[0],
10717 d.physical_id[1] + "_replaced-%s" % suff)
10719 # Build the rename list based on what LVs exist on the node
10720 rename_old_to_new = []
10721 for to_ren in old_lvs:
10722 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10723 if not result.fail_msg and result.payload:
10725 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10727 self.lu.LogInfo("Renaming the old LVs on the target node")
10728 result = self.rpc.call_blockdev_rename(self.target_node,
10730 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10732 # Now we rename the new LVs to the old LVs
10733 self.lu.LogInfo("Renaming the new LVs on the target node")
10734 rename_new_to_old = [(new, old.physical_id)
10735 for old, new in zip(old_lvs, new_lvs)]
10736 result = self.rpc.call_blockdev_rename(self.target_node,
10738 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10740 # Intermediate steps of in memory modifications
10741 for old, new in zip(old_lvs, new_lvs):
10742 new.logical_id = old.logical_id
10743 self.cfg.SetDiskID(new, self.target_node)
10745 # We need to modify old_lvs so that removal later removes the
10746 # right LVs, not the newly added ones; note that old_lvs is a
10748 for disk in old_lvs:
10749 disk.logical_id = ren_fn(disk, temp_suffix)
10750 self.cfg.SetDiskID(disk, self.target_node)
10752 # Now that the new lvs have the old name, we can add them to the device
10753 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10754 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10756 msg = result.fail_msg
10758 for new_lv in new_lvs:
10759 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10762 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10763 hint=("cleanup manually the unused logical"
10765 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10767 cstep = itertools.count(5)
10769 if self.early_release:
10770 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10771 self._RemoveOldStorage(self.target_node, iv_names)
10772 # TODO: Check if releasing locks early still makes sense
10773 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10775 # Release all resource locks except those used by the instance
10776 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10777 keep=self.node_secondary_ip.keys())
10779 # Release all node locks while waiting for sync
10780 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10782 # TODO: Can the instance lock be downgraded here? Take the optional disk
10783 # shutdown in the caller into consideration.
10786 # This can fail as the old devices are degraded and _WaitForSync
10787 # does a combined result over all disks, so we don't check its return value
10788 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10789 _WaitForSync(self.lu, self.instance)
10791 # Check all devices manually
10792 self._CheckDevices(self.instance.primary_node, iv_names)
10794 # Step: remove old storage
10795 if not self.early_release:
10796 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10797 self._RemoveOldStorage(self.target_node, iv_names)
10799 def _ExecDrbd8Secondary(self, feedback_fn):
10800 """Replace the secondary node for DRBD 8.
10802 The algorithm for replace is quite complicated:
10803 - for all disks of the instance:
10804 - create new LVs on the new node with same names
10805 - shutdown the drbd device on the old secondary
10806 - disconnect the drbd network on the primary
10807 - create the drbd device on the new secondary
10808 - network attach the drbd on the primary, using an artifice:
10809 the drbd code for Attach() will connect to the network if it
10810 finds a device which is connected to the good local disks but
10811 not network enabled
10812 - wait for sync across all devices
10813 - remove all disks from the old secondary
10815 Failures are not very well handled.
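# Illustrative sketch (hypothetical values, not part of the original code):
# a drbd8 disk's logical_id is the 6-tuple
# (nodeA, nodeB, port, minorA, minorB, secret). Replacing the secondary keeps
# the primary's half and swaps in the new node plus a freshly allocated
# minor, e.g.
#
#   old logical_id: ("node1", "node2", 11000, 0, 3, "secret")
#   new_alone_id  : ("node1", "node3", None,  0, 7, "secret")
#   new_net_id    : ("node1", "node3", 11000, 0, 7, "secret")
#
# The device is first created on the new node with new_alone_id (no port, so
# it comes up standalone) and only switched to new_net_id once the primary
# re-attaches the network further down.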
10820 pnode = self.instance.primary_node
10822 # Step: check device activation
10823 self.lu.LogStep(1, steps_total, "Check device existence")
10824 self._CheckDisksExistence([self.instance.primary_node])
10825 self._CheckVolumeGroup([self.instance.primary_node])
10827 # Step: check other node consistency
10828 self.lu.LogStep(2, steps_total, "Check peer consistency")
10829 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10831 # Step: create new storage
10832 self.lu.LogStep(3, steps_total, "Allocate new storage")
10833 for idx, dev in enumerate(self.instance.disks):
10834 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10835 (self.new_node, idx))
10836 # we pass force_create=True to force LVM creation
10837 for new_lv in dev.children:
10838 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10839 _GetInstanceInfoText(self.instance), False)
10841 # Step 4: dbrd minors and drbd setups changes
10842 # after this, we must manually remove the drbd minors on both the
10843 # error and the success paths
10844 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10845 minors = self.cfg.AllocateDRBDMinor([self.new_node
10846 for dev in self.instance.disks],
10847 self.instance.name)
10848 logging.debug("Allocated minors %r", minors)
10851 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10852 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10853 (self.new_node, idx))
10854 # create new devices on new_node; note that we create two IDs:
10855 # one without port, so the drbd will be activated without
10856 # networking information on the new node at this stage, and one
10857 # with network, for the latter activation in step 4
10858 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10859 if self.instance.primary_node == o_node1:
10862 assert self.instance.primary_node == o_node2, "Three-node instance?"
10865 new_alone_id = (self.instance.primary_node, self.new_node, None,
10866 p_minor, new_minor, o_secret)
10867 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10868 p_minor, new_minor, o_secret)
10870 iv_names[idx] = (dev, dev.children, new_net_id)
10871 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10873 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10874 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10875 logical_id=new_alone_id,
10876 children=dev.children,
10878 params=drbd_params)
10880 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10881 _GetInstanceInfoText(self.instance), False)
10882 except errors.GenericError:
10883 self.cfg.ReleaseDRBDMinors(self.instance.name)
10886 # We have new devices, shut down the drbd on the old secondary
10887 for idx, dev in enumerate(self.instance.disks):
10888 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10889 self.cfg.SetDiskID(dev, self.target_node)
10890 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10892 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10893 "node: %s" % (idx, msg),
10894 hint=("Please cleanup this device manually as"
10895 " soon as possible"))
10897 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10898 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10899 self.instance.disks)[pnode]
10901 msg = result.fail_msg
10903 # detaches didn't succeed (unlikely)
10904 self.cfg.ReleaseDRBDMinors(self.instance.name)
10905 raise errors.OpExecError("Can't detach the disks from the network on"
10906 " old node: %s" % (msg,))
10908 # if we managed to detach at least one, we update all the disks of
10909 # the instance to point to the new secondary
10910 self.lu.LogInfo("Updating instance configuration")
10911 for dev, _, new_logical_id in iv_names.itervalues():
10912 dev.logical_id = new_logical_id
10913 self.cfg.SetDiskID(dev, self.instance.primary_node)
10915 self.cfg.Update(self.instance, feedback_fn)
10917 # Release all node locks (the configuration has been updated)
10918 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10920 # and now perform the drbd attach
10921 self.lu.LogInfo("Attaching primary drbds to new secondary"
10922 " (standalone => connected)")
10923 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10925 self.node_secondary_ip,
10926 self.instance.disks,
10927 self.instance.name,
10929 for to_node, to_result in result.items():
10930 msg = to_result.fail_msg
10932 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10934 hint=("please do a gnt-instance info to see the"
10935 " status of disks"))
10937 cstep = itertools.count(5)
10939 if self.early_release:
10940 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10941 self._RemoveOldStorage(self.target_node, iv_names)
10942 # TODO: Check if releasing locks early still makes sense
10943 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10945 # Release all resource locks except those used by the instance
10946 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10947 keep=self.node_secondary_ip.keys())
10949 # TODO: Can the instance lock be downgraded here? Take the optional disk
10950 # shutdown in the caller into consideration.
10953 # This can fail as the old devices are degraded and _WaitForSync
10954 # does a combined result over all disks, so we don't check its return value
10955 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10956 _WaitForSync(self.lu, self.instance)
10958 # Check all devices manually
10959 self._CheckDevices(self.instance.primary_node, iv_names)
10961 # Step: remove old storage
10962 if not self.early_release:
10963 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10964 self._RemoveOldStorage(self.target_node, iv_names)
10967 class LURepairNodeStorage(NoHooksLU):
10968 """Repairs the volume group on a node.
10973 def CheckArguments(self):
10974 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10976 storage_type = self.op.storage_type
10978 if (constants.SO_FIX_CONSISTENCY not in
10979 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10980 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10981 " repaired" % storage_type,
10982 errors.ECODE_INVAL)
10984 def ExpandNames(self):
10985 self.needed_locks = {
10986 locking.LEVEL_NODE: [self.op.node_name],
10989 def _CheckFaultyDisks(self, instance, node_name):
10990 """Ensure faulty disks abort the opcode or at least warn."""
10992 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10994 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10995 " node '%s'" % (instance.name, node_name),
10996 errors.ECODE_STATE)
10997 except errors.OpPrereqError, err:
10998 if self.op.ignore_consistency:
10999 self.proc.LogWarning(str(err.args[0]))
11003 def CheckPrereq(self):
11004 """Check prerequisites.
11007 # Check whether any instance on this node has faulty disks
11008 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11009 if inst.admin_state != constants.ADMINST_UP:
11011 check_nodes = set(inst.all_nodes)
11012 check_nodes.discard(self.op.node_name)
11013 for inst_node_name in check_nodes:
11014 self._CheckFaultyDisks(inst, inst_node_name)
11016 def Exec(self, feedback_fn):
11017 feedback_fn("Repairing storage unit '%s' on %s ..." %
11018 (self.op.name, self.op.node_name))
11020 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11021 result = self.rpc.call_storage_execute(self.op.node_name,
11022 self.op.storage_type, st_args,
11024 constants.SO_FIX_CONSISTENCY)
11025 result.Raise("Failed to repair storage unit '%s' on %s" %
11026 (self.op.name, self.op.node_name))
11029 class LUNodeEvacuate(NoHooksLU):
11030 """Evacuates instances off a list of nodes.
11035 _MODE2IALLOCATOR = {
11036 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11037 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11038 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11040 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11041 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11042 constants.IALLOCATOR_NEVAC_MODES)
11044 def CheckArguments(self):
11045 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11047 def ExpandNames(self):
11048 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11050 if self.op.remote_node is not None:
11051 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11052 assert self.op.remote_node
11054 if self.op.remote_node == self.op.node_name:
11055 raise errors.OpPrereqError("Can not use evacuated node as a new"
11056 " secondary node", errors.ECODE_INVAL)
11058 if self.op.mode != constants.NODE_EVAC_SEC:
11059 raise errors.OpPrereqError("Without the use of an iallocator only"
11060 " secondary instances can be evacuated",
11061 errors.ECODE_INVAL)
11064 self.share_locks = _ShareAll()
11065 self.needed_locks = {
11066 locking.LEVEL_INSTANCE: [],
11067 locking.LEVEL_NODEGROUP: [],
11068 locking.LEVEL_NODE: [],
11071 # Determine nodes (via group) optimistically, needs verification once locks
11072 # have been acquired
11073 self.lock_nodes = self._DetermineNodes()
11075 def _DetermineNodes(self):
11076 """Gets the list of nodes to operate on.
11079 if self.op.remote_node is None:
11080 # Iallocator will choose any node(s) in the same group
11081 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11083 group_nodes = frozenset([self.op.remote_node])
11085 # Determine nodes to be locked
11086 return set([self.op.node_name]) | group_nodes
11088 def _DetermineInstances(self):
11089 """Builds list of instances to operate on.
11092 assert self.op.mode in constants.NODE_EVAC_MODES
11094 if self.op.mode == constants.NODE_EVAC_PRI:
11095 # Primary instances only
11096 inst_fn = _GetNodePrimaryInstances
11097 assert self.op.remote_node is None, \
11098 "Evacuating primary instances requires iallocator"
11099 elif self.op.mode == constants.NODE_EVAC_SEC:
11100 # Secondary instances only
11101 inst_fn = _GetNodeSecondaryInstances
11104 assert self.op.mode == constants.NODE_EVAC_ALL
11105 inst_fn = _GetNodeInstances
11106 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11108 raise errors.OpPrereqError("Due to an issue with the iallocator"
11109 " interface it is not possible to evacuate"
11110 " all instances at once; specify explicitly"
11111 " whether to evacuate primary or secondary"
11113 errors.ECODE_INVAL)
11115 return inst_fn(self.cfg, self.op.node_name)
11117 def DeclareLocks(self, level):
11118 if level == locking.LEVEL_INSTANCE:
11119 # Lock instances optimistically, needs verification once node and group
11120 # locks have been acquired
11121 self.needed_locks[locking.LEVEL_INSTANCE] = \
11122 set(i.name for i in self._DetermineInstances())
11124 elif level == locking.LEVEL_NODEGROUP:
11125 # Lock node groups for all potential target nodes optimistically, needs
11126 # verification once nodes have been acquired
11127 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11128 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11130 elif level == locking.LEVEL_NODE:
11131 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
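# Note: the instance, group and node lock sets above are computed before any
# locks are actually held, so they may be stale by the time they are granted.
# CheckPrereq below therefore recomputes the node/group/instance sets and asks
# the caller to retry the operation if they no longer match what was locked.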
11133 def CheckPrereq(self):
11135 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11136 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11137 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11139 need_nodes = self._DetermineNodes()
11141 if not owned_nodes.issuperset(need_nodes):
11142 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11143 " locks were acquired, current nodes are"
11144 " are '%s', used to be '%s'; retry the"
11146 (self.op.node_name,
11147 utils.CommaJoin(need_nodes),
11148 utils.CommaJoin(owned_nodes)),
11149 errors.ECODE_STATE)
11151 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11152 if owned_groups != wanted_groups:
11153 raise errors.OpExecError("Node groups changed since locks were acquired,"
11154 " current groups are '%s', used to be '%s';"
11155 " retry the operation" %
11156 (utils.CommaJoin(wanted_groups),
11157 utils.CommaJoin(owned_groups)))
11159 # Determine affected instances
11160 self.instances = self._DetermineInstances()
11161 self.instance_names = [i.name for i in self.instances]
11163 if set(self.instance_names) != owned_instances:
11164 raise errors.OpExecError("Instances on node '%s' changed since locks"
11165 " were acquired, current instances are '%s',"
11166 " used to be '%s'; retry the operation" %
11167 (self.op.node_name,
11168 utils.CommaJoin(self.instance_names),
11169 utils.CommaJoin(owned_instances)))
11171 if self.instance_names:
11172 self.LogInfo("Evacuating instances from node '%s': %s",
11174 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11176 self.LogInfo("No instances to evacuate from node '%s'",
11179 if self.op.remote_node is not None:
11180 for i in self.instances:
11181 if i.primary_node == self.op.remote_node:
11182 raise errors.OpPrereqError("Node %s is the primary node of"
11183 " instance %s, cannot use it as"
11185 (self.op.remote_node, i.name),
11186 errors.ECODE_INVAL)
11188 def Exec(self, feedback_fn):
11189 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11191 if not self.instance_names:
11192 # No instances to evacuate
11195 elif self.op.iallocator is not None:
11196 # TODO: Implement relocation to other group
11197 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11198 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11199 instances=list(self.instance_names))
11201 ial.Run(self.op.iallocator)
11203 if not ial.success:
11204 raise errors.OpPrereqError("Can't compute node evacuation using"
11205 " iallocator '%s': %s" %
11206 (self.op.iallocator, ial.info),
11207 errors.ECODE_NORES)
11209 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11211 elif self.op.remote_node is not None:
11212 assert self.op.mode == constants.NODE_EVAC_SEC
11214 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11215 remote_node=self.op.remote_node,
11217 mode=constants.REPLACE_DISK_CHG,
11218 early_release=self.op.early_release)]
11219 for instance_name in self.instance_names
11223 raise errors.ProgrammerError("No iallocator or remote node")
11225 return ResultWithJobs(jobs)
11228 def _SetOpEarlyRelease(early_release, op):
11229 """Sets C{early_release} flag on opcodes if available.
11233 op.early_release = early_release
11234 except AttributeError:
11235 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11240 def _NodeEvacDest(use_nodes, group, nodes):
11241 """Returns group or nodes depending on caller's choice.
11245 return utils.CommaJoin(nodes)
11250 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11251 """Unpacks the result of change-group and node-evacuate iallocator requests.
11253 Handles results from iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11254 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11256 @type lu: L{LogicalUnit}
11257 @param lu: Logical unit instance
11258 @type alloc_result: tuple/list
11259 @param alloc_result: Result from iallocator
11260 @type early_release: bool
11261 @param early_release: Whether to release locks early if possible
11262 @type use_nodes: bool
11263 @param use_nodes: Whether to display node names instead of groups
11266 (moved, failed, jobs) = alloc_result
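# For reference, alloc_result for these modes is a triple: "moved" is a list
# of (instance name, target group, target nodes), "failed" is a list of
# (instance name, failure reason) and "jobs" is a list of job definitions,
# each being a list of serialized opcodes. An illustrative (made-up) value:
#   ([("inst1", "uuid-of-group2", ["node3", "node4"])],
#    [("inst2", "not enough memory")],
#    [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS"}]])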
11269 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11270 for (name, reason) in failed)
11271 lu.LogWarning("Unable to evacuate instances %s", failreason)
11272 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11275 lu.LogInfo("Instances to be moved: %s",
11276 utils.CommaJoin("%s (to %s)" %
11277 (name, _NodeEvacDest(use_nodes, group, nodes))
11278 for (name, group, nodes) in moved))
11280 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11281 map(opcodes.OpCode.LoadOpCode, ops))
11285 class LUInstanceGrowDisk(LogicalUnit):
11286 """Grow a disk of an instance.
11289 HPATH = "disk-grow"
11290 HTYPE = constants.HTYPE_INSTANCE
11293 def ExpandNames(self):
11294 self._ExpandAndLockInstance()
11295 self.needed_locks[locking.LEVEL_NODE] = []
11296 self.needed_locks[locking.LEVEL_NODE_RES] = []
11297 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11298 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11300 def DeclareLocks(self, level):
11301 if level == locking.LEVEL_NODE:
11302 self._LockInstancesNodes()
11303 elif level == locking.LEVEL_NODE_RES:
11305 self.needed_locks[locking.LEVEL_NODE_RES] = \
11306 self.needed_locks[locking.LEVEL_NODE][:]
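# The copy (note the [:]) is deliberate: the NODE_RES level gets its own list
# so that node locks can later be released while the node resource locks are
# still held (see Exec below).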
11308 def BuildHooksEnv(self):
11309 """Build hooks env.
11311 This runs on the master, the primary and all the secondaries.
11315 "DISK": self.op.disk,
11316 "AMOUNT": self.op.amount,
11318 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11321 def BuildHooksNodes(self):
11322 """Build hooks nodes.
11325 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11328 def CheckPrereq(self):
11329 """Check prerequisites.
11331 This checks that the instance is in the cluster.
11334 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11335 assert instance is not None, \
11336 "Cannot retrieve locked instance %s" % self.op.instance_name
11337 nodenames = list(instance.all_nodes)
11338 for node in nodenames:
11339 _CheckNodeOnline(self, node)
11341 self.instance = instance
11343 if instance.disk_template not in constants.DTS_GROWABLE:
11344 raise errors.OpPrereqError("Instance's disk layout does not support"
11345 " growing", errors.ECODE_INVAL)
11347 self.disk = instance.FindDisk(self.op.disk)
11349 if instance.disk_template not in (constants.DT_FILE,
11350 constants.DT_SHARED_FILE):
11351 # TODO: check the free disk space for file, when that feature will be
11353 _CheckNodesFreeDiskPerVG(self, nodenames,
11354 self.disk.ComputeGrowth(self.op.amount))
11356 def Exec(self, feedback_fn):
11357 """Execute disk grow.
11360 instance = self.instance
11363 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11364 assert (self.owned_locks(locking.LEVEL_NODE) ==
11365 self.owned_locks(locking.LEVEL_NODE_RES))
11367 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11369 raise errors.OpExecError("Cannot activate block device to grow")
11371 feedback_fn("Growing disk %s of instance '%s' by %s" %
11372 (self.op.disk, instance.name,
11373 utils.FormatUnit(self.op.amount, "h")))
11375 # First run all grow ops in dry-run mode
11376 for node in instance.all_nodes:
11377 self.cfg.SetDiskID(disk, node)
11378 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11379 result.Raise("Grow request failed to node %s" % node)
11381 # We know that (as far as we can test) operations across different
11382 # nodes will succeed; time to run it for real
11383 for node in instance.all_nodes:
11384 self.cfg.SetDiskID(disk, node)
11385 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11386 result.Raise("Grow request failed to node %s" % node)
11388 # TODO: Rewrite code to work properly
11389 # DRBD goes into sync mode for a short amount of time after executing the
11390 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11391 # calling "resize" in sync mode fails. Sleeping for a short amount of
11392 # time is a work-around.
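# (A short pause, e.g. time.sleep(5), is the intended work-around at this
# point; the call itself is not shown in this excerpt.)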
11395 disk.RecordGrow(self.op.amount)
11396 self.cfg.Update(instance, feedback_fn)
11398 # Changes have been recorded, release node lock
11399 _ReleaseLocks(self, locking.LEVEL_NODE)
11401 # Downgrade lock while waiting for sync
11402 self.glm.downgrade(locking.LEVEL_INSTANCE)
11404 if self.op.wait_for_sync:
11405 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11407 self.proc.LogWarning("Disk sync-ing has not returned a good"
11408 " status; please check the instance")
11409 if instance.admin_state != constants.ADMINST_UP:
11410 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11411 elif instance.admin_state != constants.ADMINST_UP:
11412 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11413 " not supposed to be running because no wait for"
11414 " sync mode was requested")
11416 assert self.owned_locks(locking.LEVEL_NODE_RES)
11417 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11420 class LUInstanceQueryData(NoHooksLU):
11421 """Query runtime instance data.
11426 def ExpandNames(self):
11427 self.needed_locks = {}
11429 # Use locking if requested or when non-static information is wanted
11430 if not (self.op.static or self.op.use_locking):
11431 self.LogWarning("Non-static data requested, locks need to be acquired")
11432 self.op.use_locking = True
11434 if self.op.instances or not self.op.use_locking:
11435 # Expand instance names right here
11436 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11438 # Will use acquired locks
11439 self.wanted_names = None
11441 if self.op.use_locking:
11442 self.share_locks = _ShareAll()
11444 if self.wanted_names is None:
11445 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11447 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11449 self.needed_locks[locking.LEVEL_NODE] = []
11450 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11452 def DeclareLocks(self, level):
11453 if self.op.use_locking and level == locking.LEVEL_NODE:
11454 self._LockInstancesNodes()
11456 def CheckPrereq(self):
11457 """Check prerequisites.
11459 This only checks the optional instance list against the existing names.
11462 if self.wanted_names is None:
11463 assert self.op.use_locking, "Locking was not used"
11464 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11466 self.wanted_instances = \
11467 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11469 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11470 """Returns the status of a block device
11473 if self.op.static or not node:
11476 self.cfg.SetDiskID(dev, node)
11478 result = self.rpc.call_blockdev_find(node, dev)
11482 result.Raise("Can't compute disk status for %s" % instance_name)
11484 status = result.payload
11488 return (status.dev_path, status.major, status.minor,
11489 status.sync_percent, status.estimated_time,
11490 status.is_degraded, status.ldisk_status)
11492 def _ComputeDiskStatus(self, instance, snode, dev):
11493 """Compute block device status.
11496 if dev.dev_type in constants.LDS_DRBD:
11497 # for DRBD we determine the snode ourselves (instead of the one passed in)
11498 if dev.logical_id[0] == instance.primary_node:
11499 snode = dev.logical_id[1]
11501 snode = dev.logical_id[0]
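# For DRBD8 devices the logical_id is
# (node_a, node_b, port, minor_a, minor_b, shared_secret), so whichever of the
# first two entries is not the primary node is the secondary to report on.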
11503 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11504 instance.name, dev)
11505 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11508 dev_children = map(compat.partial(self._ComputeDiskStatus,
11515 "iv_name": dev.iv_name,
11516 "dev_type": dev.dev_type,
11517 "logical_id": dev.logical_id,
11518 "physical_id": dev.physical_id,
11519 "pstatus": dev_pstatus,
11520 "sstatus": dev_sstatus,
11521 "children": dev_children,
11526 def Exec(self, feedback_fn):
11527 """Gather and return data"""
11530 cluster = self.cfg.GetClusterInfo()
11532 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11533 for i in self.wanted_instances)
11534 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11535 if self.op.static or pnode.offline:
11536 remote_state = None
11538 self.LogWarning("Primary node %s is marked offline, returning static"
11539 " information only for instance %s" %
11540 (pnode.name, instance.name))
11542 remote_info = self.rpc.call_instance_info(instance.primary_node,
11544 instance.hypervisor)
11545 remote_info.Raise("Error checking node %s" % instance.primary_node)
11546 remote_info = remote_info.payload
11547 if remote_info and "state" in remote_info:
11548 remote_state = "up"
11550 if instance.admin_state == constants.ADMINST_UP:
11551 remote_state = "down"
11553 remote_state = instance.admin_state
11555 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11558 result[instance.name] = {
11559 "name": instance.name,
11560 "config_state": instance.admin_state,
11561 "run_state": remote_state,
11562 "pnode": instance.primary_node,
11563 "snodes": instance.secondary_nodes,
11565 # this happens to be the same format used for hooks
11566 "nics": _NICListToTuple(self, instance.nics),
11567 "disk_template": instance.disk_template,
11569 "hypervisor": instance.hypervisor,
11570 "network_port": instance.network_port,
11571 "hv_instance": instance.hvparams,
11572 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11573 "be_instance": instance.beparams,
11574 "be_actual": cluster.FillBE(instance),
11575 "os_instance": instance.osparams,
11576 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11577 "serial_no": instance.serial_no,
11578 "mtime": instance.mtime,
11579 "ctime": instance.ctime,
11580 "uuid": instance.uuid,
11586 class LUInstanceSetParams(LogicalUnit):
11587 """Modifies an instances's parameters.
11590 HPATH = "instance-modify"
11591 HTYPE = constants.HTYPE_INSTANCE
11594 def CheckArguments(self):
11595 if not (self.op.nics or self.op.disks or self.op.disk_template or
11596 self.op.hvparams or self.op.beparams or self.op.os_name or
11597 self.op.online_inst or self.op.offline_inst or
11598 self.op.runtime_mem):
11599 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11601 if self.op.hvparams:
11602 _CheckGlobalHvParams(self.op.hvparams)
11606 for disk_op, disk_dict in self.op.disks:
11607 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11608 if disk_op == constants.DDM_REMOVE:
11609 disk_addremove += 1
11611 elif disk_op == constants.DDM_ADD:
11612 disk_addremove += 1
11614 if not isinstance(disk_op, int):
11615 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11616 if not isinstance(disk_dict, dict):
11617 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11618 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11620 if disk_op == constants.DDM_ADD:
11621 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11622 if mode not in constants.DISK_ACCESS_SET:
11623 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11624 errors.ECODE_INVAL)
11625 size = disk_dict.get(constants.IDISK_SIZE, None)
11627 raise errors.OpPrereqError("Required disk parameter size missing",
11628 errors.ECODE_INVAL)
11631 except (TypeError, ValueError), err:
11632 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11633 str(err), errors.ECODE_INVAL)
11634 disk_dict[constants.IDISK_SIZE] = size
11636 # modification of disk
11637 if constants.IDISK_SIZE in disk_dict:
11638 raise errors.OpPrereqError("Disk size change not possible, use"
11639 " grow-disk", errors.ECODE_INVAL)
11641 if disk_addremove > 1:
11642 raise errors.OpPrereqError("Only one disk add or remove operation"
11643 " supported at a time", errors.ECODE_INVAL)
11645 if self.op.disks and self.op.disk_template is not None:
11646 raise errors.OpPrereqError("Disk template conversion and other disk"
11647 " changes not supported at the same time",
11648 errors.ECODE_INVAL)
11650 if (self.op.disk_template and
11651 self.op.disk_template in constants.DTS_INT_MIRROR and
11652 self.op.remote_node is None):
11653 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11654 " one requires specifying a secondary node",
11655 errors.ECODE_INVAL)
11659 for nic_op, nic_dict in self.op.nics:
11660 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11661 if nic_op == constants.DDM_REMOVE:
11664 elif nic_op == constants.DDM_ADD:
11667 if not isinstance(nic_op, int):
11668 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11669 if not isinstance(nic_dict, dict):
11670 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11671 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11673 # nic_dict should be a dict
11674 nic_ip = nic_dict.get(constants.INIC_IP, None)
11675 if nic_ip is not None:
11676 if nic_ip.lower() == constants.VALUE_NONE:
11677 nic_dict[constants.INIC_IP] = None
11679 if not netutils.IPAddress.IsValid(nic_ip):
11680 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11681 errors.ECODE_INVAL)
11683 nic_bridge = nic_dict.get("bridge", None)
11684 nic_link = nic_dict.get(constants.INIC_LINK, None)
11685 if nic_bridge and nic_link:
11686 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11687 " at the same time", errors.ECODE_INVAL)
11688 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11689 nic_dict["bridge"] = None
11690 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11691 nic_dict[constants.INIC_LINK] = None
11693 if nic_op == constants.DDM_ADD:
11694 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11695 if nic_mac is None:
11696 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11698 if constants.INIC_MAC in nic_dict:
11699 nic_mac = nic_dict[constants.INIC_MAC]
11700 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11701 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11703 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11704 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11705 " modifying an existing nic",
11706 errors.ECODE_INVAL)
11708 if nic_addremove > 1:
11709 raise errors.OpPrereqError("Only one NIC add or remove operation"
11710 " supported at a time", errors.ECODE_INVAL)
11712 def ExpandNames(self):
11713 self._ExpandAndLockInstance()
11714 # Can't even acquire node locks in shared mode as upcoming changes in
11715 # Ganeti 2.6 will start to modify the node object on disk conversion
11716 self.needed_locks[locking.LEVEL_NODE] = []
11717 self.needed_locks[locking.LEVEL_NODE_RES] = []
11718 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11720 def DeclareLocks(self, level):
11721 if level == locking.LEVEL_NODE:
11722 self._LockInstancesNodes()
11723 if self.op.disk_template and self.op.remote_node:
11724 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11725 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11726 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11728 self.needed_locks[locking.LEVEL_NODE_RES] = \
11729 self.needed_locks[locking.LEVEL_NODE][:]
11731 def BuildHooksEnv(self):
11732 """Build hooks env.
11734 This runs on the master, primary and secondaries.
11738 if constants.BE_MINMEM in self.be_new:
11739 args["minmem"] = self.be_new[constants.BE_MINMEM]
11740 if constants.BE_MAXMEM in self.be_new:
11741 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11742 if constants.BE_VCPUS in self.be_new:
11743 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11744 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11745 # information at all.
11748 nic_override = dict(self.op.nics)
11749 for idx, nic in enumerate(self.instance.nics):
11750 if idx in nic_override:
11751 this_nic_override = nic_override[idx]
11753 this_nic_override = {}
11754 if constants.INIC_IP in this_nic_override:
11755 ip = this_nic_override[constants.INIC_IP]
11758 if constants.INIC_MAC in this_nic_override:
11759 mac = this_nic_override[constants.INIC_MAC]
11762 if idx in self.nic_pnew:
11763 nicparams = self.nic_pnew[idx]
11765 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11766 mode = nicparams[constants.NIC_MODE]
11767 link = nicparams[constants.NIC_LINK]
11768 args["nics"].append((ip, mac, mode, link))
11769 if constants.DDM_ADD in nic_override:
11770 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11771 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11772 nicparams = self.nic_pnew[constants.DDM_ADD]
11773 mode = nicparams[constants.NIC_MODE]
11774 link = nicparams[constants.NIC_LINK]
11775 args["nics"].append((ip, mac, mode, link))
11776 elif constants.DDM_REMOVE in nic_override:
11777 del args["nics"][-1]
11779 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11780 if self.op.disk_template:
11781 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11782 if self.op.runtime_mem:
11783 env["RUNTIME_MEMORY"] = self.op.runtime_mem
11787 def BuildHooksNodes(self):
11788 """Build hooks nodes.
11791 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11794 def CheckPrereq(self):
11795 """Check prerequisites.
11797 This checks the requested changes against the instance's current state and the cluster configuration.
11800 # checking the new params on the primary/secondary nodes
11802 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11803 cluster = self.cluster = self.cfg.GetClusterInfo()
11804 assert self.instance is not None, \
11805 "Cannot retrieve locked instance %s" % self.op.instance_name
11806 pnode = instance.primary_node
11807 nodelist = list(instance.all_nodes)
11808 pnode_info = self.cfg.GetNodeInfo(pnode)
11809 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11812 if self.op.os_name and not self.op.force:
11813 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11814 self.op.force_variant)
11815 instance_os = self.op.os_name
11817 instance_os = instance.os
11819 if self.op.disk_template:
11820 if instance.disk_template == self.op.disk_template:
11821 raise errors.OpPrereqError("Instance already has disk template %s" %
11822 instance.disk_template, errors.ECODE_INVAL)
11824 if (instance.disk_template,
11825 self.op.disk_template) not in self._DISK_CONVERSIONS:
11826 raise errors.OpPrereqError("Unsupported disk template conversion from"
11827 " %s to %s" % (instance.disk_template,
11828 self.op.disk_template),
11829 errors.ECODE_INVAL)
11830 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11831 msg="cannot change disk template")
11832 if self.op.disk_template in constants.DTS_INT_MIRROR:
11833 if self.op.remote_node == pnode:
11834 raise errors.OpPrereqError("Given new secondary node %s is the same"
11835 " as the primary node of the instance" %
11836 self.op.remote_node, errors.ECODE_STATE)
11837 _CheckNodeOnline(self, self.op.remote_node)
11838 _CheckNodeNotDrained(self, self.op.remote_node)
11839 # FIXME: here we assume that the old instance type is DT_PLAIN
11840 assert instance.disk_template == constants.DT_PLAIN
11841 disks = [{constants.IDISK_SIZE: d.size,
11842 constants.IDISK_VG: d.logical_id[0]}
11843 for d in instance.disks]
11844 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11845 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11847 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11848 snode_group = self.cfg.GetNodeGroup(snode_info.group)
11849 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
11850 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
11851 ignore=self.op.ignore_ipolicy)
11852 if pnode_info.group != snode_info.group:
11853 self.LogWarning("The primary and secondary nodes are in two"
11854 " different node groups; the disk parameters"
11855 " from the first disk's node group will be"
11858 # hvparams processing
11859 if self.op.hvparams:
11860 hv_type = instance.hypervisor
11861 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11862 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11863 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11866 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11867 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11868 self.hv_proposed = self.hv_new = hv_new # the new actual values
11869 self.hv_inst = i_hvdict # the new dict (without defaults)
11871 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11873 self.hv_new = self.hv_inst = {}
11875 # beparams processing
11876 if self.op.beparams:
11877 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11879 objects.UpgradeBeParams(i_bedict)
11880 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11881 be_new = cluster.SimpleFillBE(i_bedict)
11882 self.be_proposed = self.be_new = be_new # the new actual values
11883 self.be_inst = i_bedict # the new dict (without defaults)
11885 self.be_new = self.be_inst = {}
11886 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11887 be_old = cluster.FillBE(instance)
11889 # CPU param validation -- checking every time a parameter is
11890 # changed to cover all cases where either CPU mask or vcpus have been changed
11892 if (constants.BE_VCPUS in self.be_proposed and
11893 constants.HV_CPU_MASK in self.hv_proposed):
11895 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11896 # Verify mask is consistent with number of vCPUs. Can skip this
11897 # test if only 1 entry in the CPU mask, which means same mask
11898 # is applied to all vCPUs.
11899 if (len(cpu_list) > 1 and
11900 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11901 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11903 (self.be_proposed[constants.BE_VCPUS],
11904 self.hv_proposed[constants.HV_CPU_MASK]),
11905 errors.ECODE_INVAL)
11907 # Only perform this test if a new CPU mask is given
11908 if constants.HV_CPU_MASK in self.hv_new:
11909 # Calculate the largest CPU number requested
11910 max_requested_cpu = max(map(max, cpu_list))
11911 # Check that all of the instance's nodes have enough physical CPUs to
11912 # satisfy the requested CPU mask
11913 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11914 max_requested_cpu + 1, instance.hypervisor)
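# Worked example (made-up values): with BE_VCPUS=2, a mask of "0-1" (a single
# entry) applies to both vCPUs and is accepted, "1:3" (two entries, one per
# vCPU) is accepted, while "1:3:5" (three entries) is rejected; for "1:3" the
# highest CPU number is 3, so every node must have at least 4 physical CPUs.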
11916 # osparams processing
11917 if self.op.osparams:
11918 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11919 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11920 self.os_inst = i_osdict # the new dict (without defaults)
11926 #TODO(dynmem): do the appropriate check involving MINMEM
11927 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11928 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11929 mem_check_list = [pnode]
11930 if be_new[constants.BE_AUTO_BALANCE]:
11931 # either we changed auto_balance to yes or it was from before
11932 mem_check_list.extend(instance.secondary_nodes)
11933 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11934 instance.hypervisor)
11935 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11936 [instance.hypervisor])
11937 pninfo = nodeinfo[pnode]
11938 msg = pninfo.fail_msg
11940 # Assume the primary node is unreachable and go ahead
11941 self.warn.append("Can't get info from primary node %s: %s" %
11944 (_, _, (pnhvinfo, )) = pninfo.payload
11945 if not isinstance(pnhvinfo.get("memory_free", None), int):
11946 self.warn.append("Node data from primary node %s doesn't contain"
11947 " free memory information" % pnode)
11948 elif instance_info.fail_msg:
11949 self.warn.append("Can't get instance runtime information: %s" %
11950 instance_info.fail_msg)
11952 if instance_info.payload:
11953 current_mem = int(instance_info.payload["memory"])
11955 # Assume instance not running
11956 # (there is a slight race condition here, but it's not very
11957 # probable, and we have no other way to check)
11958 # TODO: Describe race condition
11960 #TODO(dynmem): do the appropriate check involving MINMEM
11961 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11962 pnhvinfo["memory_free"])
11964 raise errors.OpPrereqError("This change will prevent the instance"
11965 " from starting, due to %d MB of memory"
11966 " missing on its primary node" %
11968 errors.ECODE_NORES)
11970 if be_new[constants.BE_AUTO_BALANCE]:
11971 for node, nres in nodeinfo.items():
11972 if node not in instance.secondary_nodes:
11974 nres.Raise("Can't get info from secondary node %s" % node,
11975 prereq=True, ecode=errors.ECODE_STATE)
11976 (_, _, (nhvinfo, )) = nres.payload
11977 if not isinstance(nhvinfo.get("memory_free", None), int):
11978 raise errors.OpPrereqError("Secondary node %s didn't return free"
11979 " memory information" % node,
11980 errors.ECODE_STATE)
11981 #TODO(dynmem): do the appropriate check involving MINMEM
11982 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11983 raise errors.OpPrereqError("This change will prevent the instance"
11984 " from failover to its secondary node"
11985 " %s, due to not enough memory" % node,
11986 errors.ECODE_STATE)
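# Worked example for the primary-node check above (made-up numbers): raising
# BE_MAXMEM from 1024 to 4096 MB while the instance currently uses 1024 MB and
# the node reports 2048 MB free gives miss_mem = 4096 - 1024 - 2048 = 1024 > 0,
# so the change is refused unless the force flag is set.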
11988 if self.op.runtime_mem:
11989 remote_info = self.rpc.call_instance_info(instance.primary_node,
11991 instance.hypervisor)
11992 remote_info.Raise("Error checking node %s" % instance.primary_node)
11993 if not remote_info.payload: # not running already
11994 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
11995 errors.ECODE_STATE)
11997 current_memory = remote_info.payload["memory"]
11998 if (not self.op.force and
11999 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12000 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12001 raise errors.OpPrereqError("Instance %s must have memory between %d"
12002 " and %d MB of memory unless --force is"
12003 " given" % (instance.name,
12004 self.be_proposed[constants.BE_MINMEM],
12005 self.be_proposed[constants.BE_MAXMEM]),
12006 errors.ECODE_INVAL)
12008 if self.op.runtime_mem > current_memory:
12009 _CheckNodeFreeMemory(self, instance.primary_node,
12010 "ballooning memory for instance %s" %
12012 self.op.runtime_mem - current_memory,
12013 instance.hypervisor)
12017 self.nic_pinst = {}
12018 for nic_op, nic_dict in self.op.nics:
12019 if nic_op == constants.DDM_REMOVE:
12020 if not instance.nics:
12021 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
12022 errors.ECODE_INVAL)
12024 if nic_op != constants.DDM_ADD:
12026 if not instance.nics:
12027 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
12028 " no NICs" % nic_op,
12029 errors.ECODE_INVAL)
12030 if nic_op < 0 or nic_op >= len(instance.nics):
12031 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
12033 (nic_op, len(instance.nics) - 1),
12034 errors.ECODE_INVAL)
12035 old_nic_params = instance.nics[nic_op].nicparams
12036 old_nic_ip = instance.nics[nic_op].ip
12038 old_nic_params = {}
12041 update_params_dict = dict([(key, nic_dict[key])
12042 for key in constants.NICS_PARAMETERS
12043 if key in nic_dict])
12045 if "bridge" in nic_dict:
12046 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
12048 new_nic_params = _GetUpdatedParams(old_nic_params,
12049 update_params_dict)
12050 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
12051 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
12052 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
12053 self.nic_pinst[nic_op] = new_nic_params
12054 self.nic_pnew[nic_op] = new_filled_nic_params
12055 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
12057 if new_nic_mode == constants.NIC_MODE_BRIDGED:
12058 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
12059 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
12061 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
12063 self.warn.append(msg)
12065 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12066 if new_nic_mode == constants.NIC_MODE_ROUTED:
12067 if constants.INIC_IP in nic_dict:
12068 nic_ip = nic_dict[constants.INIC_IP]
12070 nic_ip = old_nic_ip
12072 raise errors.OpPrereqError("Cannot set the nic ip to None"
12073 " on a routed nic", errors.ECODE_INVAL)
12074 if constants.INIC_MAC in nic_dict:
12075 nic_mac = nic_dict[constants.INIC_MAC]
12076 if nic_mac is None:
12077 raise errors.OpPrereqError("Cannot set the nic mac to None",
12078 errors.ECODE_INVAL)
12079 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12080 # otherwise generate the mac
12081 nic_dict[constants.INIC_MAC] = \
12082 self.cfg.GenerateMAC(self.proc.GetECId())
12084 # or validate/reserve the current one
12086 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
12087 except errors.ReservationError:
12088 raise errors.OpPrereqError("MAC address %s already in use"
12089 " in cluster" % nic_mac,
12090 errors.ECODE_NOTUNIQUE)
12093 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12094 raise errors.OpPrereqError("Disk operations not supported for"
12095 " diskless instances",
12096 errors.ECODE_INVAL)
12097 for disk_op, _ in self.op.disks:
12098 if disk_op == constants.DDM_REMOVE:
12099 if len(instance.disks) == 1:
12100 raise errors.OpPrereqError("Cannot remove the last disk of"
12101 " an instance", errors.ECODE_INVAL)
12102 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12103 msg="cannot remove disks")
12105 if (disk_op == constants.DDM_ADD and
12106 len(instance.disks) >= constants.MAX_DISKS):
12107 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12108 " add more" % constants.MAX_DISKS,
12109 errors.ECODE_STATE)
12110 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12112 if disk_op < 0 or disk_op >= len(instance.disks):
12113 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12115 (disk_op, len(instance.disks)),
12116 errors.ECODE_INVAL)
12118 # disabling the instance
12119 if self.op.offline_inst:
12120 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12121 msg="cannot change instance state to offline")
12123 # enabling the instance
12124 if self.op.online_inst:
12125 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12126 msg="cannot make instance go online")
12128 def _ConvertPlainToDrbd(self, feedback_fn):
12129 """Converts an instance from plain to drbd.
12132 feedback_fn("Converting template to drbd")
12133 instance = self.instance
12134 pnode = instance.primary_node
12135 snode = self.op.remote_node
12137 assert instance.disk_template == constants.DT_PLAIN
12139 # create a fake disk info for _GenerateDiskTemplate
12140 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12141 constants.IDISK_VG: d.logical_id[0]}
12142 for d in instance.disks]
12143 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12144 instance.name, pnode, [snode],
12145 disk_info, None, None, 0, feedback_fn,
12147 info = _GetInstanceInfoText(instance)
12148 feedback_fn("Creating aditional volumes...")
12149 # first, create the missing data and meta devices
12150 for disk in new_disks:
12151 # unfortunately this is... not too nice
12152 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12154 for child in disk.children:
12155 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12156 # at this stage, all new LVs have been created, we can rename the
12158 feedback_fn("Renaming original volumes...")
12159 rename_list = [(o, n.children[0].logical_id)
12160 for (o, n) in zip(instance.disks, new_disks)]
12161 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12162 result.Raise("Failed to rename original LVs")
12164 feedback_fn("Initializing DRBD devices...")
12165 # all child devices are in place, we can now create the DRBD devices
12166 for disk in new_disks:
12167 for node in [pnode, snode]:
12168 f_create = node == pnode
12169 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12171 # at this point, the instance has been modified
12172 instance.disk_template = constants.DT_DRBD8
12173 instance.disks = new_disks
12174 self.cfg.Update(instance, feedback_fn)
12176 # Release node locks while waiting for sync
12177 _ReleaseLocks(self, locking.LEVEL_NODE)
12179 # disks are created, waiting for sync
12180 disk_abort = not _WaitForSync(self, instance,
12181 oneshot=not self.op.wait_for_sync)
12183 raise errors.OpExecError("There are some degraded disks for"
12184 " this instance, please cleanup manually")
12186 # Node resource locks will be released by caller
12188 def _ConvertDrbdToPlain(self, feedback_fn):
12189 """Converts an instance from drbd to plain.
12192 instance = self.instance
12194 assert len(instance.secondary_nodes) == 1
12195 assert instance.disk_template == constants.DT_DRBD8
12197 pnode = instance.primary_node
12198 snode = instance.secondary_nodes[0]
12199 feedback_fn("Converting template to plain")
12201 old_disks = instance.disks
12202 new_disks = [d.children[0] for d in old_disks]
12204 # copy over size and mode
12205 for parent, child in zip(old_disks, new_disks):
12206 child.size = parent.size
12207 child.mode = parent.mode
12209 # update instance structure
12210 instance.disks = new_disks
12211 instance.disk_template = constants.DT_PLAIN
12212 self.cfg.Update(instance, feedback_fn)
12214 # Release locks in case removing disks takes a while
12215 _ReleaseLocks(self, locking.LEVEL_NODE)
12217 feedback_fn("Removing volumes on the secondary node...")
12218 for disk in old_disks:
12219 self.cfg.SetDiskID(disk, snode)
12220 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12222 self.LogWarning("Could not remove block device %s on node %s,"
12223 " continuing anyway: %s", disk.iv_name, snode, msg)
12225 feedback_fn("Removing unneeded volumes on the primary node...")
12226 for idx, disk in enumerate(old_disks):
12227 meta = disk.children[1]
12228 self.cfg.SetDiskID(meta, pnode)
12229 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12231 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12232 " continuing anyway: %s", idx, pnode, msg)
12234 # this is a DRBD disk, return its port to the pool
12235 for disk in old_disks:
12236 tcp_port = disk.logical_id[2]
12237 self.cfg.AddTcpUdpPort(tcp_port)
12239 # Node resource locks will be released by caller
12241 def Exec(self, feedback_fn):
12242 """Modifies an instance.
12244 All parameters take effect only at the next restart of the instance.
12247 # Process here the warnings from CheckPrereq, as we don't have a
12248 # feedback_fn there.
12249 for warn in self.warn:
12250 feedback_fn("WARNING: %s" % warn)
12252 assert ((self.op.disk_template is None) ^
12253 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12254 "Not owning any node resource locks"
12257 instance = self.instance
12260 if self.op.runtime_mem:
12261 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12263 self.op.runtime_mem)
12264 rpcres.Raise("Cannot modify instance runtime memory")
12265 result.append(("runtime_memory", self.op.runtime_mem))
12268 for disk_op, disk_dict in self.op.disks:
12269 if disk_op == constants.DDM_REMOVE:
12270 # remove the last disk
12271 device = instance.disks.pop()
12272 device_idx = len(instance.disks)
12273 for node, disk in device.ComputeNodeTree(instance.primary_node):
12274 self.cfg.SetDiskID(disk, node)
12275 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12277 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12278 " continuing anyway", device_idx, node, msg)
12279 result.append(("disk/%d" % device_idx, "remove"))
12281 # if this is a DRBD disk, return its port to the pool
12282 if device.dev_type in constants.LDS_DRBD:
12283 tcp_port = device.logical_id[2]
12284 self.cfg.AddTcpUdpPort(tcp_port)
12285 elif disk_op == constants.DDM_ADD:
12287 if instance.disk_template in (constants.DT_FILE,
12288 constants.DT_SHARED_FILE):
12289 file_driver, file_path = instance.disks[0].logical_id
12290 file_path = os.path.dirname(file_path)
12292 file_driver = file_path = None
12293 disk_idx_base = len(instance.disks)
12294 new_disk = _GenerateDiskTemplate(self,
12295 instance.disk_template,
12296 instance.name, instance.primary_node,
12297 instance.secondary_nodes,
12303 self.diskparams)[0]
12304 instance.disks.append(new_disk)
12305 info = _GetInstanceInfoText(instance)
12307 logging.info("Creating volume %s for instance %s",
12308 new_disk.iv_name, instance.name)
12309 # Note: this needs to be kept in sync with _CreateDisks
12311 for node in instance.all_nodes:
12312 f_create = node == instance.primary_node
12314 _CreateBlockDev(self, node, instance, new_disk,
12315 f_create, info, f_create)
12316 except errors.OpExecError, err:
12317 self.LogWarning("Failed to create volume %s (%s) on"
12319 new_disk.iv_name, new_disk, node, err)
12320 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12321 (new_disk.size, new_disk.mode)))
12323 # change a given disk
12324 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12325 result.append(("disk.mode/%d" % disk_op,
12326 disk_dict[constants.IDISK_MODE]))
12328 if self.op.disk_template:
12330 check_nodes = set(instance.all_nodes)
12331 if self.op.remote_node:
12332 check_nodes.add(self.op.remote_node)
12333 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12334 owned = self.owned_locks(level)
12335 assert not (check_nodes - owned), \
12336 ("Not owning the correct locks, owning %r, expected at least %r" %
12337 (owned, check_nodes))
12339 r_shut = _ShutdownInstanceDisks(self, instance)
12341 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12342 " proceed with disk template conversion")
12343 mode = (instance.disk_template, self.op.disk_template)
12345 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12347 self.cfg.ReleaseDRBDMinors(instance.name)
12349 result.append(("disk_template", self.op.disk_template))
12351 assert instance.disk_template == self.op.disk_template, \
12352 ("Expected disk template '%s', found '%s'" %
12353 (self.op.disk_template, instance.disk_template))
12355 # Release node and resource locks if there are any (they might already have
12356 # been released during disk conversion)
12357 _ReleaseLocks(self, locking.LEVEL_NODE)
12358 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12361 for nic_op, nic_dict in self.op.nics:
12362 if nic_op == constants.DDM_REMOVE:
12363 # remove the last nic
12364 del instance.nics[-1]
12365 result.append(("nic.%d" % len(instance.nics), "remove"))
12366 elif nic_op == constants.DDM_ADD:
12367 # mac and bridge should be set by now
12368 mac = nic_dict[constants.INIC_MAC]
12369 ip = nic_dict.get(constants.INIC_IP, None)
12370 nicparams = self.nic_pinst[constants.DDM_ADD]
12371 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12372 instance.nics.append(new_nic)
12373 result.append(("nic.%d" % (len(instance.nics) - 1),
12374 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12375 (new_nic.mac, new_nic.ip,
12376 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12377 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12380 for key in (constants.INIC_MAC, constants.INIC_IP):
12381 if key in nic_dict:
12382 setattr(instance.nics[nic_op], key, nic_dict[key])
12383 if nic_op in self.nic_pinst:
12384 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12385 for key, val in nic_dict.iteritems():
12386 result.append(("nic.%s/%d" % (key, nic_op), val))
12389 if self.op.hvparams:
12390 instance.hvparams = self.hv_inst
12391 for key, val in self.op.hvparams.iteritems():
12392 result.append(("hv/%s" % key, val))
12395 if self.op.beparams:
12396 instance.beparams = self.be_inst
12397 for key, val in self.op.beparams.iteritems():
12398 result.append(("be/%s" % key, val))
12401 if self.op.os_name:
12402 instance.os = self.op.os_name
12405 if self.op.osparams:
12406 instance.osparams = self.os_inst
12407 for key, val in self.op.osparams.iteritems():
12408 result.append(("os/%s" % key, val))
12410 # online/offline instance
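# Note that "online" here only clears the offline flag: the admin state goes
# back to "down" (the instance is not started), while "offline" marks the
# instance as administratively offline.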
12411 if self.op.online_inst:
12412 self.cfg.MarkInstanceDown(instance.name)
12413 result.append(("admin_state", constants.ADMINST_DOWN))
12414 if self.op.offline_inst:
12415 self.cfg.MarkInstanceOffline(instance.name)
12416 result.append(("admin_state", constants.ADMINST_OFFLINE))
12418 self.cfg.Update(instance, feedback_fn)
12420 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12421 self.owned_locks(locking.LEVEL_NODE)), \
12422 "All node locks should have been released by now"
12426 _DISK_CONVERSIONS = {
12427 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12428 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12432 class LUInstanceChangeGroup(LogicalUnit):
12433 HPATH = "instance-change-group"
12434 HTYPE = constants.HTYPE_INSTANCE
12437 def ExpandNames(self):
12438 self.share_locks = _ShareAll()
12439 self.needed_locks = {
12440 locking.LEVEL_NODEGROUP: [],
12441 locking.LEVEL_NODE: [],
12444 self._ExpandAndLockInstance()
12446 if self.op.target_groups:
12447 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12448 self.op.target_groups)
12450 self.req_target_uuids = None
12452 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12454 def DeclareLocks(self, level):
12455 if level == locking.LEVEL_NODEGROUP:
12456 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12458 if self.req_target_uuids:
12459 lock_groups = set(self.req_target_uuids)
12461 # Lock all groups used by instance optimistically; this requires going
12462 # via the node before it's locked, requiring verification later on
12463 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12464 lock_groups.update(instance_groups)
12466 # No target groups, need to lock all of them
12467 lock_groups = locking.ALL_SET
12469 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12471 elif level == locking.LEVEL_NODE:
12472 if self.req_target_uuids:
12473 # Lock all nodes used by instances
12474 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12475 self._LockInstancesNodes()
12477 # Lock all nodes in all potential target groups
12478 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12479 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12480 member_nodes = [node_name
12481 for group in lock_groups
12482 for node_name in self.cfg.GetNodeGroup(group).members]
12483 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12485 # Lock all nodes as all groups are potential targets
12486 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12488 def CheckPrereq(self):
12489 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12490 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12491 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12493 assert (self.req_target_uuids is None or
12494 owned_groups.issuperset(self.req_target_uuids))
12495 assert owned_instances == set([self.op.instance_name])
12497 # Get instance information
12498 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12500 # Check if node groups for locked instance are still correct
12501 assert owned_nodes.issuperset(self.instance.all_nodes), \
12502 ("Instance %s's nodes changed while we kept the lock" %
12503 self.op.instance_name)
12505 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12508 if self.req_target_uuids:
12509 # User requested specific target groups
12510 self.target_uuids = self.req_target_uuids
12512 # All groups except those used by the instance are potential targets
12513 self.target_uuids = owned_groups - inst_groups
12515 conflicting_groups = self.target_uuids & inst_groups
12516 if conflicting_groups:
12517 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12518 " used by the instance '%s'" %
12519 (utils.CommaJoin(conflicting_groups),
12520 self.op.instance_name),
12521 errors.ECODE_INVAL)
12523 if not self.target_uuids:
12524 raise errors.OpPrereqError("There are no possible target groups",
12525 errors.ECODE_INVAL)
12527 def BuildHooksEnv(self):
12528 """Build hooks env.
12531 assert self.target_uuids
12534 "TARGET_GROUPS": " ".join(self.target_uuids),
12537 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12541 def BuildHooksNodes(self):
12542 """Build hooks nodes.
12545 mn = self.cfg.GetMasterNode()
12546 return ([mn], [mn])
12548 def Exec(self, feedback_fn):
12549 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12551 assert instances == [self.op.instance_name], "Instance not locked"
12553 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12554 instances=instances, target_groups=list(self.target_uuids))
12556 ial.Run(self.op.iallocator)
12558 if not ial.success:
12559 raise errors.OpPrereqError("Can't compute solution for changing group of"
12560 " instance '%s' using iallocator '%s': %s" %
12561 (self.op.instance_name, self.op.iallocator,
12563 errors.ECODE_NORES)
12565 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12567 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12568 " instance '%s'", len(jobs), self.op.instance_name)
12570 return ResultWithJobs(jobs)
12573 class LUBackupQuery(NoHooksLU):
12574 """Query the exports list
12579 def ExpandNames(self):
12580 self.needed_locks = {}
12581 self.share_locks[locking.LEVEL_NODE] = 1
12582 if not self.op.nodes:
12583 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12585 self.needed_locks[locking.LEVEL_NODE] = \
12586 _GetWantedNodes(self, self.op.nodes)
12588 def Exec(self, feedback_fn):
12589 """Compute the list of all the exported system images.
12592 @return: a dictionary with the structure node->(export-list)
12593 where export-list is a list of the instances exported on
12597 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12598 rpcresult = self.rpc.call_export_list(self.nodes)
12600 for node in rpcresult:
12601 if rpcresult[node].fail_msg:
12602 result[node] = False
12604 result[node] = rpcresult[node].payload
12609 class LUBackupPrepare(NoHooksLU):
12610 """Prepares an instance for an export and returns useful information.
12615 def ExpandNames(self):
12616 self._ExpandAndLockInstance()
12618 def CheckPrereq(self):
12619 """Check prerequisites.
12622 instance_name = self.op.instance_name
12624 self.instance = self.cfg.GetInstanceInfo(instance_name)
12625 assert self.instance is not None, \
12626 "Cannot retrieve locked instance %s" % self.op.instance_name
12627 _CheckNodeOnline(self, self.instance.primary_node)
12629 self._cds = _GetClusterDomainSecret()
12631 def Exec(self, feedback_fn):
12632 """Prepares an instance for an export.
12635 instance = self.instance
12637 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12638 salt = utils.GenerateSecret(8)
12640 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12641 result = self.rpc.call_x509_cert_create(instance.primary_node,
12642 constants.RIE_CERT_VALIDITY)
12643 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12645 (name, cert_pem) = result.payload
12647 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12651 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12652 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12654 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12660 class LUBackupExport(LogicalUnit):
12661 """Export an instance to an image in the cluster.
12664 HPATH = "instance-export"
12665 HTYPE = constants.HTYPE_INSTANCE
12668 def CheckArguments(self):
12669 """Check the arguments.
12672 self.x509_key_name = self.op.x509_key_name
12673 self.dest_x509_ca_pem = self.op.destination_x509_ca
12675 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12676 if not self.x509_key_name:
12677 raise errors.OpPrereqError("Missing X509 key name for encryption",
12678 errors.ECODE_INVAL)
12680 if not self.dest_x509_ca_pem:
12681 raise errors.OpPrereqError("Missing destination X509 CA",
12682 errors.ECODE_INVAL)
12684 def ExpandNames(self):
12685 self._ExpandAndLockInstance()
12687 # Lock all nodes for local exports
12688 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12689 # FIXME: lock only instance primary and destination node
12691 # Sad but true, for now we have to lock all nodes, as we don't know where
12692 # the previous export might be, and in this LU we search for it and
12693 # remove it from its current node. In the future we could fix this by:
12694 # - making a tasklet to search (share-lock all), then create the
12695 # new one, then one to remove, after
12696 # - removing the removal operation altogether
12697 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12699 def DeclareLocks(self, level):
12700 """Last minute lock declaration."""
12701 # All nodes are locked anyway, so nothing to do here.
12703 def BuildHooksEnv(self):
12704 """Build hooks env.
12706 This will run on the master, primary node and target node.
12710 "EXPORT_MODE": self.op.mode,
12711 "EXPORT_NODE": self.op.target_node,
12712 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12713 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12714 # TODO: Generic function for boolean env variables
12715 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12718 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12722 def BuildHooksNodes(self):
12723 """Build hooks nodes.
12726 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12728 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12729 nl.append(self.op.target_node)
12733 def CheckPrereq(self):
12734 """Check prerequisites.
12736 This checks that the instance and node names are valid.
12739 instance_name = self.op.instance_name
12741 self.instance = self.cfg.GetInstanceInfo(instance_name)
12742 assert self.instance is not None, \
12743 "Cannot retrieve locked instance %s" % self.op.instance_name
12744 _CheckNodeOnline(self, self.instance.primary_node)
12746 if (self.op.remove_instance and
12747 self.instance.admin_state == constants.ADMINST_UP and
12748 not self.op.shutdown):
12749 raise errors.OpPrereqError("Can not remove instance without shutting it"
12752 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12753 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12754 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12755 assert self.dst_node is not None
12757 _CheckNodeOnline(self, self.dst_node.name)
12758 _CheckNodeNotDrained(self, self.dst_node.name)
12761 self.dest_disk_info = None
12762 self.dest_x509_ca = None
12764 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12765 self.dst_node = None
12767 if len(self.op.target_node) != len(self.instance.disks):
12768 raise errors.OpPrereqError(("Received destination information for %s"
12769 " disks, but instance %s has %s disks") %
12770 (len(self.op.target_node), instance_name,
12771 len(self.instance.disks)),
12772 errors.ECODE_INVAL)
12774 cds = _GetClusterDomainSecret()
12776 # Check X509 key name
12778 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12779 except (TypeError, ValueError), err:
12780 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12782 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12783 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12784 errors.ECODE_INVAL)
12786 # Load and verify CA
12788 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12789 except OpenSSL.crypto.Error, err:
12790 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12791 (err, ), errors.ECODE_INVAL)
12793 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12794 if errcode is not None:
12795 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12796 (msg, ), errors.ECODE_INVAL)
12798 self.dest_x509_ca = cert
12800 # Verify target information
12802 for idx, disk_data in enumerate(self.op.target_node):
12804 (host, port, magic) = \
12805 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12806 except errors.GenericError, err:
12807 raise errors.OpPrereqError("Target info for disk %s: %s" %
12808 (idx, err), errors.ECODE_INVAL)
12810 disk_info.append((host, port, magic))
12812 assert len(disk_info) == len(self.op.target_node)
12813 self.dest_disk_info = disk_info
12816 raise errors.ProgrammerError("Unhandled export mode %r" %
12819 # instance disk type verification
12820 # TODO: Implement export support for file-based disks
12821 for disk in self.instance.disks:
12822 if disk.dev_type == constants.LD_FILE:
12823 raise errors.OpPrereqError("Export not supported for instances with"
12824 " file-based disks", errors.ECODE_INVAL)
12826 def _CleanupExports(self, feedback_fn):
12827 """Removes exports of current instance from all other nodes.
12829 If an instance in a cluster with nodes A..D was exported to node C, its
12830 exports will be removed from the nodes A, B and D.
12833 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12835 nodelist = self.cfg.GetNodeList()
12836 nodelist.remove(self.dst_node.name)
12838 # on one-node clusters nodelist will be empty after the removal
12839 # if we proceed, the backup would be removed because OpBackupQuery
12840 # substitutes an empty list with the full cluster node list.
12841 iname = self.instance.name
12843 feedback_fn("Removing old exports for instance %s" % iname)
12844 exportlist = self.rpc.call_export_list(nodelist)
12845 for node in exportlist:
12846 if exportlist[node].fail_msg:
12848 if iname in exportlist[node].payload:
12849 msg = self.rpc.call_export_remove(node, iname).fail_msg
12851 self.LogWarning("Could not remove older export for instance %s"
12852 " on node %s: %s", iname, node, msg)
12854 def Exec(self, feedback_fn):
12855 """Export an instance to an image in the cluster.
12858 assert self.op.mode in constants.EXPORT_MODES
12860 instance = self.instance
12861 src_node = instance.primary_node
12863 if self.op.shutdown:
12864 # shutdown the instance, but not the disks
12865 feedback_fn("Shutting down instance %s" % instance.name)
12866 result = self.rpc.call_instance_shutdown(src_node, instance,
12867 self.op.shutdown_timeout)
12868 # TODO: Maybe ignore failures if ignore_remove_failures is set
12869 result.Raise("Could not shutdown instance %s on"
12870 " node %s" % (instance.name, src_node))
12872 # set the disks ID correctly since call_instance_start needs the
12873 # correct drbd minor to create the symlinks
12874 for disk in instance.disks:
12875 self.cfg.SetDiskID(disk, src_node)
12877 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12880 # Activate the instance disks if we're exporting a stopped instance
12881 feedback_fn("Activating disks for %s" % instance.name)
12882 _StartInstanceDisks(self, instance, None)
12885 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12888 helper.CreateSnapshots()
12890 if (self.op.shutdown and
12891 instance.admin_state == constants.ADMINST_UP and
12892 not self.op.remove_instance):
12893 assert not activate_disks
12894 feedback_fn("Starting instance %s" % instance.name)
12895 result = self.rpc.call_instance_start(src_node,
12896 (instance, None, None), False)
12897 msg = result.fail_msg
12899 feedback_fn("Failed to start instance: %s" % msg)
12900 _ShutdownInstanceDisks(self, instance)
12901 raise errors.OpExecError("Could not start instance: %s" % msg)
12903 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12904 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12905 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12906 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12907 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12909 (key_name, _, _) = self.x509_key_name
12912 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12915 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12916 key_name, dest_ca_pem,
12921 # Check for backwards compatibility
12922 assert len(dresults) == len(instance.disks)
12923 assert compat.all(isinstance(i, bool) for i in dresults), \
12924 "Not all results are boolean: %r" % dresults
12928 feedback_fn("Deactivating disks for %s" % instance.name)
12929 _ShutdownInstanceDisks(self, instance)
12931 if not (compat.all(dresults) and fin_resu):
12934 failures.append("export finalization")
12935 if not compat.all(dresults):
12936 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12938 failures.append("disk export: disk(s) %s" % fdsk)
12940 raise errors.OpExecError("Export failed, errors in %s" %
12941 utils.CommaJoin(failures))
12943 # At this point, the export was successful, we can cleanup/finish
12945 # Remove instance if requested
12946 if self.op.remove_instance:
12947 feedback_fn("Removing instance %s" % instance.name)
12948 _RemoveInstance(self, feedback_fn, instance,
12949 self.op.ignore_remove_failures)
12951 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12952 self._CleanupExports(feedback_fn)
12954 return fin_resu, dresults
12957 class LUBackupRemove(NoHooksLU):
12958 """Remove exports related to the named instance.
12963 def ExpandNames(self):
12964 self.needed_locks = {}
12965 # We need all nodes to be locked in order for RemoveExport to work, but we
12966 # don't need to lock the instance itself, as nothing will happen to it (and
12967 # we can also remove exports for a removed instance)
12968 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12970 def Exec(self, feedback_fn):
12971 """Remove any export.
12974 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12975 # If the instance was not found we'll try with the name that was passed in.
12976 # This will only work if it was an FQDN, though.
12978 if not instance_name:
12980 instance_name = self.op.instance_name
12982 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12983 exportlist = self.rpc.call_export_list(locked_nodes)
12985 for node in exportlist:
12986 msg = exportlist[node].fail_msg
12988 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12990 if instance_name in exportlist[node].payload:
12992 result = self.rpc.call_export_remove(node, instance_name)
12993 msg = result.fail_msg
12995 logging.error("Could not remove export for instance %s"
12996 " on node %s: %s", instance_name, node, msg)
12998 if fqdn_warn and not found:
12999 feedback_fn("Export not found. If trying to remove an export belonging"
13000 " to a deleted instance please use its Fully Qualified"
13004 class LUGroupAdd(LogicalUnit):
13005 """Logical unit for creating node groups.
13008 HPATH = "group-add"
13009 HTYPE = constants.HTYPE_GROUP
13012 def ExpandNames(self):
13013 # We need the new group's UUID here so that we can create and acquire the
13014 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13015 # that it should not check whether the UUID exists in the configuration.
13016 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13017 self.needed_locks = {}
13018 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13020 def CheckPrereq(self):
13021 """Check prerequisites.
13023 This checks that the given group name is not an existing node group
13028 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13029 except errors.OpPrereqError:
13032 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13033 " node group (UUID: %s)" %
13034 (self.op.group_name, existing_uuid),
13035 errors.ECODE_EXISTS)
13037 if self.op.ndparams:
13038 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13040 if self.op.hv_state:
13041 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13043 self.new_hv_state = None
13045 if self.op.disk_state:
13046 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13048 self.new_disk_state = None
13050 if self.op.diskparams:
13051 for templ in constants.DISK_TEMPLATES:
13052 if templ not in self.op.diskparams:
13053 self.op.diskparams[templ] = {}
13054 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13056 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13058 if self.op.ipolicy:
13059 cluster = self.cfg.GetClusterInfo()
13060 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13062 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13063 except errors.ConfigurationError, err:
13064 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13065 errors.ECODE_INVAL)
13067 def BuildHooksEnv(self):
13068 """Build hooks env.
13072 "GROUP_NAME": self.op.group_name,
13075 def BuildHooksNodes(self):
13076 """Build hooks nodes.
13079 mn = self.cfg.GetMasterNode()
13080 return ([mn], [mn])
13082 def Exec(self, feedback_fn):
13083 """Add the node group to the cluster.
13086 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13087 uuid=self.group_uuid,
13088 alloc_policy=self.op.alloc_policy,
13089 ndparams=self.op.ndparams,
13090 diskparams=self.op.diskparams,
13091 ipolicy=self.op.ipolicy,
13092 hv_state_static=self.new_hv_state,
13093 disk_state_static=self.new_disk_state)
13095 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13096 del self.remove_locks[locking.LEVEL_NODEGROUP]
13099 class LUGroupAssignNodes(NoHooksLU):
13100 """Logical unit for assigning nodes to groups.
13105 def ExpandNames(self):
13106 # These raise errors.OpPrereqError on their own:
13107 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13108 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13110 # We want to lock all the affected nodes and groups. We have readily
13111 # available the list of nodes, and the *destination* group. To gather the
13112 # list of "source" groups, we need to fetch node information later on.
13113 self.needed_locks = {
13114 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13115 locking.LEVEL_NODE: self.op.nodes,
13118 def DeclareLocks(self, level):
13119 if level == locking.LEVEL_NODEGROUP:
13120 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13122 # Try to get all affected nodes' groups without having the group or node
13123 # lock yet. Needs verification later in the code flow.
13124 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13126 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13128 def CheckPrereq(self):
13129 """Check prerequisites.
13132 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13133 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13134 frozenset(self.op.nodes))
13136 expected_locks = (set([self.group_uuid]) |
13137 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13138 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13139 if actual_locks != expected_locks:
13140 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13141 " current groups are '%s', used to be '%s'" %
13142 (utils.CommaJoin(expected_locks),
13143 utils.CommaJoin(actual_locks)))
13145 self.node_data = self.cfg.GetAllNodesInfo()
13146 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13147 instance_data = self.cfg.GetAllInstancesInfo()
13149 if self.group is None:
13150 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13151 (self.op.group_name, self.group_uuid))
13153 (new_splits, previous_splits) = \
13154 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13155 for node in self.op.nodes],
13156 self.node_data, instance_data)
13159 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13161 if not self.op.force:
13162 raise errors.OpExecError("The following instances get split by this"
13163 " change and --force was not given: %s" %
13166 self.LogWarning("This operation will split the following instances: %s",
13169 if previous_splits:
13170 self.LogWarning("In addition, these already-split instances continue"
13171 " to be split across groups: %s",
13172 utils.CommaJoin(utils.NiceSort(previous_splits)))
13174 def Exec(self, feedback_fn):
13175 """Assign nodes to a new group.
13178 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13180 self.cfg.AssignGroupNodes(mods)
13183 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13184 """Check for split instances after a node assignment.
13186 This method considers a series of node assignments as an atomic operation,
13187 and returns information about split instances after applying the set of
13188 changes.
13190 In particular, it returns information about newly split instances, and
13191 instances that were already split, and remain so after the change.
13193 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13194 considered.
13196 @type changes: list of (node_name, new_group_uuid) pairs.
13197 @param changes: list of node assignments to consider.
13198 @param node_data: a dict with data for all nodes
13199 @param instance_data: a dict with all instances to consider
13200 @rtype: a two-tuple
13201 @return: a list of instances that were previously okay and end up split as a
13202 consequence of this change, and a list of instances that were previously
13203 split and that this change does not fix.
13206 changed_nodes = dict((node, group) for node, group in changes
13207 if node_data[node].group != group)
13209 all_split_instances = set()
13210 previously_split_instances = set()
13212 def InstanceNodes(instance):
13213 return [instance.primary_node] + list(instance.secondary_nodes)
13215 for inst in instance_data.values():
13216 if inst.disk_template not in constants.DTS_INT_MIRROR:
13219 instance_nodes = InstanceNodes(inst)
13221 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13222 previously_split_instances.add(inst.name)
13224 if len(set(changed_nodes.get(node, node_data[node].group)
13225 for node in instance_nodes)) > 1:
13226 all_split_instances.add(inst.name)
13228 return (list(all_split_instances - previously_split_instances),
13229 list(previously_split_instances & all_split_instances))
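# Illustrative sketch (hypothetical data, not used by the module) of the split
# detection above: a mirrored instance whose secondary node moves to another
# group becomes newly split.  Node and instance objects are stand-ins built
# with namedtuples just for this example.
#
#   >>> from collections import namedtuple
#   >>> Node = namedtuple("Node", ["group"])
#   >>> Inst = namedtuple("Inst", ["name", "disk_template", "primary_node",
#   ...                            "secondary_nodes"])
#   >>> node_data = {"node1": Node("group-a"), "node2": Node("group-a")}
#   >>> instance_data = {"inst1": Inst("inst1", "drbd", "node1", ["node2"])}
#   >>> changes = [("node2", "group-b")]
#   >>> # With these inputs the check reports "inst1" as newly split: its
#   >>> # primary stays in group-a while its secondary moves to group-b.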
13232 class _GroupQuery(_QueryBase):
13233 FIELDS = query.GROUP_FIELDS
13235 def ExpandNames(self, lu):
13236 lu.needed_locks = {}
13238 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13239 self._cluster = lu.cfg.GetClusterInfo()
13240 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13243 self.wanted = [name_to_uuid[name]
13244 for name in utils.NiceSort(name_to_uuid.keys())]
13246 # Accept names to be either names or UUIDs.
13249 all_uuid = frozenset(self._all_groups.keys())
13251 for name in self.names:
13252 if name in all_uuid:
13253 self.wanted.append(name)
13254 elif name in name_to_uuid:
13255 self.wanted.append(name_to_uuid[name])
13257 missing.append(name)
13260 raise errors.OpPrereqError("Some groups do not exist: %s" %
13261 utils.CommaJoin(missing),
13262 errors.ECODE_NOENT)
13264 def DeclareLocks(self, lu, level):
13267 def _GetQueryData(self, lu):
13268 """Computes the list of node groups and their attributes.
13271 do_nodes = query.GQ_NODE in self.requested_data
13272 do_instances = query.GQ_INST in self.requested_data
13274 group_to_nodes = None
13275 group_to_instances = None
13277 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13278 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13279 # latter GetAllInstancesInfo() is not enough, for we have to go through
13280 # instance->node. Hence, we will need to process nodes even if we only need
13281 # instance information.
13282 if do_nodes or do_instances:
13283 all_nodes = lu.cfg.GetAllNodesInfo()
13284 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13287 for node in all_nodes.values():
13288 if node.group in group_to_nodes:
13289 group_to_nodes[node.group].append(node.name)
13290 node_to_group[node.name] = node.group
13293 all_instances = lu.cfg.GetAllInstancesInfo()
13294 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13296 for instance in all_instances.values():
13297 node = instance.primary_node
13298 if node in node_to_group:
13299 group_to_instances[node_to_group[node]].append(instance.name)
13302 # Do not pass on node information if it was not requested.
13303 group_to_nodes = None
13305 return query.GroupQueryData(self._cluster,
13306 [self._all_groups[uuid]
13307 for uuid in self.wanted],
13308 group_to_nodes, group_to_instances)
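# Illustrative sketch (hypothetical names): the two optional mappings built
# above, as they would look for a two-group cluster when both GQ_NODE and
# GQ_INST data were requested.
#
#   >>> group_to_nodes = {"uuid-a": ["node1", "node2"], "uuid-b": ["node3"]}
#   >>> group_to_instances = {"uuid-a": ["inst1"], "uuid-b": []}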
13311 class LUGroupQuery(NoHooksLU):
13312 """Logical unit for querying node groups.
13317 def CheckArguments(self):
13318 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13319 self.op.output_fields, False)
13321 def ExpandNames(self):
13322 self.gq.ExpandNames(self)
13324 def DeclareLocks(self, level):
13325 self.gq.DeclareLocks(self, level)
13327 def Exec(self, feedback_fn):
13328 return self.gq.OldStyleQuery(self)
13331 class LUGroupSetParams(LogicalUnit):
13332 """Modifies the parameters of a node group.
13335 HPATH = "group-modify"
13336 HTYPE = constants.HTYPE_GROUP
13339 def CheckArguments(self):
13342 self.op.diskparams,
13343 self.op.alloc_policy,
13345 self.op.disk_state,
13349 if all_changes.count(None) == len(all_changes):
13350 raise errors.OpPrereqError("Please pass at least one modification",
13351 errors.ECODE_INVAL)
13353 def ExpandNames(self):
13354 # This raises errors.OpPrereqError on its own:
13355 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13357 self.needed_locks = {
13358 locking.LEVEL_NODEGROUP: [self.group_uuid],
13361 def CheckPrereq(self):
13362 """Check prerequisites.
13365 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13367 if self.group is None:
13368 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13369 (self.op.group_name, self.group_uuid))
13371 if self.op.ndparams:
13372 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13373 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13374 self.new_ndparams = new_ndparams
13376 if self.op.diskparams:
13377 self.new_diskparams = dict()
13378 for templ in constants.DISK_TEMPLATES:
13379 if templ not in self.op.diskparams:
13380 self.op.diskparams[templ] = {}
13381 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13382 self.op.diskparams[templ])
13383 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13384 self.new_diskparams[templ] = new_templ_params
13386 if self.op.hv_state:
13387 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13388 self.group.hv_state_static)
13390 if self.op.disk_state:
13391 self.new_disk_state = \
13392 _MergeAndVerifyDiskState(self.op.disk_state,
13393 self.group.disk_state_static)
13395 if self.op.ipolicy:
13396 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13400 def BuildHooksEnv(self):
13401 """Build hooks env.
13405 "GROUP_NAME": self.op.group_name,
13406 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13409 def BuildHooksNodes(self):
13410 """Build hooks nodes.
13413 mn = self.cfg.GetMasterNode()
13414 return ([mn], [mn])
13416 def Exec(self, feedback_fn):
13417 """Modifies the node group.
13422 if self.op.ndparams:
13423 self.group.ndparams = self.new_ndparams
13424 result.append(("ndparams", str(self.group.ndparams)))
13426 if self.op.diskparams:
13427 self.group.diskparams = self.new_diskparams
13428 result.append(("diskparams", str(self.group.diskparams)))
13430 if self.op.alloc_policy:
13431 self.group.alloc_policy = self.op.alloc_policy
13433 if self.op.hv_state:
13434 self.group.hv_state_static = self.new_hv_state
13436 if self.op.disk_state:
13437 self.group.disk_state_static = self.new_disk_state
13439 if self.op.ipolicy:
13440 self.group.ipolicy = self.new_ipolicy
13442 self.cfg.Update(self.group, feedback_fn)
13446 class LUGroupRemove(LogicalUnit):
13447 HPATH = "group-remove"
13448 HTYPE = constants.HTYPE_GROUP
13451 def ExpandNames(self):
13452 # This raises errors.OpPrereqError on its own:
13453 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13454 self.needed_locks = {
13455 locking.LEVEL_NODEGROUP: [self.group_uuid],
13458 def CheckPrereq(self):
13459 """Check prerequisites.
13461 This checks that the given group name exists as a node group, that it is
13462 empty (i.e., contains no nodes), and that it is not the last group of the
13463 cluster.
13466 # Verify that the group is empty.
13467 group_nodes = [node.name
13468 for node in self.cfg.GetAllNodesInfo().values()
13469 if node.group == self.group_uuid]
13472 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13474 (self.op.group_name,
13475 utils.CommaJoin(utils.NiceSort(group_nodes))),
13476 errors.ECODE_STATE)
13478 # Verify the cluster would not be left group-less.
13479 if len(self.cfg.GetNodeGroupList()) == 1:
13480 raise errors.OpPrereqError("Group '%s' is the only group,"
13481 " cannot be removed" %
13482 self.op.group_name,
13483 errors.ECODE_STATE)
13485 def BuildHooksEnv(self):
13486 """Build hooks env.
13490 "GROUP_NAME": self.op.group_name,
13493 def BuildHooksNodes(self):
13494 """Build hooks nodes.
13497 mn = self.cfg.GetMasterNode()
13498 return ([mn], [mn])
13500 def Exec(self, feedback_fn):
13501 """Remove the node group.
13505 self.cfg.RemoveNodeGroup(self.group_uuid)
13506 except errors.ConfigurationError:
13507 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13508 (self.op.group_name, self.group_uuid))
13510 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13513 class LUGroupRename(LogicalUnit):
13514 HPATH = "group-rename"
13515 HTYPE = constants.HTYPE_GROUP
13518 def ExpandNames(self):
13519 # This raises errors.OpPrereqError on its own:
13520 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13522 self.needed_locks = {
13523 locking.LEVEL_NODEGROUP: [self.group_uuid],
13526 def CheckPrereq(self):
13527 """Check prerequisites.
13529 Ensures requested new name is not yet used.
13533 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13534 except errors.OpPrereqError:
13537 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13538 " node group (UUID: %s)" %
13539 (self.op.new_name, new_name_uuid),
13540 errors.ECODE_EXISTS)
13542 def BuildHooksEnv(self):
13543 """Build hooks env.
13547 "OLD_NAME": self.op.group_name,
13548 "NEW_NAME": self.op.new_name,
13551 def BuildHooksNodes(self):
13552 """Build hooks nodes.
13555 mn = self.cfg.GetMasterNode()
13557 all_nodes = self.cfg.GetAllNodesInfo()
13558 all_nodes.pop(mn, None)
13561 run_nodes.extend(node.name for node in all_nodes.values()
13562 if node.group == self.group_uuid)
13564 return (run_nodes, run_nodes)
13566 def Exec(self, feedback_fn):
13567 """Rename the node group.
13570 group = self.cfg.GetNodeGroup(self.group_uuid)
13573 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13574 (self.op.group_name, self.group_uuid))
13576 group.name = self.op.new_name
13577 self.cfg.Update(group, feedback_fn)
13579 return self.op.new_name
13582 class LUGroupEvacuate(LogicalUnit):
13583 HPATH = "group-evacuate"
13584 HTYPE = constants.HTYPE_GROUP
13587 def ExpandNames(self):
13588 # This raises errors.OpPrereqError on its own:
13589 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13591 if self.op.target_groups:
13592 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13593 self.op.target_groups)
13595 self.req_target_uuids = []
13597 if self.group_uuid in self.req_target_uuids:
13598 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13599 " as a target group (targets are %s)" %
13601 utils.CommaJoin(self.req_target_uuids)),
13602 errors.ECODE_INVAL)
13604 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13606 self.share_locks = _ShareAll()
13607 self.needed_locks = {
13608 locking.LEVEL_INSTANCE: [],
13609 locking.LEVEL_NODEGROUP: [],
13610 locking.LEVEL_NODE: [],
13613 def DeclareLocks(self, level):
13614 if level == locking.LEVEL_INSTANCE:
13615 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13617 # Lock instances optimistically, needs verification once node and group
13618 # locks have been acquired
13619 self.needed_locks[locking.LEVEL_INSTANCE] = \
13620 self.cfg.GetNodeGroupInstances(self.group_uuid)
13622 elif level == locking.LEVEL_NODEGROUP:
13623 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13625 if self.req_target_uuids:
13626 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13628 # Lock all groups used by instances optimistically; this requires going
13629 # via the node before it's locked, requiring verification later on
13630 lock_groups.update(group_uuid
13631 for instance_name in
13632 self.owned_locks(locking.LEVEL_INSTANCE)
13634 self.cfg.GetInstanceNodeGroups(instance_name))
13636 # No target groups, need to lock all of them
13637 lock_groups = locking.ALL_SET
13639 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13641 elif level == locking.LEVEL_NODE:
13642 # This will only lock the nodes in the group to be evacuated which
13643 # contain actual instances
13644 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13645 self._LockInstancesNodes()
13647 # Lock all nodes in group to be evacuated and target groups
13648 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13649 assert self.group_uuid in owned_groups
13650 member_nodes = [node_name
13651 for group in owned_groups
13652 for node_name in self.cfg.GetNodeGroup(group).members]
13653 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13655 def CheckPrereq(self):
13656 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13657 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13658 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13660 assert owned_groups.issuperset(self.req_target_uuids)
13661 assert self.group_uuid in owned_groups
13663 # Check if locked instances are still correct
13664 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13666 # Get instance information
13667 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13669 # Check if node groups for locked instances are still correct
13670 for instance_name in owned_instances:
13671 inst = self.instances[instance_name]
13672 assert owned_nodes.issuperset(inst.all_nodes), \
13673 "Instance %s's nodes changed while we kept the lock" % instance_name
13675 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13678 assert self.group_uuid in inst_groups, \
13679 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13681 if self.req_target_uuids:
13682 # User requested specific target groups
13683 self.target_uuids = self.req_target_uuids
13685 # All groups except the one to be evacuated are potential targets
13686 self.target_uuids = [group_uuid for group_uuid in owned_groups
13687 if group_uuid != self.group_uuid]
13689 if not self.target_uuids:
13690 raise errors.OpPrereqError("There are no possible target groups",
13691 errors.ECODE_INVAL)
13693 def BuildHooksEnv(self):
13694 """Build hooks env.
13698 "GROUP_NAME": self.op.group_name,
13699 "TARGET_GROUPS": " ".join(self.target_uuids),
13702 def BuildHooksNodes(self):
13703 """Build hooks nodes.
13706 mn = self.cfg.GetMasterNode()
13708 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13710 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13712 return (run_nodes, run_nodes)
13714 def Exec(self, feedback_fn):
13715 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13717 assert self.group_uuid not in self.target_uuids
13719 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13720 instances=instances, target_groups=self.target_uuids)
13722 ial.Run(self.op.iallocator)
13724 if not ial.success:
13725 raise errors.OpPrereqError("Can't compute group evacuation using"
13726 " iallocator '%s': %s" %
13727 (self.op.iallocator, ial.info),
13728 errors.ECODE_NORES)
13730 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13732 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13733 len(jobs), self.op.group_name)
13735 return ResultWithJobs(jobs)
13738 class TagsLU(NoHooksLU): # pylint: disable=W0223
13739 """Generic tags LU.
13741 This is an abstract class which is the parent of all the other tags LUs.
13744 def ExpandNames(self):
13745 self.group_uuid = None
13746 self.needed_locks = {}
13747 if self.op.kind == constants.TAG_NODE:
13748 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13749 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13750 elif self.op.kind == constants.TAG_INSTANCE:
13751 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13752 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13753 elif self.op.kind == constants.TAG_NODEGROUP:
13754 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13756 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13757 # not possible to acquire the BGL based on opcode parameters)
13759 def CheckPrereq(self):
13760 """Check prerequisites.
13763 if self.op.kind == constants.TAG_CLUSTER:
13764 self.target = self.cfg.GetClusterInfo()
13765 elif self.op.kind == constants.TAG_NODE:
13766 self.target = self.cfg.GetNodeInfo(self.op.name)
13767 elif self.op.kind == constants.TAG_INSTANCE:
13768 self.target = self.cfg.GetInstanceInfo(self.op.name)
13769 elif self.op.kind == constants.TAG_NODEGROUP:
13770 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13772 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13773 str(self.op.kind), errors.ECODE_INVAL)
13776 class LUTagsGet(TagsLU):
13777 """Returns the tags of a given object.
13782 def ExpandNames(self):
13783 TagsLU.ExpandNames(self)
13785 # Share locks as this is only a read operation
13786 self.share_locks = _ShareAll()
13788 def Exec(self, feedback_fn):
13789 """Returns the tag list.
13792 return list(self.target.GetTags())
13795 class LUTagsSearch(NoHooksLU):
13796 """Searches the tags for a given pattern.
13801 def ExpandNames(self):
13802 self.needed_locks = {}
13804 def CheckPrereq(self):
13805 """Check prerequisites.
13807 This checks the pattern passed for validity by compiling it.
13811 self.re = re.compile(self.op.pattern)
13812 except re.error, err:
13813 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13814 (self.op.pattern, err), errors.ECODE_INVAL)
13816 def Exec(self, feedback_fn):
13817 """Returns the tag list.
13821 tgts = [("/cluster", cfg.GetClusterInfo())]
13822 ilist = cfg.GetAllInstancesInfo().values()
13823 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13824 nlist = cfg.GetAllNodesInfo().values()
13825 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13826 tgts.extend(("/nodegroup/%s" % n.name, n)
13827 for n in cfg.GetAllNodeGroupsInfo().values())
13829 for path, target in tgts:
13830 for tag in target.GetTags():
13831 if self.re.search(tag):
13832 results.append((path, tag))
13836 class LUTagsSet(TagsLU):
13837 """Sets a tag on a given object.
13842 def CheckPrereq(self):
13843 """Check prerequisites.
13845 This checks the type and length of the tag name and value.
13848 TagsLU.CheckPrereq(self)
13849 for tag in self.op.tags:
13850 objects.TaggableObject.ValidateTag(tag)
13852 def Exec(self, feedback_fn):
13857 for tag in self.op.tags:
13858 self.target.AddTag(tag)
13859 except errors.TagError, err:
13860 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13861 self.cfg.Update(self.target, feedback_fn)
13864 class LUTagsDel(TagsLU):
13865 """Delete a list of tags from a given object.
13870 def CheckPrereq(self):
13871 """Check prerequisites.
13873 This checks that we have the given tag.
13876 TagsLU.CheckPrereq(self)
13877 for tag in self.op.tags:
13878 objects.TaggableObject.ValidateTag(tag)
13879 del_tags = frozenset(self.op.tags)
13880 cur_tags = self.target.GetTags()
13882 diff_tags = del_tags - cur_tags
13884 diff_names = ("'%s'" % i for i in sorted(diff_tags))
13885 raise errors.OpPrereqError("Tag(s) %s not found" %
13886 (utils.CommaJoin(diff_names), ),
13887 errors.ECODE_NOENT)
13889 def Exec(self, feedback_fn):
13890 """Remove the tag from the object.
13893 for tag in self.op.tags:
13894 self.target.RemoveTag(tag)
13895 self.cfg.Update(self.target, feedback_fn)
13898 class LUTestDelay(NoHooksLU):
13899 """Sleep for a specified amount of time.
13901 This LU sleeps on the master and/or nodes for a specified amount of
13902 time.
13907 def ExpandNames(self):
13908 """Expand names and set required locks.
13910 This expands the node list, if any.
13913 self.needed_locks = {}
13914 if self.op.on_nodes:
13915 # _GetWantedNodes can be used here, but is not always appropriate to use
13916 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13917 # more information.
13918 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13919 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13921 def _TestDelay(self):
13922 """Do the actual sleep.
13925 if self.op.on_master:
13926 if not utils.TestDelay(self.op.duration):
13927 raise errors.OpExecError("Error during master delay test")
13928 if self.op.on_nodes:
13929 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13930 for node, node_result in result.items():
13931 node_result.Raise("Failure during rpc call to node %s" % node)
13933 def Exec(self, feedback_fn):
13934 """Execute the test delay opcode, with the wanted repetitions.
13937 if self.op.repeat == 0:
13940 top_value = self.op.repeat - 1
13941 for i in range(self.op.repeat):
13942 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13946 class LUTestJqueue(NoHooksLU):
13947 """Utility LU to test some aspects of the job queue.
13952 # Must be lower than default timeout for WaitForJobChange to see whether it
13953 # notices changed jobs
13954 _CLIENT_CONNECT_TIMEOUT = 20.0
13955 _CLIENT_CONFIRM_TIMEOUT = 60.0
13958 def _NotifyUsingSocket(cls, cb, errcls):
13959 """Opens a Unix socket and waits for another program to connect.
13962 @param cb: Callback to send socket name to client
13963 @type errcls: class
13964 @param errcls: Exception class to use for errors
13967 # Using a temporary directory as there's no easy way to create temporary
13968 # sockets without writing a custom loop around tempfile.mktemp and
13969 # socket.bind
13970 tmpdir = tempfile.mkdtemp()
13972 tmpsock = utils.PathJoin(tmpdir, "sock")
13974 logging.debug("Creating temporary socket at %s", tmpsock)
13975 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13980 # Send details to client
13983 # Wait for client to connect before continuing
13984 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13986 (conn, _) = sock.accept()
13987 except socket.error, err:
13988 raise errcls("Client didn't connect in time (%s)" % err)
13992 # Remove as soon as client is connected
13993 shutil.rmtree(tmpdir)
13995 # Wait for client to close
13998 # pylint: disable=E1101
13999 # Instance of '_socketobject' has no ... member
14000 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14002 except socket.error, err:
14003 raise errcls("Client failed to confirm notification (%s)" % err)
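# Rough sketch of the client side expected by the helper above (assuming an
# external test driver; purely illustrative): it connects to the socket path
# it was given, performs its checks, and then closes the connection so the LU
# can continue.
#
#   >>> import socket
#   >>> def _ConfirmNotification(sockname):
#   ...   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   ...   try:
#   ...     sock.connect(sockname)  # unblocks the LU's accept()
#   ...   finally:
#   ...     sock.close()            # unblocks the LU's wait-for-close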
14007 def _SendNotification(self, test, arg, sockname):
14008 """Sends a notification to the client.
14011 @param test: Test name
14012 @param arg: Test argument (depends on test)
14013 @type sockname: string
14014 @param sockname: Socket path
14017 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14019 def _Notify(self, prereq, test, arg):
14020 """Notifies the client of a test.
14023 @param prereq: Whether this is a prereq-phase test
14025 @param test: Test name
14026 @param arg: Test argument (depends on test)
14030 errcls = errors.OpPrereqError
14032 errcls = errors.OpExecError
14034 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14038 def CheckArguments(self):
14039 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14040 self.expandnames_calls = 0
14042 def ExpandNames(self):
14043 checkargs_calls = getattr(self, "checkargs_calls", 0)
14044 if checkargs_calls < 1:
14045 raise errors.ProgrammerError("CheckArguments was not called")
14047 self.expandnames_calls += 1
14049 if self.op.notify_waitlock:
14050 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14052 self.LogInfo("Expanding names")
14054 # Get lock on master node (just to get a lock, not for a particular reason)
14055 self.needed_locks = {
14056 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14059 def Exec(self, feedback_fn):
14060 if self.expandnames_calls < 1:
14061 raise errors.ProgrammerError("ExpandNames was not called")
14063 if self.op.notify_exec:
14064 self._Notify(False, constants.JQT_EXEC, None)
14066 self.LogInfo("Executing")
14068 if self.op.log_messages:
14069 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14070 for idx, msg in enumerate(self.op.log_messages):
14071 self.LogInfo("Sending log message %s", idx + 1)
14072 feedback_fn(constants.JQT_MSGPREFIX + msg)
14073 # Report how many test messages have been sent
14074 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14077 raise errors.OpExecError("Opcode failure was requested")
14082 class IAllocator(object):
14083 """IAllocator framework.
14085 An IAllocator instance has four sets of attributes:
14086 - cfg that is needed to query the cluster
14087 - input data (all members of the _KEYS class attribute are required)
14088 - four buffer attributes (in|out_data|text), that represent the
14089 input (to the external script) in text and data structure format,
14090 and the output from it, again in two formats
14091 - the result variables from the script (success, info, nodes) for
14092 easy usage
14095 # pylint: disable=R0902
14096 # lots of instance attributes
14098 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14100 self.rpc = rpc_runner
14101 # init buffer variables
14102 self.in_text = self.out_text = self.in_data = self.out_data = None
14103 # init all input fields so that pylint is happy
14105 self.memory = self.disks = self.disk_template = None
14106 self.os = self.tags = self.nics = self.vcpus = None
14107 self.hypervisor = None
14108 self.relocate_from = None
14110 self.instances = None
14111 self.evac_mode = None
14112 self.target_groups = []
14114 self.required_nodes = None
14115 # init result fields
14116 self.success = self.info = self.result = None
14119 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14121 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14122 " IAllocator" % self.mode)
14124 keyset = [n for (n, _) in keydata]
14127 if key not in keyset:
14128 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14129 " IAllocator" % key)
14130 setattr(self, key, kwargs[key])
14133 if key not in kwargs:
14134 raise errors.ProgrammerError("Missing input parameter '%s' to"
14135 " IAllocator" % key)
14136 self._BuildInputData(compat.partial(fn, self), keydata)
14138 def _ComputeClusterData(self):
14139 """Compute the generic allocator input data.
14141 This is the data that is independent of the actual operation.
14145 cluster_info = cfg.GetClusterInfo()
14148 "version": constants.IALLOCATOR_VERSION,
14149 "cluster_name": cfg.GetClusterName(),
14150 "cluster_tags": list(cluster_info.GetTags()),
14151 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14152 # we don't have job IDs
14154 ninfo = cfg.GetAllNodesInfo()
14155 iinfo = cfg.GetAllInstancesInfo().values()
14156 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14159 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14161 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14162 hypervisor_name = self.hypervisor
14163 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14164 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14166 hypervisor_name = cluster_info.primary_hypervisor
14168 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14171 self.rpc.call_all_instances_info(node_list,
14172 cluster_info.enabled_hypervisors)
14174 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14176 config_ndata = self._ComputeBasicNodeData(ninfo)
14177 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14178 i_list, config_ndata)
14179 assert len(data["nodes"]) == len(ninfo), \
14180 "Incomplete node data computed"
14182 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14184 self.in_data = data
14187 def _ComputeNodeGroupData(cfg):
14188 """Compute node groups data.
14191 cluster = cfg.GetClusterInfo()
14192 ng = dict((guuid, {
14193 "name": gdata.name,
14194 "alloc_policy": gdata.alloc_policy,
14195 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14197 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14202 def _ComputeBasicNodeData(node_cfg):
14203 """Compute global node data.
14206 @returns: a dict of name: (node dict, node config)
14209 # fill in static (config-based) values
14210 node_results = dict((ninfo.name, {
14211 "tags": list(ninfo.GetTags()),
14212 "primary_ip": ninfo.primary_ip,
14213 "secondary_ip": ninfo.secondary_ip,
14214 "offline": ninfo.offline,
14215 "drained": ninfo.drained,
14216 "master_candidate": ninfo.master_candidate,
14217 "group": ninfo.group,
14218 "master_capable": ninfo.master_capable,
14219 "vm_capable": ninfo.vm_capable,
14221 for ninfo in node_cfg.values())
14223 return node_results
14226 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14228 """Compute global node data.
14230 @param node_results: the basic node structures as filled from the config
14233 #TODO(dynmem): compute the right data on MAX and MIN memory
14234 # make a copy of the current dict
14235 node_results = dict(node_results)
14236 for nname, nresult in node_data.items():
14237 assert nname in node_results, "Missing basic data for node %s" % nname
14238 ninfo = node_cfg[nname]
14240 if not (ninfo.offline or ninfo.drained):
14241 nresult.Raise("Can't get data for node %s" % nname)
14242 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14244 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14246 for attr in ["memory_total", "memory_free", "memory_dom0",
14247 "vg_size", "vg_free", "cpu_total"]:
14248 if attr not in remote_info:
14249 raise errors.OpExecError("Node '%s' didn't return attribute"
14250 " '%s'" % (nname, attr))
14251 if not isinstance(remote_info[attr], int):
14252 raise errors.OpExecError("Node '%s' returned invalid value"
14254 (nname, attr, remote_info[attr]))
14255 # compute memory used by primary instances
14256 i_p_mem = i_p_up_mem = 0
14257 for iinfo, beinfo in i_list:
14258 if iinfo.primary_node == nname:
14259 i_p_mem += beinfo[constants.BE_MAXMEM]
14260 if iinfo.name not in node_iinfo[nname].payload:
14263 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14264 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14265 remote_info["memory_free"] -= max(0, i_mem_diff)
14267 if iinfo.admin_state == constants.ADMINST_UP:
14268 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14270 # compute memory used by instances
14272 "total_memory": remote_info["memory_total"],
14273 "reserved_memory": remote_info["memory_dom0"],
14274 "free_memory": remote_info["memory_free"],
14275 "total_disk": remote_info["vg_size"],
14276 "free_disk": remote_info["vg_free"],
14277 "total_cpus": remote_info["cpu_total"],
14278 "i_pri_memory": i_p_mem,
14279 "i_pri_up_memory": i_p_up_mem,
14281 pnr_dyn.update(node_results[nname])
14282 node_results[nname] = pnr_dyn
14284 return node_results
14287 def _ComputeInstanceData(cluster_info, i_list):
14288 """Compute global instance data.
14292 for iinfo, beinfo in i_list:
14294 for nic in iinfo.nics:
14295 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14299 "mode": filled_params[constants.NIC_MODE],
14300 "link": filled_params[constants.NIC_LINK],
14302 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14303 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14304 nic_data.append(nic_dict)
14306 "tags": list(iinfo.GetTags()),
14307 "admin_state": iinfo.admin_state,
14308 "vcpus": beinfo[constants.BE_VCPUS],
14309 "memory": beinfo[constants.BE_MAXMEM],
14311 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14313 "disks": [{constants.IDISK_SIZE: dsk.size,
14314 constants.IDISK_MODE: dsk.mode}
14315 for dsk in iinfo.disks],
14316 "disk_template": iinfo.disk_template,
14317 "hypervisor": iinfo.hypervisor,
14319 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14321 instance_data[iinfo.name] = pir
14323 return instance_data
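# Illustrative sketch (all values hypothetical) of one entry in the
# "instances" dictionary built above:
#
#   >>> example_entry = {
#   ...   "tags": [], "admin_state": "up", "vcpus": 2, "memory": 1024,
#   ...   "os": "debian-image", "nodes": ["node1", "node2"],
#   ...   "nics": [{"mac": "aa:00:00:00:00:01", "ip": None,
#   ...             "mode": "bridged", "link": "xen-br0"}],
#   ...   "disks": [{"size": 10240, "mode": "rw"}],
#   ...   "disk_template": "drbd", "hypervisor": "xen-pvm",
#   ...   "disk_space_total": 10368,
#   ...   }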
14325 def _AddNewInstance(self):
14326 """Add new instance data to allocator structure.
14328 This in combination with _ComputeClusterData will create the
14329 correct structure needed as input for the allocator.
14331 The checks for the completeness of the opcode must have already been
14332 done.
14335 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14337 if self.disk_template in constants.DTS_INT_MIRROR:
14338 self.required_nodes = 2
14340 self.required_nodes = 1
14344 "disk_template": self.disk_template,
14347 "vcpus": self.vcpus,
14348 "memory": self.memory,
14349 "disks": self.disks,
14350 "disk_space_total": disk_space,
14352 "required_nodes": self.required_nodes,
14353 "hypervisor": self.hypervisor,
14358 def _AddRelocateInstance(self):
14359 """Add relocate instance data to allocator structure.
14361 This in combination with _ComputeClusterData will create the
14362 correct structure needed as input for the allocator.
14364 The checks for the completeness of the opcode must have already been
14365 done.
14368 instance = self.cfg.GetInstanceInfo(self.name)
14369 if instance is None:
14370 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14371 " IAllocator" % self.name)
14373 if instance.disk_template not in constants.DTS_MIRRORED:
14374 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14375 errors.ECODE_INVAL)
14377 if instance.disk_template in constants.DTS_INT_MIRROR and \
14378 len(instance.secondary_nodes) != 1:
14379 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14380 errors.ECODE_STATE)
14382 self.required_nodes = 1
14383 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14384 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14388 "disk_space_total": disk_space,
14389 "required_nodes": self.required_nodes,
14390 "relocate_from": self.relocate_from,
14394 def _AddNodeEvacuate(self):
14395 """Get data for node-evacuate requests.
14399 "instances": self.instances,
14400 "evac_mode": self.evac_mode,
14403 def _AddChangeGroup(self):
14404 """Get data for node-evacuate requests.
14408 "instances": self.instances,
14409 "target_groups": self.target_groups,
14412 def _BuildInputData(self, fn, keydata):
14413 """Build input data structures.
14416 self._ComputeClusterData()
14419 request["type"] = self.mode
14420 for keyname, keytype in keydata:
14421 if keyname not in request:
14422 raise errors.ProgrammerError("Request parameter %s is missing" %
14424 val = request[keyname]
14425 if not keytype(val):
14426 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14427 " validation, value %s, expected"
14428 " type %s" % (keyname, val, keytype))
14429 self.in_data["request"] = request
14431 self.in_text = serializer.Dump(self.in_data)
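# Minimal sketch of what self.in_data["request"] ends up looking like for the
# change-group mode used above (the "type" value and all other values are
# assumed, not taken from this module):
#
#   >>> request = {
#   ...   "type": "change-group",  # constants.IALLOCATOR_MODE_CHG_GROUP
#   ...   "instances": ["inst1.example.com"],
#   ...   "target_groups": ["target-group-uuid"],
#   ...   }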
14433 _STRING_LIST = ht.TListOf(ht.TString)
14434 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14435 # pylint: disable=E1101
14436 # Class '...' has no 'OP_ID' member
14437 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14438 opcodes.OpInstanceMigrate.OP_ID,
14439 opcodes.OpInstanceReplaceDisks.OP_ID])
14443 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14444 ht.TItems([ht.TNonEmptyString,
14445 ht.TNonEmptyString,
14446 ht.TListOf(ht.TNonEmptyString),
14449 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14450 ht.TItems([ht.TNonEmptyString,
14453 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14454 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14457 constants.IALLOCATOR_MODE_ALLOC:
14460 ("name", ht.TString),
14461 ("memory", ht.TInt),
14462 ("disks", ht.TListOf(ht.TDict)),
14463 ("disk_template", ht.TString),
14464 ("os", ht.TString),
14465 ("tags", _STRING_LIST),
14466 ("nics", ht.TListOf(ht.TDict)),
14467 ("vcpus", ht.TInt),
14468 ("hypervisor", ht.TString),
14470 constants.IALLOCATOR_MODE_RELOC:
14471 (_AddRelocateInstance,
14472 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14474 constants.IALLOCATOR_MODE_NODE_EVAC:
14475 (_AddNodeEvacuate, [
14476 ("instances", _STRING_LIST),
14477 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14479 constants.IALLOCATOR_MODE_CHG_GROUP:
14480 (_AddChangeGroup, [
14481 ("instances", _STRING_LIST),
14482 ("target_groups", _STRING_LIST),
14486 def Run(self, name, validate=True, call_fn=None):
14487 """Run an instance allocator and return the results.
14490 if call_fn is None:
14491 call_fn = self.rpc.call_iallocator_runner
14493 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14494 result.Raise("Failure while running the iallocator script")
14496 self.out_text = result.payload
14498 self._ValidateResult()
14500 def _ValidateResult(self):
14501 """Process the allocator results.
14503 This will process and, if successful, save the result in
14504 self.out_data and the other parameters.
14508 rdict = serializer.Load(self.out_text)
14509 except Exception, err:
14510 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14512 if not isinstance(rdict, dict):
14513 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14515 # TODO: remove backwards compatibility in later versions
14516 if "nodes" in rdict and "result" not in rdict:
14517 rdict["result"] = rdict["nodes"]
14520 for key in "success", "info", "result":
14521 if key not in rdict:
14522 raise errors.OpExecError("Can't parse iallocator results:"
14523 " missing key '%s'" % key)
14524 setattr(self, key, rdict[key])
14526 if not self._result_check(self.result):
14527 raise errors.OpExecError("Iallocator returned invalid result,"
14528 " expected %s, got %s" %
14529 (self._result_check, self.result),
14530 errors.ECODE_INVAL)
14532 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14533 assert self.relocate_from is not None
14534 assert self.required_nodes == 1
14536 node2group = dict((name, ndata["group"])
14537 for (name, ndata) in self.in_data["nodes"].items())
14539 fn = compat.partial(self._NodesToGroups, node2group,
14540 self.in_data["nodegroups"])
14542 instance = self.cfg.GetInstanceInfo(self.name)
14543 request_groups = fn(self.relocate_from + [instance.primary_node])
14544 result_groups = fn(rdict["result"] + [instance.primary_node])
14546 if self.success and not set(result_groups).issubset(request_groups):
14547 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14548 " differ from original groups (%s)" %
14549 (utils.CommaJoin(result_groups),
14550 utils.CommaJoin(request_groups)))
14552 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14553 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14555 self.out_data = rdict
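# Illustrative sketch (hypothetical values) of a reply that passes the
# validation above: the three mandatory keys, with "result" in whatever shape
# the mode's result check expects (a list of node names for an allocation).
#
#   >>> rdict = {
#   ...   "success": True,
#   ...   "info": "allocation successful",
#   ...   "result": ["node2.example.com"],
#   ...   }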
14558 def _NodesToGroups(node2group, groups, nodes):
14559 """Returns a list of unique group names for a list of nodes.
14561 @type node2group: dict
14562 @param node2group: Map from node name to group UUID
14564 @param groups: Group information
14566 @param nodes: Node names
14573 group_uuid = node2group[node]
14575 # Ignore unknown node
14579 group = groups[group_uuid]
14581 # Can't find group, let's use UUID
14582 group_name = group_uuid
14584 group_name = group["name"]
14586 result.add(group_name)
14588 return sorted(result)
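# Small worked example (hypothetical UUIDs) of the helper above; unknown nodes
# are ignored and groups without a known name fall back to their UUID.
#
#   >>> node2group = {"node1": "uuid-a", "node2": "uuid-b", "node3": "uuid-a"}
#   >>> groups = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "rack2"}}
#   >>> IAllocator._NodesToGroups(node2group, groups,
#   ...                           ["node1", "node2", "node9"])
#   ['default', 'rack2']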
14591 class LUTestAllocator(NoHooksLU):
14592 """Run allocator tests.
14594 This LU runs the allocator tests
14597 def CheckPrereq(self):
14598 """Check prerequisites.
14600 This checks the opcode parameters depending on the direction and mode of the test.
14603 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14604 for attr in ["memory", "disks", "disk_template",
14605 "os", "tags", "nics", "vcpus"]:
14606 if not hasattr(self.op, attr):
14607 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14608 attr, errors.ECODE_INVAL)
14609 iname = self.cfg.ExpandInstanceName(self.op.name)
14610 if iname is not None:
14611 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14612 iname, errors.ECODE_EXISTS)
14613 if not isinstance(self.op.nics, list):
14614 raise errors.OpPrereqError("Invalid parameter 'nics'",
14615 errors.ECODE_INVAL)
14616 if not isinstance(self.op.disks, list):
14617 raise errors.OpPrereqError("Invalid parameter 'disks'",
14618 errors.ECODE_INVAL)
14619 for row in self.op.disks:
14620 if (not isinstance(row, dict) or
14621 constants.IDISK_SIZE not in row or
14622 not isinstance(row[constants.IDISK_SIZE], int) or
14623 constants.IDISK_MODE not in row or
14624 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14625 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14626 " parameter", errors.ECODE_INVAL)
14627 if self.op.hypervisor is None:
14628 self.op.hypervisor = self.cfg.GetHypervisorType()
14629 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14630 fname = _ExpandInstanceName(self.cfg, self.op.name)
14631 self.op.name = fname
14632 self.relocate_from = \
14633 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14634 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14635 constants.IALLOCATOR_MODE_NODE_EVAC):
14636 if not self.op.instances:
14637 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14638 self.op.instances = _GetWantedInstances(self, self.op.instances)
14640 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14641 self.op.mode, errors.ECODE_INVAL)
14643 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14644 if self.op.allocator is None:
14645 raise errors.OpPrereqError("Missing allocator name",
14646 errors.ECODE_INVAL)
14647 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14648 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14649 self.op.direction, errors.ECODE_INVAL)
14651 def Exec(self, feedback_fn):
14652 """Run the allocator test.
14655 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14656 ial = IAllocator(self.cfg, self.rpc,
14659 memory=self.op.memory,
14660 disks=self.op.disks,
14661 disk_template=self.op.disk_template,
14665 vcpus=self.op.vcpus,
14666 hypervisor=self.op.hypervisor,
14668 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14669 ial = IAllocator(self.cfg, self.rpc,
14672 relocate_from=list(self.relocate_from),
14674 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14675 ial = IAllocator(self.cfg, self.rpc,
14677 instances=self.op.instances,
14678 target_groups=self.op.target_groups)
14679 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14680 ial = IAllocator(self.cfg, self.rpc,
14682 instances=self.op.instances,
14683 evac_mode=self.op.evac_mode)
14685 raise errors.ProgrammerError("Uncatched mode %s in"
14686 " LUTestAllocator.Exec", self.op.mode)
14688 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14689 result = ial.in_text
14691 ial.Run(self.op.allocator, validate=False)
14692 result = ial.out_text
14696 #: Query type implementations
14698 constants.QR_INSTANCE: _InstanceQuery,
14699 constants.QR_NODE: _NodeQuery,
14700 constants.QR_GROUP: _GroupQuery,
14701 constants.QR_OS: _OsQuery,
14704 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14707 def _GetQueryImplementation(name):
14708 """Returns the implemtnation for a query type.
14710 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14714 return _QUERY_IMPL[name]
14716 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14717 errors.ECODE_INVAL)