# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
DRBD_META_SIZE = 128

INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
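

# Illustrative sketch: an LU hands follow-up work to the job queue by
# returning a ResultWithJobs from Exec; mcpu then submits the jobs and adds
# their IDs to the opcode result. The opcode parameters below are
# hypothetical.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpInstanceStartup(instance_name="inst1.example.com")]]
#     return ResultWithJobs(jobs)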


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
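
  # Illustrative sketch: a typical concurrent LU (hypothetical) locks its
  # instance in ExpandNames and defers node locking to DeclareLocks:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE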

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)


def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })
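

# Illustrative sketch of the reshaping done above (the vg_info/hv_info keys
# are hypothetical; only "bootid" is added by this function):
#
#   data = ("boot-id", ({"vg_free": 10240}, ), ({"memory_free": 2048}, ))
#   _MakeLegacyNodeInfo(data)
#   # => {"vg_free": 10240, "memory_free": 2048, "bootid": "boot-id"}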


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
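

# Illustrative sketch of _GetUpdatedParams semantics (keys and values are
# hypothetical):
#
#   old = {"mem": 128, "vcpus": 2}
#   _GetUpdatedParams(old, {"mem": constants.VALUE_DEFAULT, "vcpus": 4})
#   # => {"vcpus": 4}; "default" deletes a key, other values override, and
#   # old_params itself is left untouched (deep copy)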


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with that of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with that of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
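

# Illustrative sketch: after narrowing its work to a single node, an LU can
# drop the other node locks it still owns (the node name is hypothetical):
#
#   _ReleaseLocks(lu, locking.LEVEL_NODE, keep=["node1.example.com"])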


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = static
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
                               (instance, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)


def _CheckMinMaxSpecs(name, ipolicy, value):
  """Checks if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, min_v, max_v))
  return None


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes,
                                 _check_spec_fn=_CheckMinMaxSpecs):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @param _check_spec_fn: The checking function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

  return filter(None,
                (_check_spec_fn(name, ipolicy, value)
                 for (name, value) in test_settings))
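

# Illustrative sketch (hypothetical policy values): with ISPECS_MIN/ISPECS_MAX
# memory-size bounds of 512/4096, a 128 MB spec yields one violation string
# while all other settings pass:
#
#   _ComputeIPolicySpecViolation(ipolicy, mem_size=128, cpu_count=1,
#                                disk_count=1, nic_count=1, disk_sizes=[1024])
#   # => a single message like "... value 128 is not in range [512, 4096]"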


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)


def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  res = _compute_fn(ipolicy, instance, instance.primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance-related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance-related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
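

# Worked example (hypothetical numbers): with candidate_pool_size = 10,
# 3 current candidates and 3 possible candidates, the node being added makes
# mc_should = min(3 + 1, 10) = 4; since mc_now (3) < 4, the new node should
# promote itself.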


def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
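

# Illustrative sketch: variants are encoded in the OS name after a "+", so
# for an OS declaring a (hypothetical) "squeeze" variant,
# "debootstrap+squeeze" passes, plain "debootstrap" fails with "OS name must
# include a variant", and "debootstrap+sid" fails as unsupported.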


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item is not None:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to
    # a warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
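

# Illustrative usage sketch of _VerifyErrors._ErrorIf from within a verify
# LU's Exec (the condition, error code and message here are hypothetical):
#
#   test = node_result is None
#   self._ErrorIf(test, constants.CV_ENODERPC, node,
#                 "unable to verify node: no data returned")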


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non-"
                  "existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
1946 def ExpandNames(self):
1947 # This raises errors.OpPrereqError on its own:
1948 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1950 # Get instances in node group; this is unsafe and needs verification later
1951 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1953 self.needed_locks = {
1954 locking.LEVEL_INSTANCE: inst_names,
1955 locking.LEVEL_NODEGROUP: [self.group_uuid],
1956 locking.LEVEL_NODE: [],
1959 self.share_locks = _ShareAll()
1961 def DeclareLocks(self, level):
1962 if level == locking.LEVEL_NODE:
1963 # Get members of node group; this is unsafe and needs verification later
1964 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1966 all_inst_info = self.cfg.GetAllInstancesInfo()
1968 # In Exec(), we warn about mirrored instances that have primary and
1969 # secondary living in separate node groups. To fully verify that
1970 # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
1973 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1974 # Important: access only the instances whose lock is owned
1975 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1976 nodes.update(all_inst_info[inst].secondary_nodes)
1978 self.needed_locks[locking.LEVEL_NODE] = nodes
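
  # Illustrative sketch (not part of the original module): the extra-lock
  # computation performed above, reduced to plain data. ``instances`` maps
  # instance name -> (disk_template, primary node, secondary nodes) and
  # ``node_groups`` maps node name -> group UUID; all names are hypothetical.
  @staticmethod
  def _ExampleExtraSecondaryNodes(instances, node_groups, mirrored_templates):
    """Return secondaries living in a different group than their primary."""
    extra = set()
    for (template, primary, secondaries) in instances.values():
      if template not in mirrored_templates:
        continue
      for snode in secondaries:
        if node_groups[snode] != node_groups[primary]:
          extra.add(snode)
    return extra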
1980 def CheckPrereq(self):
1981 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1982 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1984 group_nodes = set(self.group_info.members)
1985 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1990 unlocked_instances = \
1991 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))
1997 if unlocked_instances:
1998 raise errors.OpPrereqError("Missing lock for instances: %s" %
1999 utils.CommaJoin(unlocked_instances))
2001 self.all_node_info = self.cfg.GetAllNodesInfo()
2002 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2004 self.my_node_names = utils.NiceSort(group_nodes)
2005 self.my_inst_names = utils.NiceSort(group_instances)
2007 self.my_node_info = dict((name, self.all_node_info[name])
2008 for name in self.my_node_names)
2010 self.my_inst_info = dict((name, self.all_inst_info[name])
2011 for name in self.my_inst_names)
2013 # We detect here the nodes that will need the extra RPC calls for verifying
2014 # split LV volumes; they should be locked.
2015 extra_lv_nodes = set()
2017 for inst in self.my_inst_info.values():
2018 if inst.disk_template in constants.DTS_INT_MIRROR:
2019 group = self.my_node_info[inst.primary_node].group
2020 for nname in inst.secondary_nodes:
2021 if self.all_node_info[nname].group != group:
2022 extra_lv_nodes.add(nname)
2024 unlocked_lv_nodes = \
2025 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2027 if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
2030 self.extra_lv_nodes = list(extra_lv_nodes)
2032 def _VerifyNode(self, ninfo, nresult):
2033 """Perform some basic validation on data returned from a node.
    - check the result data structure is well formed and has all the
      mandatory fields
    - check ganeti version
2039 @type ninfo: L{objects.Node}
2040 @param ninfo: the node to check
2041 @param nresult: the results from the node
2043 @return: whether overall this call was successful (and we can expect
      reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2050 # main result, nresult should be a non-empty dict
2051 test = not nresult or not isinstance(nresult, dict)
2052 _ErrorIf(test, constants.CV_ENODERPC, node,
2053 "unable to verify node: no data returned")
2057 # compares ganeti version
2058 local_version = constants.PROTOCOL_VERSION
2059 remote_version = nresult.get("version", None)
2060 test = not (remote_version and
2061 isinstance(remote_version, (list, tuple)) and
2062 len(remote_version) == 2)
2063 _ErrorIf(test, constants.CV_ENODERPC, node,
2064 "connection to node returned invalid data")
2068 test = local_version != remote_version[0]
2069 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2070 "incompatible protocol versions: master %s,"
2071 " node %s", local_version, remote_version[0])
2075 # node seems compatible, we can actually try to look into its results
2077 # full package version
2078 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2079 constants.CV_ENODEVERSION, node,
2080 "software version mismatch: master %s, node %s",
2081 constants.RELEASE_VERSION, remote_version[1],
2082 code=self.ETYPE_WARNING)
2084 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2085 if ninfo.vm_capable and isinstance(hyp_result, dict):
2086 for hv_name, hv_result in hyp_result.iteritems():
2087 test = hv_result is not None
2088 _ErrorIf(test, constants.CV_ENODEHV, node,
2089 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2091 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2092 if ninfo.vm_capable and isinstance(hvp_result, list):
2093 for item, hv_name, hv_result in hvp_result:
2094 _ErrorIf(True, constants.CV_ENODEHV, node,
2095 "hypervisor %s parameter verify failure (source %s): %s",
2096 hv_name, item, hv_result)
2098 test = nresult.get(constants.NV_NODESETUP,
2099 ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
2105 def _VerifyNodeTime(self, ninfo, nresult,
2106 nvinfo_starttime, nvinfo_endtime):
2107 """Check the node time.
2109 @type ninfo: L{objects.Node}
2110 @param ninfo: the node to check
2111 @param nresult: the remote results for the node
2112 @param nvinfo_starttime: the start time of the RPC call
2113 @param nvinfo_endtime: the end time of the RPC call
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node,
               "Node returned invalid time")
      return
2126 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2127 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2128 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
2137 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2138 """Check the node LVM results.
2140 @type ninfo: L{objects.Node}
2141 @param ninfo: the node to check
2142 @param nresult: the remote results for the node
2143 @param vg_name: the configured VG name
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = (vglist is None)
    _ErrorIf(test, constants.CV_ENODELVM, node,
             "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2162 pvlist = nresult.get(constants.NV_PVLIST, None)
2163 test = pvlist is None
2164 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    # check that ':' is not present in PV names, since it's a
    # special character for lvcreate (denotes the range of PEs to
    # use on the PV)
    if not test:
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)
2175 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2176 """Check the node bridges.
2178 @type ninfo: L{objects.Node}
2179 @param ninfo: the node to check
2180 @param nresult: the remote results for the node
2181 @param bridges: the expected list of bridges
    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2190 missing = nresult.get(constants.NV_BRIDGES, None)
2191 test = not isinstance(missing, list)
2192 _ErrorIf(test, constants.CV_ENODENET, node,
2193 "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2198 def _VerifyNodeUserScripts(self, ninfo, nresult):
2199 """Check the results of user scripts presence and executability on the node
2201 @type ninfo: L{objects.Node}
2202 @param ninfo: the node to check
2203 @param nresult: the remote results for the node
    """
    node = ninfo.name

    test = constants.NV_USERSCRIPTS not in nresult
2209 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2210 "did not return user scripts information")
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if not test:
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))
2218 def _VerifyNodeNetwork(self, ninfo, nresult):
2219 """Check the node network connectivity results.
2221 @type ninfo: L{objects.Node}
2222 @param ninfo: the node to check
2223 @param nresult: the remote results for the node
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2229 test = constants.NV_NODELIST not in nresult
2230 _ErrorIf(test, constants.CV_ENODESSH, node,
2231 "node hasn't returned node ssh connectivity data")
2233 if nresult[constants.NV_NODELIST]:
2234 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2235 _ErrorIf(True, constants.CV_ENODESSH, node,
2236 "ssh communication with node '%s': %s", a_node, a_msg)
2238 test = constants.NV_NODENETTEST not in nresult
2239 _ErrorIf(test, constants.CV_ENODENET, node,
2240 "node hasn't returned node tcp connectivity data")
2242 if nresult[constants.NV_NODENETTEST]:
      nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
      for anode in nlist:
        _ErrorIf(True, constants.CV_ENODENET, node,
                 "tcp communication with node '%s': %s",
                 anode, nresult[constants.NV_NODENETTEST][anode])
2249 test = constants.NV_MASTERIP not in nresult
2250 _ErrorIf(test, constants.CV_ENODENET, node,
2251 "node hasn't returned node master IP reachability data")
2253 if not nresult[constants.NV_MASTERIP]:
2254 if node == self.master_node:
        msg = "the master node cannot reach the master IP (not configured?)"
      else:
        msg = "cannot reach the master IP"
      _ErrorIf(True, constants.CV_ENODENET, node, msg)
2260 def _VerifyInstancePolicy(self, instance):
2261 """Verify instance specs against instance policy set on node group level.
    """
    cluster = self.cfg.GetClusterInfo()
2266 full_beparams = cluster.FillBE(instance)
2267 ipolicy = cluster.SimpleFillIPolicy(self.group_info.ipolicy)
2269 mem_size = full_beparams.get(constants.BE_MAXMEM, None)
2270 cpu_count = full_beparams.get(constants.BE_VCPUS, None)
2271 disk_count = len(instance.disks)
2272 disk_sizes = [disk.size for disk in instance.disks]
2273 nic_count = len(instance.nics)
    test_settings = [
      (constants.ISPEC_MEM_SIZE, mem_size),
2277 (constants.ISPEC_CPU_COUNT, cpu_count),
2278 (constants.ISPEC_DISK_COUNT, disk_count),
2279 (constants.ISPEC_NIC_COUNT, nic_count),
2280 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
2282 for (name, value) in test_settings:
2283 test_result = _CheckMinMaxSpecs(name, ipolicy, value)
2284 self._ErrorIf(test_result is not None,
                    constants.CV_EINSTANCEPOLICY, instance.name,
                    test_result)
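
  # Illustrative sketch (not part of the original module): a minimal
  # min/max comparison in the spirit of _CheckMinMaxSpecs, whose definition
  # is not part of this excerpt; the ipolicy layout used here ("min"/"max"
  # dicts keyed by spec name) is an assumption.
  @staticmethod
  def _ExampleMinMaxSpecCheck(name, ipolicy, value):
    """Return an error string if value violates the policy, else None."""
    if value is None:
      return None
    min_v = ipolicy.get("min", {}).get(name, 0)
    max_v = ipolicy.get("max", {}).get(name, value)
    if value < min_v:
      return "%s value %s is smaller than minimal %s" % (name, value, min_v)
    elif value > max_v:
      return "%s value %s is bigger than maximal %s" % (name, value, max_v)
    return None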
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
2290 """Verify an instance.
2292 This function checks to see if the required block devices are
2293 available on the instance's node.
    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2297 node_current = instanceconfig.primary_node
2299 node_vol_should = {}
2300 instanceconfig.MapLVsByNode(node_vol_should)
2302 self._VerifyInstancePolicy(instanceconfig)
2304 for node in node_vol_should:
2305 n_img = node_image[node]
2306 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
2309 for volume in node_vol_should[node]:
2310 test = volume not in n_img.volumes
2311 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2312 "volume %s missing on node %s", volume, node)
2314 if instanceconfig.admin_state == constants.ADMINST_UP:
2315 pri_img = node_image[node_current]
2316 test = instance not in pri_img.instances and not pri_img.offline
2317 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2318 "instance not running on its primary node %s",
2321 diskdata = [(nname, success, status, idx)
2322 for (nname, disks) in diskstatus.items()
2323 for idx, (success, status) in enumerate(disks)]
2325 for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
2328 snode = node_image[nname]
2329 bad_snode = snode.ghost or snode.offline
2330 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2331 not success and not bad_snode,
2332 constants.CV_EINSTANCEFAULTYDISK, instance,
2333 "couldn't retrieve status for disk/%s on %s: %s",
2334 idx, nname, bdev_status)
2335 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2336 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2337 constants.CV_EINSTANCEFAULTYDISK, instance,
2338 "disk/%s on %s is faulty", idx, nname)
2340 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2341 """Verify if there are any unknown volumes in the cluster.
2343 The .os, .swap and backup volumes are ignored. All other volumes are
2344 reported as unknown.
2346 @type reserved: L{ganeti.utils.FieldSet}
2347 @param reserved: a FieldSet of reserved volume names
    """
    for node, n_img in node_image.items():
2351 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
2354 for volume in n_img.volumes:
2355 test = ((node not in node_vol_should or
2356 volume not in node_vol_should[node]) and
2357 not reserved.Matches(volume))
2358 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2359 "volume %s is unknown", volume)
2361 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2362 """Verify N+1 Memory Resilience.
2364 Check that if one single node dies we can still start all the
2365 instances it was primary for.
    """
    cluster_info = self.cfg.GetClusterInfo()
2369 for node, n_img in node_image.items():
2370 # This code checks that every node which is now listed as
2371 # secondary has enough memory to host all instances it is
2372 # supposed to should a single other node in the cluster fail.
2373 # FIXME: not ready for failover to an arbitrary node
2374 # FIXME: does not support file-backed instances
2375 # WARNING: we currently take into account down instances as well
2376 # as up ones, considering that even if they're down someone
2377 # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough
        continue
2384 #TODO(dynmem): use MINMEM for checking
2385 #TODO(dynmem): also consider ballooning out other instances
2386 for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
2389 bep = cluster_info.FillBE(instance_cfg[instance])
2390 if bep[constants.BE_AUTO_BALANCE]:
2391 needed_mem += bep[constants.BE_MAXMEM]
2392 test = n_img.mfree < needed_mem
2393 self._ErrorIf(test, constants.CV_ENODEN1, node,
2394 "not enough memory to accomodate instance failovers"
2395 " should node %s fail (%dMiB needed, %dMiB available)",
2396 prinode, needed_mem, n_img.mfree)
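
  # Illustrative sketch (not part of the original module): the memory
  # arithmetic of the N+1 check above on plain data. ``sbp`` maps a primary
  # node name -> list of (maxmem, auto_balance) pairs for the instances a
  # candidate node is secondary for; names are hypothetical.
  @staticmethod
  def _ExampleNPlusOneShortfalls(sbp, mfree):
    """Return (primary, needed_mem) pairs that would not fit into mfree."""
    failing = []
    for (prinode, insts) in sbp.items():
      needed_mem = sum(maxmem for (maxmem, auto_balance) in insts
                       if auto_balance)
      if mfree < needed_mem:
        failing.append((prinode, needed_mem))
    return failing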
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2400 (files_all, files_opt, files_mc, files_vm)):
2401 """Verifies file checksums collected from all nodes.
2403 @param errorif: Callback for reporting errors
2404 @param nodeinfo: List of L{objects.Node} objects
2405 @param master_node: Name of master node
2406 @param all_nvinfo: RPC results
    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]
2417 # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
2424 nodefiles.update((filename,
2425 frozenset(map(operator.attrgetter("name"), filenodes)))
2426 for filename in files)
2428 assert set(nodefiles) == (files_all | files_mc | files_vm)
2430 fileinfo = dict((filename, {}) for filename in nodefiles)
2431 ignore_nodes = set()
2433 for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue
2438 nresult = all_nvinfo[node.name]
      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2445 test = not (node_files and isinstance(node_files, dict))
2446 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2447 "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue
2452 # Build per-checksum mapping from filename to nodes having it
2453 for (filename, checksum) in node_files.items():
2454 assert filename in nodefiles
2455 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2457 for (filename, checksums) in fileinfo.items():
2458 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2460 # Nodes having the file
2461 with_file = frozenset(node_name
2462 for nodes in fileinfo[filename].values()
2463 for node_name in nodes) - ignore_nodes
2465 expected_nodes = nodefiles[filename] - ignore_nodes
2467 # Nodes missing file
2468 missing_file = expected_nodes - with_file
2470 if filename in files_opt:
2472 errorif(missing_file and missing_file != expected_nodes,
2473 constants.CV_ECLUSTERFILECHECK, None,
2474 "File %s is optional, but it must exist on all or no"
2475 " nodes (not found on %s)",
2476 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2489 # See if there are multiple versions of the file
2490 test = len(checksums) > 1
2492 variants = ["variant %s on %s" %
2493 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2494 for (idx, (checksum, nodes)) in
2495 enumerate(sorted(checksums.items()))]
2499 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2500 "File %s found with %s different checksums (%s)",
2501 filename, len(checksums), "; ".join(variants))
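
  # Illustrative sketch (not part of the original module): classifying a
  # single file from the per-checksum mapping built above. ``checksums``
  # maps checksum -> set of node names having that variant; the arguments
  # are plain sets and all names are hypothetical.
  @staticmethod
  def _ExampleFileDiagnosis(checksums, expected_nodes, optional):
    """Return (missing, unexpected, has_multiple_variants) for one file."""
    with_file = set()
    for nodes in checksums.values():
      with_file.update(nodes)
    missing = expected_nodes - with_file
    if optional and (not missing or missing == expected_nodes):
      # optional files may be missing everywhere, just not on a subset
      missing = set()
    unexpected = with_file - expected_nodes
    return (missing, unexpected, len(checksums) > 1)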
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.
2507 @type ninfo: L{objects.Node}
2508 @param ninfo: the node to check
2509 @param nresult: the remote results for the node
2510 @param instanceinfo: the dict of instances
2511 @param drbd_helper: the configured DRBD usermode helper
2512 @param drbd_map: the DRBD map as returned by
2513 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)
    # compute the DRBD minors
    node_drbd = {}
2535 for minor, instance in drbd_map[node].items():
2536 test = instance not in instanceinfo
2537 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2538 "ghost instance '%s' in temporary DRBD map", instance)
2539 # ghost instance should not be running, but otherwise we
2540 # don't give double warnings (both ghost instance and
2541 # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
2546 node_drbd[minor] = (instance.name,
2547 instance.admin_state == constants.ADMINST_UP)
2549 # and now check them
2550 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2551 test = not isinstance(used_minors, (tuple, list))
2552 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2553 "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return
2558 for minor, (iname, must_exist) in node_drbd.items():
2559 test = minor not in used_minors and must_exist
2560 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2561 "drbd minor %d of instance %s is not active", minor, iname)
2562 for minor in used_minors:
2563 test = minor not in node_drbd
2564 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2565 "unallocated drbd minor %d is in use", minor)
2567 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2568 """Builds the node OS structures.
2570 @type ninfo: L{objects.Node}
2571 @param ninfo: the node to check
2572 @param nresult: the remote results for the node
2573 @param nimg: the node image object
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2579 remote_os = nresult.get(constants.NV_OSLIST, None)
2580 test = (not isinstance(remote_os, list) or
2581 not compat.all(isinstance(v, list) and len(v) == 7
2582 for v in remote_os))
    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}
2594 for (name, os_path, status, diagnose,
2595 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
      if name not in os_dict:
        os_dict[name] = []
2600 # parameters is a list of lists instead of list of tuples due to
2601 # JSON lacking a real tuple type, fix it:
2602 parameters = [tuple(v) for v in parameters]
2603 os_dict[name].append((os_path, status, diagnose,
2604 set(variants), set(parameters), set(api_ver)))
2606 nimg.oslist = os_dict
2608 def _VerifyNodeOS(self, ninfo, nimg, base):
2609 """Verifies the node OS list.
2611 @type ninfo: L{objects.Node}
2612 @param ninfo: the node to check
2613 @param nimg: the node image object
2614 @param base: the 'template' node we match against (e.g. from the master)
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2620 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2622 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2623 for os_name, os_data in nimg.oslist.items():
2624 assert os_data, "Empty OS status for OS %s?!" % os_name
2625 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2626 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2627 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2628 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2629 "OS '%s' has multiple entries (first one shadows the rest): %s",
2630 os_name, utils.CommaJoin([v[0] for v in os_data]))
2631 # comparisons with the 'base' image
2632 test = os_name not in base.oslist
2633 _ErrorIf(test, constants.CV_ENODEOS, node,
2634 "Extra OS %s not present on reference node (%s)",
2638 assert base.oslist[os_name], "Base node has empty OS status?"
2639 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
2643 for kind, a, b in [("API version", f_api, b_api),
2644 ("variants list", f_var, b_var),
2645 ("parameters", beautify_params(f_param),
2646 beautify_params(b_param))]:
2647 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2648 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2649 kind, os_name, base.name,
2650 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2652 # check any missing OSes
2653 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2654 _ErrorIf(missing, constants.CV_ENODEOS, node,
2655 "OSes present on reference node %s but missing on this node: %s",
2656 base.name, utils.CommaJoin(missing))
2658 def _VerifyOob(self, ninfo, nresult):
2659 """Verifies out of band functionality of a node.
2661 @type ninfo: L{objects.Node}
2662 @param ninfo: the node to check
2663 @param nresult: the remote results for the node
    """
    node = ninfo.name

    # We just have to verify the paths on master and/or master candidates
2668 # as the oob helper is invoked on the master
2669 if ((ninfo.master_candidate or ninfo.master_capable) and
2670 constants.NV_OOB_PATHS in nresult):
2671 for path_result in nresult[constants.NV_OOB_PATHS]:
2672 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2674 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2675 """Verifies and updates the node volume data.
2677 This function will update a L{NodeImage}'s internal structures
2678 with data from the remote call.
2680 @type ninfo: L{objects.Node}
2681 @param ninfo: the node to check
2682 @param nresult: the remote results for the node
2683 @param nimg: the node image object
2684 @param vg_name: the configured VG name
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
2695 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2696 utils.SafeEncode(lvdata))
2697 elif not isinstance(lvdata, dict):
2698 _ErrorIf(True, constants.CV_ENODELVM, node,
2699 "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
2704 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2705 """Verifies and updates the node instance list.
2707 If the listing was successful, then updates this node's instance
2708 list. Otherwise, it marks the RPC call as failed for the instance
2711 @type ninfo: L{objects.Node}
2712 @param ninfo: the node to check
2713 @param nresult: the remote results for the node
2714 @param nimg: the node image object
2717 idata = nresult.get(constants.NV_INSTANCELIST, None)
2718 test = not isinstance(idata, list)
2719 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2720 "rpc call to node failed (instancelist): %s",
2721 utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata
2727 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2728 """Verifies and computes a node information map
2730 @type ninfo: L{objects.Node}
2731 @param ninfo: the node to check
2732 @param nresult: the remote results for the node
2733 @param nimg: the node image object
2734 @param vg_name: the configured VG name
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2740 # try to read free memory (from the hypervisor)
2741 hv_info = nresult.get(constants.NV_HVINFO, None)
2742 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2743 _ErrorIf(test, constants.CV_ENODEHV, node,
2744 "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")
2752 # FIXME: devise a free space model for file based instances as well
2753 if vg_name is not None:
2754 test = (constants.NV_VGLIST not in nresult or
2755 vg_name not in nresult[constants.NV_VGLIST])
2756 _ErrorIf(test, constants.CV_ENODELVM, node,
2757 "node didn't return data for the volume group '%s'"
2758 " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")
2766 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2767 """Gets per-disk status information for all instances.
2769 @type nodelist: list of strings
2770 @param nodelist: Node names
2771 @type node_image: dict of (name, L{objects.Node})
2772 @param node_image: Node objects
2773 @type instanceinfo: dict of (name, L{objects.Instance})
2774 @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
2776 @return: a dictionary of per-instance dictionaries with nodes as
2777 keys and disk information as values; the disk information is a
2778 list of tuples (success, payload)
    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
2785 diskless_instances = set()
2786 diskless = constants.DT_DISKLESS
2788 for nname in nodelist:
2789 node_instances = list(itertools.chain(node_image[nname].pinst,
2790 node_image[nname].sinst))
2791 diskless_instances.update(inst for inst in node_instances
2792 if instanceinfo[inst].disk_template == diskless)
2793 disks = [(inst, disk)
2794 for inst in node_instances
2795 for disk in instanceinfo[inst].disks]
      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks
2803 # Creating copies as SetDiskID below will modify the objects and that can
2804 # lead to incorrect data returned from nodes
2805 devonly = [dev.Copy() for (_, dev) in disks]
      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)
2810 node_disks_devonly[nname] = devonly
2812 assert len(node_disks) == len(node_disks_devonly)
2814 # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}
2822 for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))
2845 for ((inst, _), status) in zip(disks, data):
2846 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2848 # Add empty entries for diskless instances.
2849 for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}
2853 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2854 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2855 compat.all(isinstance(s, (tuple, list)) and
2856 len(s) == 2 for s in statuses)
2857 for inst, nnames in instdisk.items()
2858 for nname, statuses in nnames.items())
2859 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
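
  # For reference (not part of the original module), the returned structure
  # looks like this, with hypothetical names; every instance maps to a
  # per-node list holding one (success, payload) pair per disk:
  #
  #   {"inst1": {"node1": [(True, status1), (False, "node offline")]},
  #    "inst2": {"node2": [(True, status2)]}}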
  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
2865 """Create endless iterators for all potential SSH check hosts.
    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")
2873 return map(itertools.cycle,
2874 [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])
  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2880 """Choose which nodes should talk to which other nodes.
    We will make nodes contact all nodes in their group, and one node from
    every other node group.
2885 @warning: This algorithm has a known issue if one node group is much
2886 smaller than others (e.g. just one node). In such a case all other
2887 nodes will talk to the single node.
    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2891 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2893 return (online_nodes,
2894 dict((name, sorted([i.next() for i in sel]))
2895 for name in online_nodes))
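
  # Illustrative example (not part of the original module) of what the two
  # helpers above produce for a hypothetical cluster with groups g1 (the
  # verified one, nodes n1/n2), g2 (nodes g2a/g2b) and g3 (node g3a):
  #
  #   online_nodes = ["n1", "n2"]
  #   per_node = {"n1": ["g2a", "g3a"],
  #               "n2": ["g2b", "g3a"]}
  #
  # i.e. every node additionally talks to one round-robin node from every
  # other group, so inter-group SSH connectivity is covered without making
  # each node contact the whole cluster.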
2897 def BuildHooksEnv(self):
2900 Cluster-Verify hooks just ran in the post phase and their failure makes
2901 the output be logged in the verify output and the verification to fail.
2905 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2908 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env
2913 def BuildHooksNodes(self):
2914 """Build hooks nodes.
    """
    return ([], self.my_node_names)
2919 def Exec(self, feedback_fn):
2920 """Verify integrity of the node group, performing various test on nodes.
    """
    # This method has too many local variables. pylint: disable=R0914
2924 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2926 if not self.my_node_names:
2928 feedback_fn("* Empty node group, skipping verification")
2932 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2933 verbose = self.op.verbose
2934 self._feedback_fn = feedback_fn
2936 vg_name = self.cfg.GetVGName()
2937 drbd_helper = self.cfg.GetDRBDHelper()
2938 cluster = self.cfg.GetClusterInfo()
2939 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2940 hypervisors = cluster.enabled_hypervisors
2941 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2943 i_non_redundant = [] # Non redundant instances
2944 i_non_a_balanced = [] # Non auto-balanced instances
2945 i_offline = 0 # Count of offline instances
2946 n_offline = 0 # Count of offline nodes
2947 n_drained = 0 # Count of nodes being drained
2948 node_vol_should = {}
2950 # FIXME: verify OS list
2953 filemap = _ComputeAncillaryFiles(cluster, False)
2955 # do local checksums
2956 master_node = self.master_node = self.cfg.GetMasterNode()
2957 master_ip = self.cfg.GetMasterIP()
2959 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
2963 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2965 node_verify_param = {
2966 constants.NV_FILELIST:
2967 utils.UniqueSequence(filename
2968 for files in filemap
2969 for filename in files),
2970 constants.NV_NODELIST:
2971 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2972 self.all_node_info.values()),
2973 constants.NV_HYPERVISOR: hypervisors,
2974 constants.NV_HVPARAMS:
2975 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2976 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2977 for node in node_data_list
2978 if not node.offline],
2979 constants.NV_INSTANCELIST: hypervisors,
2980 constants.NV_VERSION: None,
2981 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2982 constants.NV_NODESETUP: None,
2983 constants.NV_TIME: None,
2984 constants.NV_MASTERIP: (master_node, master_ip),
2985 constants.NV_OSLIST: None,
2986 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
      }
2990 if vg_name is not None:
2991 node_verify_param[constants.NV_VGLIST] = None
2992 node_verify_param[constants.NV_LVLIST] = vg_name
2993 node_verify_param[constants.NV_PVLIST] = [vg_name]
2994 node_verify_param[constants.NV_DRBDLIST] = None
    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3000 # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3003 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3004 bridges.add(default_nicpp[constants.NIC_LINK])
3005 for instance in self.my_inst_info.values():
3006 for nic in instance.nics:
3007 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3008 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3009 bridges.add(full_nic[constants.NIC_LINK])
    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
3014 # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
3018 for node in node_data_list)
    oob_paths = []
    for node in self.all_node_info.values():
3023 path = _SupportsOob(self.cfg, node)
3024 if path and path not in oob_paths:
3025 oob_paths.append(path)
    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3030 for instance in self.my_inst_names:
3031 inst_config = self.my_inst_info[instance]
3033 for nname in inst_config.all_nodes:
3034 if nname not in node_image:
3035 gnode = self.NodeImage(name=nname)
3036 gnode.ghost = (nname not in self.all_node_info)
3037 node_image[nname] = gnode
3039 inst_config.MapLVsByNode(node_vol_should)
3041 pnode = inst_config.primary_node
3042 node_image[pnode].pinst.append(instance)
3044 for snode in inst_config.secondary_nodes:
3045 nimg = node_image[snode]
3046 nimg.sinst.append(instance)
3047 if pnode not in nimg.sbp:
3048 nimg.sbp[pnode] = []
3049 nimg.sbp[pnode].append(instance)
3051 # At this point, we have the in-memory data structures complete,
3052 # except for the runtime information, which we'll gather next
3054 # Due to the way our RPC system works, exact response times cannot be
3055 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3056 # time before and after executing the request, we can at least have a time
3058 nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
3062 nvinfo_endtime = time.time()
3064 if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
3067 {constants.NV_LVLIST: vg_name},
3068 self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}
3072 all_drbd_map = self.cfg.ComputeDRBDMap()
3074 feedback_fn("* Gathering disk information (%s nodes)" %
3075 len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)
3079 feedback_fn("* Verifying configuration file consistency")
3081 # If not all nodes are being checked, we need to make sure the master node
3082 # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)

    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
3086 vf_node_info = list(self.my_node_info.values())
3087 additional_nodes = []
3088 if master_node not in self.my_node_info:
3089 additional_nodes.append(master_node)
3090 vf_node_info.append(self.all_node_info[master_node])
3091 # Add the first vm_capable node we find which is not included
3092 for node in absent_nodes:
3093 nodeinfo = self.all_node_info[node]
3094 if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
3098 key = constants.NV_FILELIST
3099 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3100 {key: node_verify_param[key]},
3101 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()
3106 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3108 feedback_fn("* Verifying node status")
3112 for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue
      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3134 msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue
3141 nresult = all_nvinfo[node].payload
3143 nimg.call_ok = self._VerifyNode(node_i, nresult)
3144 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3145 self._VerifyNodeNetwork(node_i, nresult)
3146 self._VerifyNodeUserScripts(node_i, nresult)
3147 self._VerifyOob(node_i, nresult)
      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)
      # Check whether all running instances are primary for the node. (This
3166 # can no longer be done from _VerifyInstance below, since some of the
3167 # wrong instances could be from other node groups.)
3168 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3170 for inst in non_primary_inst:
3171 # FIXME: investigate best way to handle offline insts
3172 if inst.admin_state == constants.ADMINST_OFFLINE:
3174 feedback_fn("* Skipping offline instance %s" % inst.name)
3177 test = inst in self.all_inst_info
3178 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3179 "instance should not run on node %s", node_i.name)
3180 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3181 "node is running unknown instance %s", inst)
3183 for node, result in extra_lv_nvinfo.items():
3184 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3185 node_image[node], vg_name)
3187 feedback_fn("* Verifying instance status")
3188 for instance in self.my_inst_names:
3190 feedback_fn("* Verifying instance %s" % instance)
3191 inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
3194 inst_nodes_offline = []
3196 pnode = inst_config.primary_node
3197 pnode_img = node_image[pnode]
3198 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3199 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3200 " primary node failed", instance)
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
3205 "instance is marked as running and lives on offline node %s",
3206 inst_config.primary_node)
3208 # If the instance is non-redundant we cannot survive losing its primary
3209 # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
3212 # FIXME: does not support file-backed instances
3213 if not inst_config.secondary_nodes:
3214 i_non_redundant.append(instance)
3216 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3217 constants.CV_EINSTANCELAYOUT,
3218 instance, "instance has multiple secondary nodes: %s",
3219 utils.CommaJoin(inst_config.secondary_nodes),
3220 code=self.ETYPE_WARNING)
3222 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3223 pnode = inst_config.primary_node
3224 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3225 instance_groups = {}
        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]
3238 self._ErrorIf(len(instance_groups) > 1,
3239 constants.CV_EINSTANCESPLITGROUPS,
3240 instance, "instance has primary and secondary nodes in"
3241 " different groups: %s", utils.CommaJoin(pretty_list),
3242 code=self.ETYPE_WARNING)
3244 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3245 i_non_a_balanced.append(instance)
3247 for snode in inst_config.secondary_nodes:
3248 s_img = node_image[snode]
3249 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3250 snode, "instance %s, connection to secondary node failed",
3254 inst_nodes_offline.append(snode)
3256 # warn that the instance lives on offline nodes
3257 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3258 "instance has offline secondary node(s) %s",
3259 utils.CommaJoin(inst_nodes_offline))
3260 # ... or ghost/non-vm_capable nodes
3261 for node in inst_config.all_nodes:
3262 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3263 instance, "instance lives on ghost node %s", node)
3264 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3265 instance, "instance lives on non-vm_capable node %s", node)
3267 feedback_fn("* Verifying orphan volumes")
3268 reserved = utils.FieldSet(*cluster.reserved_lvs)
3270 # We will get spurious "unknown volume" warnings if any node of this group
3271 # is secondary for an instance whose primary is in another group. To avoid
3272 # them, we find these instances and add their volumes to node_vol_should.
3273 for inst in self.all_inst_info.values():
3274 for secondary in inst.secondary_nodes:
3275 if (secondary in self.my_node_info
3276 and inst.name not in self.my_inst_info):
3277 inst.MapLVsByNode(node_vol_should)
3280 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3282 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3283 feedback_fn("* Verifying N+1 Memory redundancy")
3284 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3286 feedback_fn("* Other Notes")
3288 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3289 % len(i_non_redundant))
3291 if i_non_a_balanced:
3292 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3293 % len(i_non_a_balanced))
3296 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3299 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3302 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3306 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3307 """Analyze the post-hooks' result
3309 This method analyses the hook result, handles it, and sends some
3310 nicely-formatted feedback back to the user.
3312 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3313 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3314 @param hooks_results: the results of the multi-node hooks rpc call
3315 @param feedback_fn: function used send feedback back to the caller
3316 @param lu_result: previous Exec result
3317 @return: the new Exec result, based on the previous result
    """
    # We only really run POST phase hooks, only for non-empty groups,
3322 # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
3327 # Used to change hooks' output to proper indentation
3328 feedback_fn("* Hooks Results")
3329 assert hooks_results, "invalid result from hooks"
3331 for node_name in hooks_results:
3332 res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
3335 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3336 "Communication failure in hooks execution: %s", msg)
3337 if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
3341 for script, hkr, output in res.payload:
3342 test = hkr == constants.HKR_FAIL
3343 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3344 "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result
3353 class LUClusterVerifyDisks(NoHooksLU):
3354 """Verifies the cluster disks status.
3359 def ExpandNames(self):
3360 self.share_locks = _ShareAll()
3361 self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
3365 def Exec(self, feedback_fn):
3366 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3368 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3369 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3370 for group in group_names])
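

def _ExampleVerifyDisksJobs(group_names):
  """Illustrative sketch, not part of the original module: build the same
  per-group job list as L{LUClusterVerifyDisks.Exec} for the given group
  names (one single-opcode job per node group).

  """
  return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                         for group in group_names])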
3373 class LUGroupVerifyDisks(NoHooksLU):
3374 """Verifies the status of all disks in a node group.
3379 def ExpandNames(self):
3380 # Raises errors.OpPrereqError on its own if group can't be found
3381 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3383 self.share_locks = _ShareAll()
3384 self.needed_locks = {
3385 locking.LEVEL_INSTANCE: [],
3386 locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }
3390 def DeclareLocks(self, level):
3391 if level == locking.LEVEL_INSTANCE:
3392 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3394 # Lock instances optimistically, needs verification once node and group
3395 # locks have been acquired
3396 self.needed_locks[locking.LEVEL_INSTANCE] = \
3397 self.cfg.GetNodeGroupInstances(self.group_uuid)
3399 elif level == locking.LEVEL_NODEGROUP:
3400 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3402 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3403 set([self.group_uuid] +
3404 # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3409 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3411 elif level == locking.LEVEL_NODE:
3412 # This will only lock the nodes in the group to be verified which contain
3414 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3415 self._LockInstancesNodes()
3417 # Lock all nodes in group to be verified
3418 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3419 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3420 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3422 def CheckPrereq(self):
3423 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3424 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3425 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3427 assert self.group_uuid in owned_groups
3429 # Check if locked instances are still correct
3430 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3432 # Get instance information
3433 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3435 # Check if node groups for locked instances are still correct
3436 for (instance_name, inst) in self.instances.items():
3437 assert owned_nodes.issuperset(inst.all_nodes), \
3438 "Instance %s's nodes changed while we kept the lock" % instance_name
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)
3443 assert self.group_uuid in inst_groups, \
3444 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3446 def Exec(self, feedback_fn):
3447 """Verify integrity of cluster disks.
3449 @rtype: tuple of three items
3450 @return: a tuple of (dict of node-to-node_error, list of instances
      which need activate-disks, dict of instance: (node, volume) for
      missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}
3459 nv_dict = _MapInstanceDisksToNodes([inst
3460 for inst in self.instances.values()
3461 if inst.admin_state == constants.ADMINST_UP])
3464 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3465 set(self.cfg.GetVmCapableNodeList()))
3467 node_lvs = self.rpc.call_lv_list(nodes, [])
3469 for (node, node_res) in node_lvs.items():
3470 if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue
3479 for lv_name, (_, _, lv_online) in node_res.payload.items():
3480 inst = nv_dict.pop((node, lv_name), None)
3481 if not (lv_online or inst is None):
3482 res_instances.add(inst)
3484 # any leftover items in nv_dict are missing LVs, let's arrange the data
3486 for key, inst in nv_dict.iteritems():
3487 res_missing.setdefault(inst, []).append(list(key))
3489 return (res_nodes, list(res_instances), res_missing)
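

def _ExampleSummarizeGroupDisks(verify_result):
  """Illustrative sketch, not part of the original module: flatten the
  (node errors, instances needing activate-disks, missing LVs) tuple
  returned by L{LUGroupVerifyDisks.Exec} into human-readable lines.

  """
  (res_nodes, res_instances, res_missing) = verify_result
  lines = ["node %s: %s" % (node, msg)
           for (node, msg) in sorted(res_nodes.items())]
  lines.extend("instance %s needs activate-disks" % inst
               for inst in sorted(res_instances))
  lines.extend("instance %s is missing %d volume(s)" % (inst, len(vols))
               for (inst, vols) in sorted(res_missing.items()))
  return lines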
3492 class LUClusterRepairDiskSizes(NoHooksLU):
3493 """Verifies the cluster disks sizes.
3498 def ExpandNames(self):
3499 if self.op.instances:
3500 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3501 self.needed_locks = {
3502 locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
3508 self.needed_locks = {
3509 locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
3512 self.share_locks = {
3513 locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }
3517 def DeclareLocks(self, level):
3518 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3519 self._LockInstancesNodes(primary_only=True, level=level)
3521 def CheckPrereq(self):
3522 """Check prerequisites.
3524 This only checks the optional instance list against the existing names.
    """
    if self.wanted_names is None:
3528 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3530 self.wanted_instances = \
3531 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3533 def _EnsureChildSizes(self, disk):
3534 """Ensure children of the disk have the needed disk size.
3536 This is valid mainly for DRBD8 and fixes an issue where the
3537 children have smaller disk size.
3539 @param disk: an L{ganeti.objects.Disk} object
    """
    if disk.dev_type == constants.LD_DRBD8:
3543 assert disk.children, "Empty children for DRBD8?"
3544 fchild = disk.children[0]
3545 mismatch = fchild.size < disk.size
3547 self.LogInfo("Child disk has size %d, parent %d, fixing",
3548 fchild.size, disk.size)
3549 fchild.size = disk.size
      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
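
  # Illustrative example (not part of the original module): for a DRBD8
  # disk recorded with size 1024 whose data child (children[0]) still has
  # size 1000, _EnsureChildSizes grows the child to 1024 and returns True,
  # signalling the caller to write the configuration out; disks of any
  # other type simply return False.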
3556 def Exec(self, feedback_fn):
3557 """Verify the size of cluster disks.
    """
    # TODO: check child disks too
3561 # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
3564 pnode = instance.primary_node
3565 if pnode not in per_node_disks:
3566 per_node_disks[pnode] = []
3567 for idx, disk in enumerate(instance.disks):
3568 per_node_disks[pnode].append((instance, idx, disk))
3570 assert not (frozenset(per_node_disks.keys()) -
3571 self.owned_locks(locking.LEVEL_NODE_RES)), \
3572 "Not owning correct locks"
3573 assert not self.owned_locks(locking.LEVEL_NODE)
    changed = []
    for node, dskl in per_node_disks.items():
3577 newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
3580 result = self.rpc.call_blockdev_getsize(node, newl)
3582 self.LogWarning("Failure in blockdev_getsize call to node"
3583 " %s, ignoring", node)
3585 if len(result.payload) != len(dskl):
3586 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3587 " result.payload=%s", node, len(dskl), result.payload)
3588 self.LogWarning("Invalid result from node %s, ignoring node results",
3591 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3593 self.LogWarning("Disk %d of instance %s did not return size"
3594 " information, ignoring", idx, instance.name)
3596 if not isinstance(size, (int, long)):
3597 self.LogWarning("Disk %d of instance %s did not return valid"
3598 " size information, ignoring", idx, instance.name)
3601 if size != disk.size:
3602 self.LogInfo("Disk %d of instance %s has mismatched size,"
3603 " correcting: recorded %d, actual %d", idx,
3604 instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
3607 changed.append((instance.name, idx, size))
3608 if self._EnsureChildSizes(disk):
3609 self.cfg.Update(instance, feedback_fn)
            changed.append((instance.name, idx, disk.size))

    return changed
3614 class LUClusterRename(LogicalUnit):
3615 """Rename the cluster.
3618 HPATH = "cluster-rename"
3619 HTYPE = constants.HTYPE_CLUSTER
3621 def BuildHooksEnv(self):
3626 "OP_TARGET": self.cfg.GetClusterName(),
3627 "NEW_NAME": self.op.name,
3630 def BuildHooksNodes(self):
3631 """Build hooks nodes.
3634 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3636 def CheckPrereq(self):
3637 """Verify that the passed name is a valid one.
    """
    hostname = netutils.GetHostname(name=self.op.name,
3641 family=self.cfg.GetPrimaryIPFamily())
3643 new_name = hostname.name
3644 self.ip = new_ip = hostname.ip
3645 old_name = self.cfg.GetClusterName()
3646 old_ip = self.cfg.GetMasterIP()
3647 if new_name == old_name and new_ip == old_ip:
3648 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3649 " cluster has changed",
3651 if new_ip != old_ip:
3652 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3653 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3654 " reachable on the network" %
3655 new_ip, errors.ECODE_NOTUNIQUE)
3657 self.op.name = new_name
3659 def Exec(self, feedback_fn):
3660 """Rename the cluster.
    """
    clustername = self.op.name
    new_ip = self.ip
3666 # shutdown the master IP
3667 master_params = self.cfg.GetMasterNetworkParameters()
3668 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
3671 result.Raise("Could not disable the master role")
    try:
      cluster = self.cfg.GetClusterInfo()
3675 cluster.cluster_name = clustername
3676 cluster.master_ip = new_ip
3677 self.cfg.Update(cluster, feedback_fn)
3679 # update the known hosts file
3680 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3681 node_list = self.cfg.GetOnlineNodeList()
3683 node_list.remove(master_params.name)
3686 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
3691 msg = result.fail_msg
3693 self.LogWarning("Could not re-enable the master role on"
3694 " the master, please restart manually: %s", msg)
3699 def _ValidateNetmask(cfg, netmask):
3700 """Checks if a netmask is valid.
3702 @type cfg: L{config.ConfigWriter}
3703 @param cfg: The cluster configuration
3705 @param netmask: the netmask to be verified
3706 @raise errors.OpPrereqError: if the validation fails
  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
3711 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3712 except errors.ProgrammerError:
3713 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3715 if not ipcls.ValidateNetmask(netmask):
3716 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3720 class LUClusterSetParams(LogicalUnit):
3721 """Change the parameters of the cluster.
3724 HPATH = "cluster-modify"
3725 HTYPE = constants.HTYPE_CLUSTER
3728 def CheckArguments(self):
3732 if self.op.uid_pool:
3733 uidpool.CheckUidPool(self.op.uid_pool)
3735 if self.op.add_uids:
3736 uidpool.CheckUidPool(self.op.add_uids)
3738 if self.op.remove_uids:
3739 uidpool.CheckUidPool(self.op.remove_uids)
3741 if self.op.master_netmask is not None:
3742 _ValidateNetmask(self.cfg, self.op.master_netmask)
3744 if self.op.diskparams:
3745 for dt_params in self.op.diskparams.values():
3746 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3748 def ExpandNames(self):
3749 # FIXME: in the future maybe other cluster params won't require checking on
3750 # all nodes to be modified.
3751 self.needed_locks = {
3752 locking.LEVEL_NODE: locking.ALL_SET,
3753 }
3754 self.share_locks[locking.LEVEL_NODE] = 1
3756 def BuildHooksEnv(self):
3761 "OP_TARGET": self.cfg.GetClusterName(),
3762 "NEW_VG_NAME": self.op.vg_name,
3765 def BuildHooksNodes(self):
3766 """Build hooks nodes.
3769 mn = self.cfg.GetMasterNode()
3770 return ([mn], [mn])
3772 def CheckPrereq(self):
3773 """Check prerequisites.
3775 This checks whether the given params don't conflict and
3776 if the given volume group is valid.
3779 if self.op.vg_name is not None and not self.op.vg_name:
3780 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3781 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3782 " instances exist", errors.ECODE_INVAL)
3784 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3785 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3786 raise errors.OpPrereqError("Cannot disable drbd helper while"
3787 " drbd-based instances exist",
3790 node_list = self.owned_locks(locking.LEVEL_NODE)
3792 # if vg_name not None, checks given volume group on all nodes
3793 if self.op.vg_name:
3794 vglist = self.rpc.call_vg_list(node_list)
3795 for node in node_list:
3796 msg = vglist[node].fail_msg
3797 if msg:
3798 # ignoring down node
3799 self.LogWarning("Error while gathering data on node %s"
3800 " (ignoring node): %s", node, msg)
3801 continue
3802 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3803 self.op.vg_name,
3804 constants.MIN_VG_SIZE)
3805 if vgstatus:
3806 raise errors.OpPrereqError("Error on node '%s': %s" %
3807 (node, vgstatus), errors.ECODE_ENVIRON)
3809 if self.op.drbd_helper:
3810 # checks given drbd helper on all nodes
3811 helpers = self.rpc.call_drbd_helper(node_list)
3812 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3813 if ninfo.offline:
3814 self.LogInfo("Not checking drbd helper on offline node %s", node)
3815 continue
3816 msg = helpers[node].fail_msg
3817 if msg:
3818 raise errors.OpPrereqError("Error checking drbd helper on node"
3819 " '%s': %s" % (node, msg),
3820 errors.ECODE_ENVIRON)
3821 node_helper = helpers[node].payload
3822 if node_helper != self.op.drbd_helper:
3823 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3824 (node, node_helper), errors.ECODE_ENVIRON)
3826 self.cluster = cluster = self.cfg.GetClusterInfo()
3827 # validate params changes
3828 if self.op.beparams:
3829 objects.UpgradeBeParams(self.op.beparams)
3830 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3831 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3833 if self.op.ndparams:
3834 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3835 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3837 # TODO: we need a more general way to handle resetting
3838 # cluster-level parameters to default values
3839 if self.new_ndparams["oob_program"] == "":
3840 self.new_ndparams["oob_program"] = \
3841 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3843 if self.op.hv_state:
3844 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3845 self.cluster.hv_state_static)
3846 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3847 for hv, values in new_hv_state.items())
3849 if self.op.disk_state:
3850 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3851 self.cluster.disk_state_static)
3852 self.new_disk_state = \
3853 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3854 for name, values in svalues.items()))
3855 for storage, svalues in new_disk_state.items())
3857 if self.op.ipolicy:
3858 ipolicy = {}
3859 for key, value in self.op.ipolicy.items():
3860 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
3861 ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
3862 value)
3863 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
3864 self.new_ipolicy = ipolicy
3866 if self.op.nicparams:
3867 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3868 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3869 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3870 nic_errors = []
3872 # check all instances for consistency
3873 for instance in self.cfg.GetAllInstancesInfo().values():
3874 for nic_idx, nic in enumerate(instance.nics):
3875 params_copy = copy.deepcopy(nic.nicparams)
3876 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3878 # check parameter syntax
3879 try:
3880 objects.NIC.CheckParameterSyntax(params_filled)
3881 except errors.ConfigurationError, err:
3882 nic_errors.append("Instance %s, nic/%d: %s" %
3883 (instance.name, nic_idx, err))
3885 # if we're moving instances to routed, check that they have an ip
3886 target_mode = params_filled[constants.NIC_MODE]
3887 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3888 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3889 " address" % (instance.name, nic_idx))
3891 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3892 "\n".join(nic_errors))
3894 # hypervisor list/parameters
3895 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3896 if self.op.hvparams:
3897 for hv_name, hv_dict in self.op.hvparams.items():
3898 if hv_name not in self.new_hvparams:
3899 self.new_hvparams[hv_name] = hv_dict
3900 else:
3901 self.new_hvparams[hv_name].update(hv_dict)
3903 # disk template parameters
3904 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3905 if self.op.diskparams:
3906 for dt_name, dt_params in self.op.diskparams.items():
3907 if dt_name not in self.new_diskparams:
3908 self.new_diskparams[dt_name] = dt_params
3909 else:
3910 self.new_diskparams[dt_name].update(dt_params)
3912 # os hypervisor parameters
3913 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3914 if self.op.os_hvp:
3915 for os_name, hvs in self.op.os_hvp.items():
3916 if os_name not in self.new_os_hvp:
3917 self.new_os_hvp[os_name] = hvs
3918 else:
3919 for hv_name, hv_dict in hvs.items():
3920 if hv_name not in self.new_os_hvp[os_name]:
3921 self.new_os_hvp[os_name][hv_name] = hv_dict
3922 else:
3923 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3925 # os parameters
3926 self.new_osp = objects.FillDict(cluster.osparams, {})
3927 if self.op.osparams:
3928 for os_name, osp in self.op.osparams.items():
3929 if os_name not in self.new_osp:
3930 self.new_osp[os_name] = {}
3932 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3933 use_none=True)
3935 if not self.new_osp[os_name]:
3936 # we removed all parameters
3937 del self.new_osp[os_name]
3938 else:
3939 # check the parameter validity (remote check)
3940 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3941 os_name, self.new_osp[os_name])
3943 # changes to the hypervisor list
3944 if self.op.enabled_hypervisors is not None:
3945 self.hv_list = self.op.enabled_hypervisors
3946 for hv in self.hv_list:
3947 # if the hypervisor doesn't already exist in the cluster
3948 # hvparams, we initialize it to empty, and then (in both
3949 # cases) we make sure to fill the defaults, as we might not
3950 # have a complete defaults list if the hypervisor wasn't
3951 # enabled before
3952 if hv not in new_hvp:
3953 new_hvp[hv] = {}
3954 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3955 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3956 else:
3957 self.hv_list = cluster.enabled_hypervisors
3959 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3960 # either the enabled list has changed, or the parameters have, validate
3961 for hv_name, hv_params in self.new_hvparams.items():
3962 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3963 (self.op.enabled_hypervisors and
3964 hv_name in self.op.enabled_hypervisors)):
3965 # either this is a new hypervisor, or its parameters have changed
3966 hv_class = hypervisor.GetHypervisor(hv_name)
3967 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3968 hv_class.CheckParameterSyntax(hv_params)
3969 _CheckHVParams(self, node_list, hv_name, hv_params)
3971 if self.op.os_hvp:
3972 # no need to check any newly-enabled hypervisors, since the
3973 # defaults have already been checked in the above code-block
3974 for os_name, os_hvp in self.new_os_hvp.items():
3975 for hv_name, hv_params in os_hvp.items():
3976 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3977 # we need to fill in the new os_hvp on top of the actual hv_p
3978 cluster_defaults = self.new_hvparams.get(hv_name, {})
3979 new_osp = objects.FillDict(cluster_defaults, hv_params)
3980 hv_class = hypervisor.GetHypervisor(hv_name)
3981 hv_class.CheckParameterSyntax(new_osp)
3982 _CheckHVParams(self, node_list, hv_name, new_osp)
3984 if self.op.default_iallocator:
3985 alloc_script = utils.FindFile(self.op.default_iallocator,
3986 constants.IALLOCATOR_SEARCH_PATH,
3987 os.path.isfile)
3988 if alloc_script is None:
3989 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3990 " specified" % self.op.default_iallocator,
3993 def Exec(self, feedback_fn):
3994 """Change the parameters of the cluster.
3997 if self.op.vg_name is not None:
3998 new_volume = self.op.vg_name
3999 if not new_volume:
4000 new_volume = None
4001 if new_volume != self.cfg.GetVGName():
4002 self.cfg.SetVGName(new_volume)
4003 else:
4004 feedback_fn("Cluster LVM configuration already in desired"
4005 " state, not changing")
4006 if self.op.drbd_helper is not None:
4007 new_helper = self.op.drbd_helper
4008 if not new_helper:
4009 new_helper = None
4010 if new_helper != self.cfg.GetDRBDHelper():
4011 self.cfg.SetDRBDHelper(new_helper)
4012 else:
4013 feedback_fn("Cluster DRBD helper already in desired state,"
4014 " not changing")
4015 if self.op.hvparams:
4016 self.cluster.hvparams = self.new_hvparams
4017 if self.op.os_hvp:
4018 self.cluster.os_hvp = self.new_os_hvp
4019 if self.op.enabled_hypervisors is not None:
4020 self.cluster.hvparams = self.new_hvparams
4021 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4022 if self.op.beparams:
4023 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4024 if self.op.nicparams:
4025 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4026 if self.op.ipolicy:
4027 self.cluster.ipolicy = self.new_ipolicy
4028 if self.op.osparams:
4029 self.cluster.osparams = self.new_osp
4030 if self.op.ndparams:
4031 self.cluster.ndparams = self.new_ndparams
4032 if self.op.diskparams:
4033 self.cluster.diskparams = self.new_diskparams
4034 if self.op.hv_state:
4035 self.cluster.hv_state_static = self.new_hv_state
4036 if self.op.disk_state:
4037 self.cluster.disk_state_static = self.new_disk_state
4039 if self.op.candidate_pool_size is not None:
4040 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4041 # we need to update the pool size here, otherwise the save will fail
4042 _AdjustCandidatePool(self, [])
4044 if self.op.maintain_node_health is not None:
4045 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4046 feedback_fn("Note: CONFD was disabled at build time, node health"
4047 " maintenance is not useful (still enabling it)")
4048 self.cluster.maintain_node_health = self.op.maintain_node_health
4050 if self.op.prealloc_wipe_disks is not None:
4051 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4053 if self.op.add_uids is not None:
4054 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4056 if self.op.remove_uids is not None:
4057 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4059 if self.op.uid_pool is not None:
4060 self.cluster.uid_pool = self.op.uid_pool
4062 if self.op.default_iallocator is not None:
4063 self.cluster.default_iallocator = self.op.default_iallocator
4065 if self.op.reserved_lvs is not None:
4066 self.cluster.reserved_lvs = self.op.reserved_lvs
4068 if self.op.use_external_mip_script is not None:
4069 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4071 def helper_os(aname, mods, desc):
4072 desc += " OS list"
4073 lst = getattr(self.cluster, aname)
4074 for key, val in mods:
4075 if key == constants.DDM_ADD:
4076 if val in lst:
4077 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4078 else:
4079 lst.append(val)
4080 elif key == constants.DDM_REMOVE:
4081 if val in lst:
4082 lst.remove(val)
4083 else:
4084 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4085 else:
4086 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4088 if self.op.hidden_os:
4089 helper_os("hidden_os", self.op.hidden_os, "hidden")
4091 if self.op.blacklisted_os:
4092 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4094 if self.op.master_netdev:
4095 master_params = self.cfg.GetMasterNetworkParameters()
4096 ems = self.cfg.GetUseExternalMipScript()
4097 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4098 self.cluster.master_netdev)
4099 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4100 master_params, ems)
4101 result.Raise("Could not disable the master ip")
4102 feedback_fn("Changing master_netdev from %s to %s" %
4103 (master_params.netdev, self.op.master_netdev))
4104 self.cluster.master_netdev = self.op.master_netdev
4106 if self.op.master_netmask:
4107 master_params = self.cfg.GetMasterNetworkParameters()
4108 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4109 result = self.rpc.call_node_change_master_netmask(master_params.name,
4110 master_params.netmask,
4111 self.op.master_netmask,
4112 master_params.ip,
4113 master_params.netdev)
4114 if result.fail_msg:
4115 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4116 feedback_fn(msg)
4118 self.cluster.master_netmask = self.op.master_netmask
4120 self.cfg.Update(self.cluster, feedback_fn)
4122 if self.op.master_netdev:
4123 master_params = self.cfg.GetMasterNetworkParameters()
4124 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4125 self.op.master_netdev)
4126 ems = self.cfg.GetUseExternalMipScript()
4127 result = self.rpc.call_node_activate_master_ip(master_params.name,
4128 master_params, ems)
4129 if result.fail_msg:
4130 self.LogWarning("Could not re-enable the master ip on"
4131 " the master, please restart manually: %s",
4132 result.fail_msg)
4135 def _UploadHelper(lu, nodes, fname):
4136 """Helper for uploading a file and showing warnings.
4139 if os.path.exists(fname):
4140 result = lu.rpc.call_upload_file(nodes, fname)
4141 for to_node, to_result in result.items():
4142 msg = to_result.fail_msg
4144 msg = ("Copy of file %s to node %s failed: %s" %
4145 (fname, to_node, msg))
4146 lu.proc.LogWarning(msg)
4149 def _ComputeAncillaryFiles(cluster, redist):
4150 """Compute files external to Ganeti which need to be consistent.
4152 @type redist: boolean
4153 @param redist: Whether to include files which need to be redistributed
4156 # Compute files for all nodes
4157 files_all = set([
4158 constants.SSH_KNOWN_HOSTS_FILE,
4159 constants.CONFD_HMAC_KEY,
4160 constants.CLUSTER_DOMAIN_SECRET_FILE,
4161 constants.SPICE_CERT_FILE,
4162 constants.SPICE_CACERT_FILE,
4163 constants.RAPI_USERS_FILE,
4164 ])
4166 if not redist:
4167 files_all.update(constants.ALL_CERT_FILES)
4168 files_all.update(ssconf.SimpleStore().GetFileList())
4169 else:
4170 # we need to ship at least the RAPI certificate
4171 files_all.add(constants.RAPI_CERT_FILE)
4173 if cluster.modify_etc_hosts:
4174 files_all.add(constants.ETC_HOSTS)
4176 # Files which are optional, these must:
4177 # - be present in one other category as well
4178 # - either exist or not exist on all nodes of that category (mc, vm all)
4179 files_opt = set([
4180 constants.RAPI_USERS_FILE,
4181 ])
4183 # Files which should only be on master candidates
4184 files_mc = set()
4186 if not redist:
4187 files_mc.add(constants.CLUSTER_CONF_FILE)
4189 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4190 # replication
4191 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4193 # Files which should only be on VM-capable nodes
4194 files_vm = set(filename
4195 for hv_name in cluster.enabled_hypervisors
4196 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4198 files_opt |= set(filename
4199 for hv_name in cluster.enabled_hypervisors
4200 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4202 # Filenames in each category must be unique
4203 all_files_set = files_all | files_mc | files_vm
4204 assert (len(all_files_set) ==
4205 sum(map(len, [files_all, files_mc, files_vm]))), \
4206 "Found file listed in more than one file list"
4208 # Optional files must be present in one other category
4209 assert all_files_set.issuperset(files_opt), \
4210 "Optional file not in a different required list"
4212 return (files_all, files_opt, files_mc, files_vm)
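# Example of the returned tuple (a sketch; the exact contents depend on
# build-time constants and the cluster object):
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, redist=True)
#   # files_all: e.g. known_hosts, the confd HMAC key, SPICE/RAPI certs
#   # files_mc:  empty for redist=True, as ConfigWriter handles the config
#   # files_vm:  hypervisor-specific files for VM-capable nodes only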
4215 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4216 """Distribute additional files which are part of the cluster configuration.
4218 ConfigWriter takes care of distributing the config and ssconf files, but
4219 there are more files which should be distributed to all nodes. This function
4220 makes sure those are copied.
4222 @param lu: calling logical unit
4223 @param additional_nodes: list of nodes not in the config to distribute to
4224 @type additional_vm: boolean
4225 @param additional_vm: whether the additional nodes are vm-capable or not
4228 # Gather target nodes
4229 cluster = lu.cfg.GetClusterInfo()
4230 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4232 online_nodes = lu.cfg.GetOnlineNodeList()
4233 vm_nodes = lu.cfg.GetVmCapableNodeList()
4235 if additional_nodes is not None:
4236 online_nodes.extend(additional_nodes)
4237 if additional_vm:
4238 vm_nodes.extend(additional_nodes)
4240 # Never distribute to master node
4241 for nodelist in [online_nodes, vm_nodes]:
4242 if master_info.name in nodelist:
4243 nodelist.remove(master_info.name)
4246 (files_all, _, files_mc, files_vm) = \
4247 _ComputeAncillaryFiles(cluster, True)
4249 # Never re-distribute configuration file from here
4250 assert not (constants.CLUSTER_CONF_FILE in files_all or
4251 constants.CLUSTER_CONF_FILE in files_vm)
4252 assert not files_mc, "Master candidates not handled in this function"
4254 filemap = [
4255 (online_nodes, files_all),
4256 (vm_nodes, files_vm),
4257 ]
4259 # Upload the files
4260 for (node_list, files) in filemap:
4261 for fname in files:
4262 _UploadHelper(lu, node_list, fname)
4265 class LUClusterRedistConf(NoHooksLU):
4266 """Force the redistribution of cluster configuration.
4268 This is a very simple LU.
4273 def ExpandNames(self):
4274 self.needed_locks = {
4275 locking.LEVEL_NODE: locking.ALL_SET,
4276 }
4277 self.share_locks[locking.LEVEL_NODE] = 1
4279 def Exec(self, feedback_fn):
4280 """Redistribute the configuration.
4283 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4284 _RedistributeAncillaryFiles(self)
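# Sketch: this LU backs "gnt-cluster redist-conf"; an equivalent manual
# submission would be roughly:
#
#   op = opcodes.OpClusterRedistConf()
#   cli.SubmitOpCode(op)  # re-pushes config and ancillary files everywhere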
4287 class LUClusterActivateMasterIp(NoHooksLU):
4288 """Activate the master IP on the master node.
4291 def Exec(self, feedback_fn):
4292 """Activate the master IP.
4295 master_params = self.cfg.GetMasterNetworkParameters()
4296 ems = self.cfg.GetUseExternalMipScript()
4297 result = self.rpc.call_node_activate_master_ip(master_params.name,
4298 master_params, ems)
4299 result.Raise("Could not activate the master IP")
4302 class LUClusterDeactivateMasterIp(NoHooksLU):
4303 """Deactivate the master IP on the master node.
4306 def Exec(self, feedback_fn):
4307 """Deactivate the master IP.
4310 master_params = self.cfg.GetMasterNetworkParameters()
4311 ems = self.cfg.GetUseExternalMipScript()
4312 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4313 master_params, ems)
4314 result.Raise("Could not deactivate the master IP")
4317 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4318 """Sleep and poll for an instance's disk to sync.
4321 if not instance.disks or disks is not None and not disks:
4322 return True
4324 disks = _ExpandCheckDisks(instance, disks)
4326 if not oneshot:
4327 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4329 node = instance.primary_node
4331 for dev in disks:
4332 lu.cfg.SetDiskID(dev, node)
4334 # TODO: Convert to utils.Retry
4336 retries = 0
4337 degr_retries = 10 # in seconds, as we sleep 1 second each time
4338 while True:
4339 max_time = 0
4340 done = True
4341 cumul_degraded = False
4342 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4343 msg = rstats.fail_msg
4345 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4348 raise errors.RemoteError("Can't contact node %s for mirror data,"
4349 " aborting." % node)
4352 rstats = rstats.payload
4354 for i, mstat in enumerate(rstats):
4356 lu.LogWarning("Can't compute data for node %s/%s",
4357 node, disks[i].iv_name)
4360 cumul_degraded = (cumul_degraded or
4361 (mstat.is_degraded and mstat.sync_percent is None))
4362 if mstat.sync_percent is not None:
4363 done = False
4364 if mstat.estimated_time is not None:
4365 rem_time = ("%s remaining (estimated)" %
4366 utils.FormatSeconds(mstat.estimated_time))
4367 max_time = mstat.estimated_time
4369 rem_time = "no time estimate"
4370 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4371 (disks[i].iv_name, mstat.sync_percent, rem_time))
4373 # if we're done but degraded, let's do a few small retries, to
4374 # make sure we see a stable and not transient situation; therefore
4375 # we force restart of the loop
4376 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4377 logging.info("Degraded disks found, %d retries left", degr_retries)
4385 time.sleep(min(60, max_time))
4388 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4389 return not cumul_degraded
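# Typical use (a sketch matching callers elsewhere in this module): after
# creating or activating mirrored disks, poll until they are consistent:
#
#   disk_abort = not _WaitForSync(lu, instance)
#   if disk_abort:
#     raise errors.OpExecError("There are some degraded disks for"
#                              " this instance")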
4392 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4393 """Check that mirrors are not degraded.
4395 The ldisk parameter, if True, will change the test from the
4396 is_degraded attribute (which represents overall non-ok status for
4397 the device(s)) to the ldisk (representing the local storage status).
4400 lu.cfg.SetDiskID(dev, node)
4402 result = True
4404 if on_primary or dev.AssembleOnSecondary():
4405 rstats = lu.rpc.call_blockdev_find(node, dev)
4406 msg = rstats.fail_msg
4407 if msg:
4408 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4409 result = False
4410 elif not rstats.payload:
4411 lu.LogWarning("Can't find disk on node %s", node)
4412 result = False
4413 else:
4414 if ldisk:
4415 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4416 else:
4417 result = result and not rstats.payload.is_degraded
4419 if dev.children:
4420 for child in dev.children:
4421 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4423 return result
4426 class LUOobCommand(NoHooksLU):
4427 """Logical unit for OOB handling.
4431 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4433 def ExpandNames(self):
4434 """Gather locks we need.
4437 if self.op.node_names:
4438 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4439 lock_names = self.op.node_names
4440 else:
4441 lock_names = locking.ALL_SET
4443 self.needed_locks = {
4444 locking.LEVEL_NODE: lock_names,
4445 }
4447 def CheckPrereq(self):
4448 """Check prerequisites.
4451 - the node exists in the configuration
4454 Any errors are signaled by raising errors.OpPrereqError.
4458 self.master_node = self.cfg.GetMasterNode()
4460 assert self.op.power_delay >= 0.0
4462 if self.op.node_names:
4463 if (self.op.command in self._SKIP_MASTER and
4464 self.master_node in self.op.node_names):
4465 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4466 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4468 if master_oob_handler:
4469 additional_text = ("run '%s %s %s' if you want to operate on the"
4470 " master regardless") % (master_oob_handler,
4474 additional_text = "it does not support out-of-band operations"
4476 raise errors.OpPrereqError(("Operating on the master node %s is not"
4477 " allowed for %s; %s") %
4478 (self.master_node, self.op.command,
4479 additional_text), errors.ECODE_INVAL)
4480 else:
4481 self.op.node_names = self.cfg.GetNodeList()
4482 if self.op.command in self._SKIP_MASTER:
4483 self.op.node_names.remove(self.master_node)
4485 if self.op.command in self._SKIP_MASTER:
4486 assert self.master_node not in self.op.node_names
4487 self.nodes = []
4488 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4489 if node is None:
4490 raise errors.OpPrereqError("Node %s not found" % node_name,
4491 errors.ECODE_NOENT)
4492 else:
4493 self.nodes.append(node)
4495 if (not self.op.ignore_status and
4496 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4497 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4498 " not marked offline") % node_name,
4501 def Exec(self, feedback_fn):
4502 """Execute OOB and return result if we expect any.
4505 master_node = self.master_node
4506 ret = []
4508 for idx, node in enumerate(utils.NiceSort(self.nodes,
4509 key=lambda node: node.name)):
4510 node_entry = [(constants.RS_NORMAL, node.name)]
4511 ret.append(node_entry)
4513 oob_program = _SupportsOob(self.cfg, node)
4515 if not oob_program:
4516 node_entry.append((constants.RS_UNAVAIL, None))
4517 continue
4519 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4520 self.op.command, oob_program, node.name)
4521 result = self.rpc.call_run_oob(master_node, oob_program,
4522 self.op.command, node.name,
4523 self.op.timeout)
4525 if result.fail_msg:
4526 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4527 node.name, result.fail_msg)
4528 node_entry.append((constants.RS_NODATA, None))
4529 else:
4530 try:
4531 self._CheckPayload(result)
4532 except errors.OpExecError, err:
4533 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4534 node.name, err)
4535 node_entry.append((constants.RS_NODATA, None))
4536 else:
4537 if self.op.command == constants.OOB_HEALTH:
4538 # For health we should log important events
4539 for item, status in result.payload:
4540 if status in [constants.OOB_STATUS_WARNING,
4541 constants.OOB_STATUS_CRITICAL]:
4542 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4543 item, node.name, status)
4545 if self.op.command == constants.OOB_POWER_ON:
4546 node.powered = True
4547 elif self.op.command == constants.OOB_POWER_OFF:
4548 node.powered = False
4549 elif self.op.command == constants.OOB_POWER_STATUS:
4550 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4551 if powered != node.powered:
4552 logging.warning(("Recorded power state (%s) of node '%s' does not"
4553 " match actual power state (%s)"), node.powered,
4556 # For configuration changing commands we should update the node
4557 if self.op.command in (constants.OOB_POWER_ON,
4558 constants.OOB_POWER_OFF):
4559 self.cfg.Update(node, feedback_fn)
4561 node_entry.append((constants.RS_NORMAL, result.payload))
4563 if (self.op.command == constants.OOB_POWER_ON and
4564 idx < len(self.nodes) - 1):
4565 time.sleep(self.op.power_delay)
4567 return ret
4569 def _CheckPayload(self, result):
4570 """Checks if the payload is valid.
4572 @param result: RPC result
4573 @raises errors.OpExecError: If payload is not valid
4576 errs = []
4577 if self.op.command == constants.OOB_HEALTH:
4578 if not isinstance(result.payload, list):
4579 errs.append("command 'health' is expected to return a list but got %s" %
4580 type(result.payload))
4581 else:
4582 for item, status in result.payload:
4583 if status not in constants.OOB_STATUSES:
4584 errs.append("health item '%s' has invalid status '%s'" %
4587 if self.op.command == constants.OOB_POWER_STATUS:
4588 if not isinstance(result.payload, dict):
4589 errs.append("power-status is expected to return a dict but got %s" %
4590 type(result.payload))
4592 if self.op.command in [
4593 constants.OOB_POWER_ON,
4594 constants.OOB_POWER_OFF,
4595 constants.OOB_POWER_CYCLE,
4596 ]:
4597 if result.payload is not None:
4598 errs.append("%s is expected to not return payload but got '%s'" %
4599 (self.op.command, result.payload))
4602 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4603 utils.CommaJoin(errs))
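# Payload shapes accepted by _CheckPayload above (illustrative examples):
#
#   OOB_HEALTH:             [["disk0", "OK"], ["psu1", "CRITICAL"], ...]
#   OOB_POWER_STATUS:       {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE: None (these commands must not return a payload)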
4606 class _OsQuery(_QueryBase):
4607 FIELDS = query.OS_FIELDS
4609 def ExpandNames(self, lu):
4610 # Lock all nodes in shared mode
4611 # Temporary removal of locks, should be reverted later
4612 # TODO: reintroduce locks when they are lighter-weight
4613 lu.needed_locks = {}
4614 #self.share_locks[locking.LEVEL_NODE] = 1
4615 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4617 # The following variables interact with _QueryBase._GetNames
4618 if self.names:
4619 self.wanted = self.names
4620 else:
4621 self.wanted = locking.ALL_SET
4623 self.do_locking = self.use_locking
4625 def DeclareLocks(self, lu, level):
4626 pass
4628 @staticmethod
4629 def _DiagnoseByOS(rlist):
4630 """Remaps a per-node return list into a per-os per-node dictionary
4632 @param rlist: a map with node names as keys and OS objects as values
4635 @return: a dictionary with osnames as keys and as value another
4636 map, with nodes as keys and tuples of (path, status, diagnose,
4637 variants, parameters, api_versions) as values, eg::
4639 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4640 (/srv/..., False, "invalid api")],
4641 "node2": [(/srv/..., True, "", [], [])]}
4645 all_os = {}
4646 # we build here the list of nodes that didn't fail the RPC (at RPC
4647 # level), so that nodes with a non-responding node daemon don't
4648 # make all OSes invalid
4649 good_nodes = [node_name for node_name in rlist
4650 if not rlist[node_name].fail_msg]
4651 for node_name, nr in rlist.items():
4652 if nr.fail_msg or not nr.payload:
4653 continue
4654 for (name, path, status, diagnose, variants,
4655 params, api_versions) in nr.payload:
4656 if name not in all_os:
4657 # build a list of nodes for this os containing empty lists
4658 # for each node in node_list
4659 all_os[name] = {}
4660 for nname in good_nodes:
4661 all_os[name][nname] = []
4662 # convert params from [name, help] to (name, help)
4663 params = [tuple(v) for v in params]
4664 all_os[name][node_name].append((path, status, diagnose,
4665 variants, params, api_versions))
4667 return all_os
4668 def _GetQueryData(self, lu):
4669 """Computes the list of nodes and their attributes.
4672 # Locking is not used
4673 assert not (compat.any(lu.glm.is_owned(level)
4674 for level in locking.LEVELS
4675 if level != locking.LEVEL_CLUSTER) or
4676 self.do_locking or self.use_locking)
4678 valid_nodes = [node.name
4679 for node in lu.cfg.GetAllNodesInfo().values()
4680 if not node.offline and node.vm_capable]
4681 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4682 cluster = lu.cfg.GetClusterInfo()
4684 data = {}
4686 for (os_name, os_data) in pol.items():
4687 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4688 hidden=(os_name in cluster.hidden_os),
4689 blacklisted=(os_name in cluster.blacklisted_os))
4691 variants = set()
4692 parameters = set()
4693 api_versions = set()
4695 for idx, osl in enumerate(os_data.values()):
4696 info.valid = bool(info.valid and osl and osl[0][1])
4697 if not info.valid:
4698 break
4700 (node_variants, node_params, node_api) = osl[0][3:6]
4701 if idx == 0:
4702 # first entry
4703 variants.update(node_variants)
4704 parameters.update(node_params)
4705 api_versions.update(node_api)
4706 else:
4707 # Filter out inconsistent values
4708 variants.intersection_update(node_variants)
4709 parameters.intersection_update(node_params)
4710 api_versions.intersection_update(node_api)
4712 info.variants = list(variants)
4713 info.parameters = list(parameters)
4714 info.api_versions = list(api_versions)
4716 data[os_name] = info
4718 # Prepare data in requested order
4719 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4720 if name in data]
4723 class LUOsDiagnose(NoHooksLU):
4724 """Logical unit for OS diagnose/query.
4730 def _BuildFilter(fields, names):
4731 """Builds a filter for querying OSes.
4734 name_filter = qlang.MakeSimpleFilter("name", names)
4736 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4737 # respective field is not requested
4738 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4739 for fname in ["hidden", "blacklisted"]
4740 if fname not in fields]
4741 if "valid" not in fields:
4742 status_filter.append([qlang.OP_TRUE, "valid"])
4744 if status_filter:
4745 status_filter.insert(0, qlang.OP_AND)
4746 else:
4747 status_filter = None
4749 if name_filter and status_filter:
4750 return [qlang.OP_AND, name_filter, status_filter]
4751 elif name_filter:
4752 return name_filter
4753 else:
4754 return status_filter
4756 def CheckArguments(self):
4757 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4758 self.op.output_fields, False)
4760 def ExpandNames(self):
4761 self.oq.ExpandNames(self)
4763 def Exec(self, feedback_fn):
4764 return self.oq.OldStyleQuery(self)
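# Example (sketch): an OS listing as done by "gnt-os list" would submit
#
#   op = opcodes.OpOsDiagnose(output_fields=["name", "variants"], names=[])
#   cli.SubmitOpCode(op)
#
# Hidden, blacklisted or invalid OSes are filtered out unless the caller
# explicitly asks for the corresponding field, per _BuildFilter above.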
4767 class LUNodeRemove(LogicalUnit):
4768 """Logical unit for removing a node.
4771 HPATH = "node-remove"
4772 HTYPE = constants.HTYPE_NODE
4774 def BuildHooksEnv(self):
4777 This doesn't run on the target node in the pre phase as a failed
4778 node would then be impossible to remove.
4782 "OP_TARGET": self.op.node_name,
4783 "NODE_NAME": self.op.node_name,
4786 def BuildHooksNodes(self):
4787 """Build hooks nodes.
4790 all_nodes = self.cfg.GetNodeList()
4791 try:
4792 all_nodes.remove(self.op.node_name)
4793 except ValueError:
4794 logging.warning("Node '%s', which is about to be removed, was not found"
4795 " in the list of all nodes", self.op.node_name)
4796 return (all_nodes, all_nodes)
4798 def CheckPrereq(self):
4799 """Check prerequisites.
4802 - the node exists in the configuration
4803 - it does not have primary or secondary instances
4804 - it's not the master
4806 Any errors are signaled by raising errors.OpPrereqError.
4809 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4810 node = self.cfg.GetNodeInfo(self.op.node_name)
4811 assert node is not None
4813 masternode = self.cfg.GetMasterNode()
4814 if node.name == masternode:
4815 raise errors.OpPrereqError("Node is the master node, failover to another"
4816 " node is required", errors.ECODE_INVAL)
4818 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4819 if node.name in instance.all_nodes:
4820 raise errors.OpPrereqError("Instance %s is still running on the node,"
4821 " please remove first" % instance_name,
4823 self.op.node_name = node.name
4826 def Exec(self, feedback_fn):
4827 """Removes the node from the cluster.
4831 logging.info("Stopping the node daemon and removing configs from node %s",
4834 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4836 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4837 "Not owning BGL"
4839 # Promote nodes to master candidate as needed
4840 _AdjustCandidatePool(self, exceptions=[node.name])
4841 self.context.RemoveNode(node.name)
4843 # Run post hooks on the node before it's removed
4844 _RunPostHook(self, node.name)
4846 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4847 msg = result.fail_msg
4849 self.LogWarning("Errors encountered on the remote node while leaving"
4850 " the cluster: %s", msg)
4852 # Remove node from our /etc/hosts
4853 if self.cfg.GetClusterInfo().modify_etc_hosts:
4854 master_node = self.cfg.GetMasterNode()
4855 result = self.rpc.call_etc_hosts_modify(master_node,
4856 constants.ETC_HOSTS_REMOVE,
4857 node.name, None)
4858 result.Raise("Can't update hosts file with new host data")
4859 _RedistributeAncillaryFiles(self)
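# Summary of the removal flow implemented above ("gnt-node remove <name>"):
# the candidate pool is rebalanced with the node excluded, post hooks run
# while the node is still reachable, the node daemon is told to leave the
# cluster, and finally /etc/hosts and the ancillary files are refreshed.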
4862 class _NodeQuery(_QueryBase):
4863 FIELDS = query.NODE_FIELDS
4865 def ExpandNames(self, lu):
4866 lu.needed_locks = {}
4867 lu.share_locks = _ShareAll()
4869 if self.names:
4870 self.wanted = _GetWantedNodes(lu, self.names)
4871 else:
4872 self.wanted = locking.ALL_SET
4874 self.do_locking = (self.use_locking and
4875 query.NQ_LIVE in self.requested_data)
4877 if self.do_locking:
4878 # If any non-static field is requested we need to lock the nodes
4879 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4881 def DeclareLocks(self, lu, level):
4882 pass
4884 def _GetQueryData(self, lu):
4885 """Computes the list of nodes and their attributes.
4888 all_info = lu.cfg.GetAllNodesInfo()
4890 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4892 # Gather data as requested
4893 if query.NQ_LIVE in self.requested_data:
4894 # filter out non-vm_capable nodes
4895 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4897 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4898 [lu.cfg.GetHypervisorType()])
4899 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4900 for (name, nresult) in node_data.items()
4901 if not nresult.fail_msg and nresult.payload)
4902 else:
4903 live_data = None
4905 if query.NQ_INST in self.requested_data:
4906 node_to_primary = dict([(name, set()) for name in nodenames])
4907 node_to_secondary = dict([(name, set()) for name in nodenames])
4909 inst_data = lu.cfg.GetAllInstancesInfo()
4911 for inst in inst_data.values():
4912 if inst.primary_node in node_to_primary:
4913 node_to_primary[inst.primary_node].add(inst.name)
4914 for secnode in inst.secondary_nodes:
4915 if secnode in node_to_secondary:
4916 node_to_secondary[secnode].add(inst.name)
4917 else:
4918 node_to_primary = None
4919 node_to_secondary = None
4921 if query.NQ_OOB in self.requested_data:
4922 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4923 for name, node in all_info.iteritems())
4924 else:
4925 oob_support = None
4927 if query.NQ_GROUP in self.requested_data:
4928 groups = lu.cfg.GetAllNodeGroupsInfo()
4929 else:
4930 groups = {}
4932 return query.NodeQueryData([all_info[name] for name in nodenames],
4933 live_data, lu.cfg.GetMasterNode(),
4934 node_to_primary, node_to_secondary, groups,
4935 oob_support, lu.cfg.GetClusterInfo())
4938 class LUNodeQuery(NoHooksLU):
4939 """Logical unit for querying nodes.
4942 # pylint: disable=W0142
4945 def CheckArguments(self):
4946 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4947 self.op.output_fields, self.op.use_locking)
4949 def ExpandNames(self):
4950 self.nq.ExpandNames(self)
4952 def DeclareLocks(self, level):
4953 self.nq.DeclareLocks(self, level)
4955 def Exec(self, feedback_fn):
4956 return self.nq.OldStyleQuery(self)
4959 class LUNodeQueryvols(NoHooksLU):
4960 """Logical unit for getting volumes on node(s).
4964 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4965 _FIELDS_STATIC = utils.FieldSet("node")
4967 def CheckArguments(self):
4968 _CheckOutputFields(static=self._FIELDS_STATIC,
4969 dynamic=self._FIELDS_DYNAMIC,
4970 selected=self.op.output_fields)
4972 def ExpandNames(self):
4973 self.share_locks = _ShareAll()
4974 self.needed_locks = {}
4976 if not self.op.nodes:
4977 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4978 else:
4979 self.needed_locks[locking.LEVEL_NODE] = \
4980 _GetWantedNodes(self, self.op.nodes)
4982 def Exec(self, feedback_fn):
4983 """Computes the list of nodes and their attributes.
4986 nodenames = self.owned_locks(locking.LEVEL_NODE)
4987 volumes = self.rpc.call_node_volumes(nodenames)
4989 ilist = self.cfg.GetAllInstancesInfo()
4990 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4991 output = []
4993 for node in nodenames:
4994 nresult = volumes[node]
4995 if nresult.offline:
4996 continue
4997 msg = nresult.fail_msg
4998 if msg:
4999 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5000 continue
5002 node_vols = sorted(nresult.payload,
5003 key=operator.itemgetter("dev"))
5005 for vol in node_vols:
5006 node_output = []
5007 for field in self.op.output_fields:
5008 if field == "node":
5009 val = node
5010 elif field == "phys":
5011 val = vol["dev"]
5012 elif field == "vg":
5013 val = vol["vg"]
5014 elif field == "name":
5015 val = vol["name"]
5016 elif field == "size":
5017 val = int(float(vol["size"]))
5018 elif field == "instance":
5019 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5020 else:
5021 raise errors.ParameterError(field)
5022 node_output.append(str(val))
5024 output.append(node_output)
5026 return output
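# Example output row (a sketch; one string per requested output field, in
# the order given by self.op.output_fields):
#
#   ["node1.example.com", "/dev/sda5", "xenvg", "0df3052c.disk0", "2048",
#    "instance1.example.com"]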
5029 class LUNodeQueryStorage(NoHooksLU):
5030 """Logical unit for getting information on storage units on node(s).
5033 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5036 def CheckArguments(self):
5037 _CheckOutputFields(static=self._FIELDS_STATIC,
5038 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5039 selected=self.op.output_fields)
5041 def ExpandNames(self):
5042 self.share_locks = _ShareAll()
5043 self.needed_locks = {}
5045 if self.op.nodes:
5046 self.needed_locks[locking.LEVEL_NODE] = \
5047 _GetWantedNodes(self, self.op.nodes)
5048 else:
5049 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5051 def Exec(self, feedback_fn):
5052 """Computes the list of nodes and their attributes.
5055 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5057 # Always get name to sort by
5058 if constants.SF_NAME in self.op.output_fields:
5059 fields = self.op.output_fields[:]
5060 else:
5061 fields = [constants.SF_NAME] + self.op.output_fields
5063 # Never ask for node or type as it's only known to the LU
5064 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5065 while extra in fields:
5066 fields.remove(extra)
5068 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5069 name_idx = field_idx[constants.SF_NAME]
5071 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5072 data = self.rpc.call_storage_list(self.nodes,
5073 self.op.storage_type, st_args,
5074 self.op.name, fields)
5076 result = []
5078 for node in utils.NiceSort(self.nodes):
5079 nresult = data[node]
5080 if nresult.offline:
5081 continue
5083 msg = nresult.fail_msg
5084 if msg:
5085 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5086 continue
5088 rows = dict([(row[name_idx], row) for row in nresult.payload])
5090 for name in utils.NiceSort(rows.keys()):
5091 row = rows[name]
5092 out = []
5095 for field in self.op.output_fields:
5096 if field == constants.SF_NODE:
5097 val = node
5098 elif field == constants.SF_TYPE:
5099 val = self.op.storage_type
5100 elif field in field_idx:
5101 val = row[field_idx[field]]
5102 else:
5103 raise errors.ParameterError(field)
5104 out.append(val)
5106 result.append(out)
5108 return result
5112 class _InstanceQuery(_QueryBase):
5113 FIELDS = query.INSTANCE_FIELDS
5115 def ExpandNames(self, lu):
5116 lu.needed_locks = {}
5117 lu.share_locks = _ShareAll()
5119 if self.names:
5120 self.wanted = _GetWantedInstances(lu, self.names)
5121 else:
5122 self.wanted = locking.ALL_SET
5124 self.do_locking = (self.use_locking and
5125 query.IQ_LIVE in self.requested_data)
5126 if self.do_locking:
5127 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5128 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5129 lu.needed_locks[locking.LEVEL_NODE] = []
5130 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5132 self.do_grouplocks = (self.do_locking and
5133 query.IQ_NODES in self.requested_data)
5135 def DeclareLocks(self, lu, level):
5137 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5138 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5140 # Lock all groups used by instances optimistically; this requires going
5141 # via the node before it's locked, requiring verification later on
5142 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5143 set(group_uuid
5144 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5145 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5146 elif level == locking.LEVEL_NODE:
5147 lu._LockInstancesNodes() # pylint: disable=W0212
5150 def _CheckGroupLocks(lu):
5151 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5152 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5154 # Check if node groups for locked instances are still correct
5155 for instance_name in owned_instances:
5156 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5158 def _GetQueryData(self, lu):
5159 """Computes the list of instances and their attributes.
5162 if self.do_grouplocks:
5163 self._CheckGroupLocks(lu)
5165 cluster = lu.cfg.GetClusterInfo()
5166 all_info = lu.cfg.GetAllInstancesInfo()
5168 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5170 instance_list = [all_info[name] for name in instance_names]
5171 nodes = frozenset(itertools.chain(*(inst.all_nodes
5172 for inst in instance_list)))
5173 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5174 bad_nodes = []
5175 offline_nodes = []
5176 wrongnode_inst = set()
5178 # Gather data as requested
5179 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5180 live_data = {}
5181 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5182 for name in nodes:
5183 result = node_data[name]
5184 if result.offline:
5185 # offline nodes will be in both lists
5186 assert result.fail_msg
5187 offline_nodes.append(name)
5188 elif result.fail_msg:
5189 bad_nodes.append(name)
5190 elif result.payload:
5191 for inst in result.payload:
5192 if inst in all_info:
5193 if all_info[inst].primary_node == name:
5194 live_data.update(result.payload)
5195 else:
5196 wrongnode_inst.add(inst)
5197 else:
5198 # orphan instance; we don't list it here as we don't
5199 # handle this case yet in the output of instance listing
5200 logging.warning("Orphan instance '%s' found on node %s",
5202 # else no instance is alive
5206 if query.IQ_DISKUSAGE in self.requested_data:
5207 disk_usage = dict((inst.name,
5208 _ComputeDiskSize(inst.disk_template,
5209 [{constants.IDISK_SIZE: disk.size}
5210 for disk in inst.disks]))
5211 for inst in instance_list)
5212 else:
5213 disk_usage = None
5215 if query.IQ_CONSOLE in self.requested_data:
5216 consinfo = {}
5217 for inst in instance_list:
5218 if inst.name in live_data:
5219 # Instance is running
5220 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5221 else:
5222 consinfo[inst.name] = None
5223 assert set(consinfo.keys()) == set(instance_names)
5224 else:
5225 consinfo = None
5227 if query.IQ_NODES in self.requested_data:
5228 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5229 instance_list)))
5230 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5231 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5232 for uuid in set(map(operator.attrgetter("group"),
5238 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5239 disk_usage, offline_nodes, bad_nodes,
5240 live_data, wrongnode_inst, consinfo,
5241 nodes, groups)
5244 class LUQuery(NoHooksLU):
5245 """Query for resources/items of a certain kind.
5248 # pylint: disable=W0142
5251 def CheckArguments(self):
5252 qcls = _GetQueryImplementation(self.op.what)
5254 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5256 def ExpandNames(self):
5257 self.impl.ExpandNames(self)
5259 def DeclareLocks(self, level):
5260 self.impl.DeclareLocks(self, level)
5262 def Exec(self, feedback_fn):
5263 return self.impl.NewStyleQuery(self)
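# Sketch: LUQuery serves opcodes.OpQuery for any registered resource type;
# a filtered node query (filter syntax per the qlang module) could be:
#
#   op = opcodes.OpQuery(what=constants.QR_NODE,
#                        fields=["name", "role"],
#                        qfilter=["=", "name", "node1.example.com"])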
5266 class LUQueryFields(NoHooksLU):
5267 """Query for resources/items of a certain kind.
5270 # pylint: disable=W0142
5273 def CheckArguments(self):
5274 self.qcls = _GetQueryImplementation(self.op.what)
5276 def ExpandNames(self):
5277 self.needed_locks = {}
5279 def Exec(self, feedback_fn):
5280 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5283 class LUNodeModifyStorage(NoHooksLU):
5284 """Logical unit for modifying a storage volume on a node.
5289 def CheckArguments(self):
5290 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5292 storage_type = self.op.storage_type
5294 try:
5295 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5296 except KeyError:
5297 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5298 " modified" % storage_type,
5299 errors.ECODE_INVAL)
5301 diff = set(self.op.changes.keys()) - modifiable
5303 raise errors.OpPrereqError("The following fields can not be modified for"
5304 " storage units of type '%s': %r" %
5305 (storage_type, list(diff)),
5306 errors.ECODE_INVAL)
5308 def ExpandNames(self):
5309 self.needed_locks = {
5310 locking.LEVEL_NODE: self.op.node_name,
5311 }
5313 def Exec(self, feedback_fn):
5314 """Computes the list of nodes and their attributes.
5317 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5318 result = self.rpc.call_storage_modify(self.op.node_name,
5319 self.op.storage_type, st_args,
5320 self.op.name, self.op.changes)
5321 result.Raise("Failed to modify storage unit '%s' on %s" %
5322 (self.op.name, self.op.node_name))
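# Example (sketch): "gnt-node modify-storage" builds an opcode like the
# following; only fields listed in constants.MODIFIABLE_STORAGE_FIELDS for
# the storage type pass CheckArguments above:
#
#   op = opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                                    storage_type=constants.ST_LVM_PV,
#                                    name="/dev/sda3",
#                                    changes={constants.SF_ALLOCATABLE: True})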
5325 class LUNodeAdd(LogicalUnit):
5326 """Logical unit for adding node to the cluster.
5329 HPATH = "node-add"
5330 HTYPE = constants.HTYPE_NODE
5331 _NFLAGS = ["master_capable", "vm_capable"]
5333 def CheckArguments(self):
5334 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5335 # validate/normalize the node name
5336 self.hostname = netutils.GetHostname(name=self.op.node_name,
5337 family=self.primary_ip_family)
5338 self.op.node_name = self.hostname.name
5340 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5341 raise errors.OpPrereqError("Cannot readd the master node",
5344 if self.op.readd and self.op.group:
5345 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5346 " being readded", errors.ECODE_INVAL)
5348 def BuildHooksEnv(self):
5351 This will run on all nodes before, and on all nodes + the new node after.
5355 "OP_TARGET": self.op.node_name,
5356 "NODE_NAME": self.op.node_name,
5357 "NODE_PIP": self.op.primary_ip,
5358 "NODE_SIP": self.op.secondary_ip,
5359 "MASTER_CAPABLE": str(self.op.master_capable),
5360 "VM_CAPABLE": str(self.op.vm_capable),
5363 def BuildHooksNodes(self):
5364 """Build hooks nodes.
5367 # Exclude added node
5368 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5369 post_nodes = pre_nodes + [self.op.node_name, ]
5371 return (pre_nodes, post_nodes)
5373 def CheckPrereq(self):
5374 """Check prerequisites.
5377 - the new node is not already in the config
5379 - its parameters (single/dual homed) match the cluster
5381 Any errors are signaled by raising errors.OpPrereqError.
5384 cfg = self.cfg
5385 hostname = self.hostname
5386 node = hostname.name
5387 primary_ip = self.op.primary_ip = hostname.ip
5388 if self.op.secondary_ip is None:
5389 if self.primary_ip_family == netutils.IP6Address.family:
5390 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5391 " IPv4 address must be given as secondary",
5392 errors.ECODE_INVAL)
5393 self.op.secondary_ip = primary_ip
5395 secondary_ip = self.op.secondary_ip
5396 if not netutils.IP4Address.IsValid(secondary_ip):
5397 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5398 " address" % secondary_ip, errors.ECODE_INVAL)
5400 node_list = cfg.GetNodeList()
5401 if not self.op.readd and node in node_list:
5402 raise errors.OpPrereqError("Node %s is already in the configuration" %
5403 node, errors.ECODE_EXISTS)
5404 elif self.op.readd and node not in node_list:
5405 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5408 self.changed_primary_ip = False
5410 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5411 if self.op.readd and node == existing_node_name:
5412 if existing_node.secondary_ip != secondary_ip:
5413 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5414 " address configuration as before",
5416 if existing_node.primary_ip != primary_ip:
5417 self.changed_primary_ip = True
5419 continue
5421 if (existing_node.primary_ip == primary_ip or
5422 existing_node.secondary_ip == primary_ip or
5423 existing_node.primary_ip == secondary_ip or
5424 existing_node.secondary_ip == secondary_ip):
5425 raise errors.OpPrereqError("New node ip address(es) conflict with"
5426 " existing node %s" % existing_node.name,
5427 errors.ECODE_NOTUNIQUE)
5429 # After this 'if' block, None is no longer a valid value for the
5430 # _capable op attributes
5431 if self.op.readd:
5432 old_node = self.cfg.GetNodeInfo(node)
5433 assert old_node is not None, "Can't retrieve locked node %s" % node
5434 for attr in self._NFLAGS:
5435 if getattr(self.op, attr) is None:
5436 setattr(self.op, attr, getattr(old_node, attr))
5437 else:
5438 for attr in self._NFLAGS:
5439 if getattr(self.op, attr) is None:
5440 setattr(self.op, attr, True)
5442 if self.op.readd and not self.op.vm_capable:
5443 pri, sec = cfg.GetNodeInstances(node)
5445 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5446 " flag set to false, but it already holds"
5447 " instances" % node,
5450 # check that the type of the node (single versus dual homed) is the
5451 # same as for the master
5452 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5453 master_singlehomed = myself.secondary_ip == myself.primary_ip
5454 newbie_singlehomed = secondary_ip == primary_ip
5455 if master_singlehomed != newbie_singlehomed:
5456 if master_singlehomed:
5457 raise errors.OpPrereqError("The master has no secondary ip but the"
5458 " new node has one",
5461 raise errors.OpPrereqError("The master has a secondary ip but the"
5462 " new node doesn't have one",
5465 # checks reachability
5466 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5467 raise errors.OpPrereqError("Node not reachable by ping",
5468 errors.ECODE_ENVIRON)
5470 if not newbie_singlehomed:
5471 # check reachability from my secondary ip to newbie's secondary ip
5472 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5473 source=myself.secondary_ip):
5474 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5475 " based ping to node daemon port",
5476 errors.ECODE_ENVIRON)
5478 if self.op.readd:
5479 exceptions = [node]
5480 else:
5481 exceptions = []
5483 if self.op.master_capable:
5484 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5485 else:
5486 self.master_candidate = False
5488 if self.op.readd:
5489 self.new_node = old_node
5490 else:
5491 node_group = cfg.LookupNodeGroup(self.op.group)
5492 self.new_node = objects.Node(name=node,
5493 primary_ip=primary_ip,
5494 secondary_ip=secondary_ip,
5495 master_candidate=self.master_candidate,
5496 offline=False, drained=False,
5497 group=node_group)
5499 if self.op.ndparams:
5500 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5502 if self.op.hv_state:
5503 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5505 if self.op.disk_state:
5506 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5508 def Exec(self, feedback_fn):
5509 """Adds the new node to the cluster.
5512 new_node = self.new_node
5513 node = new_node.name
5515 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5516 "Not owning BGL"
5518 # We are adding a new node, so we assume it is powered
5519 new_node.powered = True
5521 # for re-adds, reset the offline/drained/master-candidate flags;
5522 # we need to reset here, otherwise offline would prevent RPC calls
5523 # later in the procedure; this also means that if the re-add
5524 # fails, we are left with a non-offlined, broken node
5525 if self.op.readd:
5526 new_node.drained = new_node.offline = False # pylint: disable=W0201
5527 self.LogInfo("Readding a node, the offline/drained flags were reset")
5528 # if we demote the node, we do cleanup later in the procedure
5529 new_node.master_candidate = self.master_candidate
5530 if self.changed_primary_ip:
5531 new_node.primary_ip = self.op.primary_ip
5533 # copy the master/vm_capable flags
5534 for attr in self._NFLAGS:
5535 setattr(new_node, attr, getattr(self.op, attr))
5537 # notify the user about any possible mc promotion
5538 if new_node.master_candidate:
5539 self.LogInfo("Node will be a master candidate")
5541 if self.op.ndparams:
5542 new_node.ndparams = self.op.ndparams
5544 new_node.ndparams = {}
5546 if self.op.hv_state:
5547 new_node.hv_state_static = self.new_hv_state
5549 if self.op.disk_state:
5550 new_node.disk_state_static = self.new_disk_state
5552 # check connectivity
5553 result = self.rpc.call_version([node])[node]
5554 result.Raise("Can't get version information from node %s" % node)
5555 if constants.PROTOCOL_VERSION == result.payload:
5556 logging.info("Communication to node %s fine, sw version %s match",
5557 node, result.payload)
5559 raise errors.OpExecError("Version mismatch master version %s,"
5560 " node version %s" %
5561 (constants.PROTOCOL_VERSION, result.payload))
5563 # Add node to our /etc/hosts, and add key to known_hosts
5564 if self.cfg.GetClusterInfo().modify_etc_hosts:
5565 master_node = self.cfg.GetMasterNode()
5566 result = self.rpc.call_etc_hosts_modify(master_node,
5567 constants.ETC_HOSTS_ADD,
5568 self.hostname.name,
5569 self.hostname.ip)
5570 result.Raise("Can't update hosts file with new host data")
5572 if new_node.secondary_ip != new_node.primary_ip:
5573 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5574 False)
5576 node_verify_list = [self.cfg.GetMasterNode()]
5577 node_verify_param = {
5578 constants.NV_NODELIST: ([node], {}),
5579 # TODO: do a node-net-test as well?
5581 }
5582 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5583 self.cfg.GetClusterName())
5584 for verifier in node_verify_list:
5585 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5586 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5587 if nl_payload:
5588 for failed in nl_payload:
5589 feedback_fn("ssh/hostname verification failed"
5590 " (checking from %s): %s" %
5591 (verifier, nl_payload[failed]))
5592 raise errors.OpExecError("ssh/hostname verification failed")
5594 if self.op.readd:
5595 _RedistributeAncillaryFiles(self)
5596 self.context.ReaddNode(new_node)
5597 # make sure we redistribute the config
5598 self.cfg.Update(new_node, feedback_fn)
5599 # and make sure the new node will not have old files around
5600 if not new_node.master_candidate:
5601 result = self.rpc.call_node_demote_from_mc(new_node.name)
5602 msg = result.fail_msg
5604 self.LogWarning("Node failed to demote itself from master"
5605 " candidate status: %s" % msg)
5606 else:
5607 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5608 additional_vm=self.op.vm_capable)
5609 self.context.AddNode(new_node, self.proc.GetECId())
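# Sketch of the add/readd flow implemented above: verify the node daemon
# version, update /etc/hosts and known_hosts, run an ssh/hostname check
# from the master, then either re-register the node (readd) or add the new
# Node object and redistribute the ancillary files to it.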
5612 class LUNodeSetParams(LogicalUnit):
5613 """Modifies the parameters of a node.
5615 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5616 to the node role (as _ROLE_*)
5617 @cvar _R2F: a dictionary from node role to tuples of flags
5618 @cvar _FLAGS: a list of attribute names corresponding to the flags
5621 HPATH = "node-modify"
5622 HTYPE = constants.HTYPE_NODE
5624 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5625 _F2R = {
5626 (True, False, False): _ROLE_CANDIDATE,
5627 (False, True, False): _ROLE_DRAINED,
5628 (False, False, True): _ROLE_OFFLINE,
5629 (False, False, False): _ROLE_REGULAR,
5630 }
5631 _R2F = dict((v, k) for k, v in _F2R.items())
5632 _FLAGS = ["master_candidate", "drained", "offline"]
5634 def CheckArguments(self):
5635 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5636 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5637 self.op.master_capable, self.op.vm_capable,
5638 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5639 self.op.disk_state]
5640 if all_mods.count(None) == len(all_mods):
5641 raise errors.OpPrereqError("Please pass at least one modification",
5642 errors.ECODE_INVAL)
5643 if all_mods.count(True) > 1:
5644 raise errors.OpPrereqError("Can't set the node into more than one"
5645 " state at the same time",
5648 # Boolean value that tells us whether we might be demoting from MC
5649 self.might_demote = (self.op.master_candidate == False or
5650 self.op.offline == True or
5651 self.op.drained == True or
5652 self.op.master_capable == False)
5654 if self.op.secondary_ip:
5655 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5656 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5657 " address" % self.op.secondary_ip,
5660 self.lock_all = self.op.auto_promote and self.might_demote
5661 self.lock_instances = self.op.secondary_ip is not None
5663 def _InstanceFilter(self, instance):
5664 """Filter for getting affected instances.
5667 return (instance.disk_template in constants.DTS_INT_MIRROR and
5668 self.op.node_name in instance.all_nodes)
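# Example (editorial sketch): the filter is a plain predicate over
# instance objects, so the config can evaluate it directly, as is done in
# ExpandNames/CheckPrereq below:
#
#   affected = self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
#   # -> dict of name -> objects.Instance for DRBD instances on this node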
5670 def ExpandNames(self):
5671 if self.lock_all:
5672 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5673 else:
5674 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5676 # Since modifying a node can have severe effects on currently running
5677 # operations, the resource lock is at least acquired in shared mode
5678 self.needed_locks[locking.LEVEL_NODE_RES] = \
5679 self.needed_locks[locking.LEVEL_NODE]
5681 # Get node resource and instance locks in shared mode; they are not used
5682 # for anything but read-only access
5683 self.share_locks[locking.LEVEL_NODE_RES] = 1
5684 self.share_locks[locking.LEVEL_INSTANCE] = 1
5686 if self.lock_instances:
5687 self.needed_locks[locking.LEVEL_INSTANCE] = \
5688 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5690 def BuildHooksEnv(self):
5693 This runs on the master node.
5696 return {
5697 "OP_TARGET": self.op.node_name,
5698 "MASTER_CANDIDATE": str(self.op.master_candidate),
5699 "OFFLINE": str(self.op.offline),
5700 "DRAINED": str(self.op.drained),
5701 "MASTER_CAPABLE": str(self.op.master_capable),
5702 "VM_CAPABLE": str(self.op.vm_capable),
5703 }
5705 def BuildHooksNodes(self):
5706 """Build hooks nodes.
5709 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5710 return (nl, nl)
5712 def CheckPrereq(self):
5713 """Check prerequisites.
5715 This only checks the instance list against the existing names.
5718 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5720 if self.lock_instances:
5721 affected_instances = \
5722 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5724 # Verify instance locks
5725 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5726 wanted_instances = frozenset(affected_instances.keys())
5727 if wanted_instances - owned_instances:
5728 raise errors.OpPrereqError("Instances affected by changing node %s's"
5729 " secondary IP address have changed since"
5730 " locks were acquired, wanted '%s', have"
5731 " '%s'; retry the operation" %
5732 (self.op.node_name,
5733 utils.CommaJoin(wanted_instances),
5734 utils.CommaJoin(owned_instances)),
5735 errors.ECODE_STATE)
5736 else:
5737 affected_instances = None
5739 if (self.op.master_candidate is not None or
5740 self.op.drained is not None or
5741 self.op.offline is not None):
5742 # we can't change the master's node flags
5743 if self.op.node_name == self.cfg.GetMasterNode():
5744 raise errors.OpPrereqError("The master role can be changed"
5745 " only via master-failover",
5748 if self.op.master_candidate and not node.master_capable:
5749 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5750 " it a master candidate" % node.name,
5751 errors.ECODE_STATE)
5753 if self.op.vm_capable == False:
5754 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5755 if ipri or isec:
5756 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5757 " the vm_capable flag" % node.name,
5758 errors.ECODE_STATE)
5760 if node.master_candidate and self.might_demote and not self.lock_all:
5761 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5762 # check if after removing the current node, we're missing master
5763 # candidates
5764 (mc_remaining, mc_should, _) = \
5765 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5766 if mc_remaining < mc_should:
5767 raise errors.OpPrereqError("Not enough master candidates, please"
5768 " pass auto promote option to allow"
5769 " promotion", errors.ECODE_STATE)
5771 self.old_flags = old_flags = (node.master_candidate,
5772 node.drained, node.offline)
5773 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5774 self.old_role = old_role = self._F2R[old_flags]
5776 # Check for ineffective changes
5777 for attr in self._FLAGS:
5778 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5779 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5780 setattr(self.op, attr, None)
5782 # Past this point, any flag change to False means a transition
5783 # away from the respective state, as only real changes are kept
5785 # TODO: We might query the real power state if it supports OOB
5786 if _SupportsOob(self.cfg, node):
5787 if self.op.offline is False and not (node.powered or
5788 self.op.powered == True):
5789 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5790 " offline status can be reset") %
5791 self.op.node_name)
5792 elif self.op.powered is not None:
5793 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5794 " as it does not support out-of-band"
5795 " handling") % self.op.node_name)
5797 # If we're being deofflined/drained, we'll MC ourself if needed
5798 if (self.op.drained == False or self.op.offline == False or
5799 (self.op.master_capable and not node.master_capable)):
5800 if _DecideSelfPromotion(self):
5801 self.op.master_candidate = True
5802 self.LogInfo("Auto-promoting node to master candidate")
5804 # If we're no longer master capable, we'll demote ourselves from MC
5805 if self.op.master_capable == False and node.master_candidate:
5806 self.LogInfo("Demoting from master candidate")
5807 self.op.master_candidate = False
5810 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5811 if self.op.master_candidate:
5812 new_role = self._ROLE_CANDIDATE
5813 elif self.op.drained:
5814 new_role = self._ROLE_DRAINED
5815 elif self.op.offline:
5816 new_role = self._ROLE_OFFLINE
5817 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5818 # False is still in new flags, which means we're un-setting (the
5819 # old) flags
5820 new_role = self._ROLE_REGULAR
5821 else: # no new flags, nothing, keep old role
5822 new_role = old_role
5824 self.new_role = new_role
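# Worked example (editorial): if old_flags == (True, False, False) (a
# master candidate) and the opcode only sets offline=True, the chain
# above picks new_role = _ROLE_OFFLINE; if the opcode instead carries
# only drained=False (an explicit un-set), the False branch yields
# _ROLE_REGULAR; with no flags at all, the old role is kept.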
5826 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5827 # Trying to transition out of offline status
5828 # TODO: Use standard RPC runner, but make sure it works when the node is
5829 # still marked offline
5830 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5831 if result.fail_msg:
5832 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5833 " to report its version: %s" %
5834 (node.name, result.fail_msg),
5835 errors.ECODE_ENVIRON)
5836 else:
5837 self.LogWarning("Transitioning node from offline to online state"
5838 " without using re-add. Please make sure the node"
5839 " is healthy!")
5841 if self.op.secondary_ip:
5842 # Ok even without locking, because this can't be changed by any LU
5843 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5844 master_singlehomed = master.secondary_ip == master.primary_ip
5845 if master_singlehomed and self.op.secondary_ip:
5846 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5847 " homed cluster", errors.ECODE_INVAL)
5849 assert not (frozenset(affected_instances) -
5850 self.owned_locks(locking.LEVEL_INSTANCE))
5852 if node.offline:
5853 if affected_instances:
5854 raise errors.OpPrereqError("Cannot change secondary IP address:"
5855 " offline node has instances (%s)"
5856 " configured to use it" %
5857 utils.CommaJoin(affected_instances.keys()))
5858 else:
5859 # On online nodes, check that no instances are running, and that
5860 # the node has the new ip and we can reach it.
5861 for instance in affected_instances.values():
5862 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5863 msg="cannot change secondary ip")
5865 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5866 if master.name != node.name:
5867 # check reachability from master secondary ip to new secondary ip
5868 if not netutils.TcpPing(self.op.secondary_ip,
5869 constants.DEFAULT_NODED_PORT,
5870 source=master.secondary_ip):
5871 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5872 " based ping to node daemon port",
5873 errors.ECODE_ENVIRON)
5875 if self.op.ndparams:
5876 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5877 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5878 self.new_ndparams = new_ndparams
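# Example (editorial, values hypothetical): _GetUpdatedParams overlays
# the opcode's ndparams onto the node's current ones, e.g.:
#
#   node.ndparams == {"oob_program": "/usr/bin/oob"}
#   self.op.ndparams == {"spindle_count": 2}
#   _GetUpdatedParams(...) == {"oob_program": "/usr/bin/oob",
#                              "spindle_count": 2}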
5880 if self.op.hv_state:
5881 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5882 self.node.hv_state_static)
5884 if self.op.disk_state:
5885 self.new_disk_state = \
5886 _MergeAndVerifyDiskState(self.op.disk_state,
5887 self.node.disk_state_static)
5889 def Exec(self, feedback_fn):
5893 node = self.node
5894 old_role = self.old_role
5895 new_role = self.new_role
5897 result = []
5899 if self.op.ndparams:
5900 node.ndparams = self.new_ndparams
5902 if self.op.powered is not None:
5903 node.powered = self.op.powered
5905 if self.op.hv_state:
5906 node.hv_state_static = self.new_hv_state
5908 if self.op.disk_state:
5909 node.disk_state_static = self.new_disk_state
5911 for attr in ["master_capable", "vm_capable"]:
5912 val = getattr(self.op, attr)
5913 if val is not None:
5914 setattr(node, attr, val)
5915 result.append((attr, str(val)))
5917 if new_role != old_role:
5918 # Tell the node to demote itself, if no longer MC and not offline
5919 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5920 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5921 if msg:
5922 self.LogWarning("Node failed to demote itself: %s", msg)
5924 new_flags = self._R2F[new_role]
5925 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5926 if of != nf:
5927 result.append((desc, str(nf)))
5928 (node.master_candidate, node.drained, node.offline) = new_flags
5930 # we locked all nodes, we adjust the CP before updating this node
5931 if self.lock_all:
5932 _AdjustCandidatePool(self, [node.name])
5934 if self.op.secondary_ip:
5935 node.secondary_ip = self.op.secondary_ip
5936 result.append(("secondary_ip", self.op.secondary_ip))
5938 # this will trigger configuration file update, if needed
5939 self.cfg.Update(node, feedback_fn)
5941 # this will trigger job queue propagation or cleanup if the mc
5942 # flag changed
5943 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5944 self.context.ReaddNode(node)
5946 return result
5949 class LUNodePowercycle(NoHooksLU):
5950 """Powercycles a node.
5955 def CheckArguments(self):
5956 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5957 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5958 raise errors.OpPrereqError("The node is the master and the force"
5959 " parameter was not set",
5960 errors.ECODE_INVAL)
5962 def ExpandNames(self):
5963 """Locking for PowercycleNode.
5965 This is a last-resort option and shouldn't block on other
5966 jobs. Therefore, we grab no locks.
5969 self.needed_locks = {}
5971 def Exec(self, feedback_fn):
5975 result = self.rpc.call_node_powercycle(self.op.node_name,
5976 self.cfg.GetHypervisorType())
5977 result.Raise("Failed to schedule the reboot")
5978 return result.payload
5981 class LUClusterQuery(NoHooksLU):
5982 """Query cluster configuration.
5987 def ExpandNames(self):
5988 self.needed_locks = {}
5990 def Exec(self, feedback_fn):
5991 """Return cluster config.
5994 cluster = self.cfg.GetClusterInfo()
5996 os_hvp = {}
5997 # Filter just for enabled hypervisors
5998 for os_name, hv_dict in cluster.os_hvp.items():
5999 os_hvp[os_name] = {}
6000 for hv_name, hv_params in hv_dict.items():
6001 if hv_name in cluster.enabled_hypervisors:
6002 os_hvp[os_name][hv_name] = hv_params
6004 # Convert ip_family to ip_version
6005 primary_ip_version = constants.IP4_VERSION
6006 if cluster.primary_ip_family == netutils.IP6Address.family:
6007 primary_ip_version = constants.IP6_VERSION
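# Example (editorial): on an IPv6 cluster, primary_ip_family equals
# netutils.IP6Address.family (socket.AF_INET6), so clients see
# "primary_ip_version" == constants.IP6_VERSION (6) instead of the
# IPv4 default constants.IP4_VERSION (4).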
6009 result = {
6010 "software_version": constants.RELEASE_VERSION,
6011 "protocol_version": constants.PROTOCOL_VERSION,
6012 "config_version": constants.CONFIG_VERSION,
6013 "os_api_version": max(constants.OS_API_VERSIONS),
6014 "export_version": constants.EXPORT_VERSION,
6015 "architecture": (platform.architecture()[0], platform.machine()),
6016 "name": cluster.cluster_name,
6017 "master": cluster.master_node,
6018 "default_hypervisor": cluster.primary_hypervisor,
6019 "enabled_hypervisors": cluster.enabled_hypervisors,
6020 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6021 for hypervisor_name in cluster.enabled_hypervisors]),
6022 "os_hvp": os_hvp,
6023 "beparams": cluster.beparams,
6024 "osparams": cluster.osparams,
6025 "ipolicy": cluster.ipolicy,
6026 "nicparams": cluster.nicparams,
6027 "ndparams": cluster.ndparams,
6028 "candidate_pool_size": cluster.candidate_pool_size,
6029 "master_netdev": cluster.master_netdev,
6030 "master_netmask": cluster.master_netmask,
6031 "use_external_mip_script": cluster.use_external_mip_script,
6032 "volume_group_name": cluster.volume_group_name,
6033 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6034 "file_storage_dir": cluster.file_storage_dir,
6035 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6036 "maintain_node_health": cluster.maintain_node_health,
6037 "ctime": cluster.ctime,
6038 "mtime": cluster.mtime,
6039 "uuid": cluster.uuid,
6040 "tags": list(cluster.GetTags()),
6041 "uid_pool": cluster.uid_pool,
6042 "default_iallocator": cluster.default_iallocator,
6043 "reserved_lvs": cluster.reserved_lvs,
6044 "primary_ip_version": primary_ip_version,
6045 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6046 "hidden_os": cluster.hidden_os,
6047 "blacklisted_os": cluster.blacklisted_os,
6048 }
6050 return result
6053 class LUClusterConfigQuery(NoHooksLU):
6054 """Return configuration values.
6058 _FIELDS_DYNAMIC = utils.FieldSet()
6059 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6060 "watcher_pause", "volume_group_name")
6062 def CheckArguments(self):
6063 _CheckOutputFields(static=self._FIELDS_STATIC,
6064 dynamic=self._FIELDS_DYNAMIC,
6065 selected=self.op.output_fields)
6067 def ExpandNames(self):
6068 self.needed_locks = {}
6070 def Exec(self, feedback_fn):
6071 """Dump a representation of the cluster config to the standard output.
6073 values = []
6075 for field in self.op.output_fields:
6076 if field == "cluster_name":
6077 entry = self.cfg.GetClusterName()
6078 elif field == "master_node":
6079 entry = self.cfg.GetMasterNode()
6080 elif field == "drain_flag":
6081 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6082 elif field == "watcher_pause":
6083 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6084 elif field == "volume_group_name":
6085 entry = self.cfg.GetVGName()
6086 else:
6087 raise errors.ParameterError(field)
6088 values.append(entry)
6090 return values
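# Example (editorial sketch): the returned list preserves the order of
# the requested fields, e.g.:
#
#   op.output_fields = ["cluster_name", "master_node"]
#   # Exec -> ["cluster.example.com", "node1.example.com"]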
6092 class LUInstanceActivateDisks(NoHooksLU):
6093 """Bring up an instance's disks.
6098 def ExpandNames(self):
6099 self._ExpandAndLockInstance()
6100 self.needed_locks[locking.LEVEL_NODE] = []
6101 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6103 def DeclareLocks(self, level):
6104 if level == locking.LEVEL_NODE:
6105 self._LockInstancesNodes()
6107 def CheckPrereq(self):
6108 """Check prerequisites.
6110 This checks that the instance is in the cluster.
6113 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6114 assert self.instance is not None, \
6115 "Cannot retrieve locked instance %s" % self.op.instance_name
6116 _CheckNodeOnline(self, self.instance.primary_node)
6118 def Exec(self, feedback_fn):
6119 """Activate the disks.
6122 disks_ok, disks_info = \
6123 _AssembleInstanceDisks(self, self.instance,
6124 ignore_size=self.op.ignore_size)
6125 if not disks_ok:
6126 raise errors.OpExecError("Cannot activate block devices")
6128 return disks_info
6131 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6132 ignore_size=False):
6133 """Prepare the block devices for an instance.
6135 This sets up the block devices on all nodes.
6137 @type lu: L{LogicalUnit}
6138 @param lu: the logical unit on whose behalf we execute
6139 @type instance: L{objects.Instance}
6140 @param instance: the instance for whose disks we assemble
6141 @type disks: list of L{objects.Disk} or None
6142 @param disks: which disks to assemble (or all, if None)
6143 @type ignore_secondaries: boolean
6144 @param ignore_secondaries: if true, errors on secondary nodes
6145 won't result in an error return from the function
6146 @type ignore_size: boolean
6147 @param ignore_size: if true, the current known size of the disk
6148 will not be used during the disk activation, useful for cases
6149 when the size is wrong
6150 @return: False if the operation failed, otherwise a list of
6151 (host, instance_visible_name, node_visible_name)
6152 with the mapping from node devices to instance devices
6155 device_info = []
6156 disks_ok = True
6157 iname = instance.name
6158 disks = _ExpandCheckDisks(instance, disks)
6160 # With the two-pass mechanism we try to reduce the window of
6161 # opportunity for the race condition of switching DRBD to primary
6162 # before handshaking occurred, but we do not eliminate it
6164 # The proper fix would be to wait (with some limits) until the
6165 # connection has been made and drbd transitions from WFConnection
6166 # into any other network-connected state (Connected, SyncTarget,
6167 # SyncSource, etc.)
6169 # 1st pass, assemble on all nodes in secondary mode
6170 for idx, inst_disk in enumerate(disks):
6171 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6172 if ignore_size:
6173 node_disk = node_disk.Copy()
6174 node_disk.UnsetSize()
6175 lu.cfg.SetDiskID(node_disk, node)
6176 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6177 msg = result.fail_msg
6178 if msg:
6179 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6180 " (is_primary=False, pass=1): %s",
6181 inst_disk.iv_name, node, msg)
6182 if not ignore_secondaries:
6183 disks_ok = False
6185 # FIXME: race condition on drbd migration to primary
6187 # 2nd pass, do only the primary node
6188 for idx, inst_disk in enumerate(disks):
6189 dev_path = None
6191 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6192 if node != instance.primary_node:
6193 continue
6194 if ignore_size:
6195 node_disk = node_disk.Copy()
6196 node_disk.UnsetSize()
6197 lu.cfg.SetDiskID(node_disk, node)
6198 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6199 msg = result.fail_msg
6200 if msg:
6201 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6202 " (is_primary=True, pass=2): %s",
6203 inst_disk.iv_name, node, msg)
6204 disks_ok = False
6205 else:
6206 dev_path = result.payload
6208 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6210 # leave the disks configured for the primary node
6211 # this is a workaround that would be fixed better by
6212 # improving the logical/physical id handling
6213 for disk in disks:
6214 lu.cfg.SetDiskID(disk, instance.primary_node)
6216 return disks_ok, device_info
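# Example (editorial sketch): callers unpack the pair and treat a False
# first element as failure, mirroring LUInstanceActivateDisks.Exec above:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("%s: disk %s assembled at %s" % (node, iv_name, dev_path))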
6219 def _StartInstanceDisks(lu, instance, force):
6220 """Start the disks of an instance.
6223 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6224 ignore_secondaries=force)
6225 if not disks_ok:
6226 _ShutdownInstanceDisks(lu, instance)
6227 if force is not None and not force:
6228 lu.proc.LogWarning("", hint="If the message above refers to a"
6229 " secondary node,"
6230 " you can retry the operation using '--force'.")
6231 raise errors.OpExecError("Disk consistency error")
6234 class LUInstanceDeactivateDisks(NoHooksLU):
6235 """Shutdown an instance's disks.
6240 def ExpandNames(self):
6241 self._ExpandAndLockInstance()
6242 self.needed_locks[locking.LEVEL_NODE] = []
6243 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6245 def DeclareLocks(self, level):
6246 if level == locking.LEVEL_NODE:
6247 self._LockInstancesNodes()
6249 def CheckPrereq(self):
6250 """Check prerequisites.
6252 This checks that the instance is in the cluster.
6255 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6256 assert self.instance is not None, \
6257 "Cannot retrieve locked instance %s" % self.op.instance_name
6259 def Exec(self, feedback_fn):
6260 """Deactivate the disks
6263 instance = self.instance
6265 _ShutdownInstanceDisks(self, instance)
6267 _SafeShutdownInstanceDisks(self, instance)
6270 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6271 """Shutdown block devices of an instance.
6273 This function checks if an instance is running, before calling
6274 _ShutdownInstanceDisks.
6277 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6278 _ShutdownInstanceDisks(lu, instance, disks=disks)
6281 def _ExpandCheckDisks(instance, disks):
6282 """Return the instance disks selected by the disks list
6284 @type disks: list of L{objects.Disk} or None
6285 @param disks: selected disks
6286 @rtype: list of L{objects.Disk}
6287 @return: selected instance disks to act on
6290 if disks is None:
6291 return instance.disks
6293 if not set(disks).issubset(instance.disks):
6294 raise errors.ProgrammerError("Can only act on disks belonging to the"
6295 " target instance")
6297 return disks
6299 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6300 """Shutdown block devices of an instance.
6302 This does the shutdown on all nodes of the instance.
6304 Unless ignore_primary is true, errors on the primary node make the
6305 shutdown count as failed.
6308 all_result = True
6309 disks = _ExpandCheckDisks(instance, disks)
6311 for disk in disks:
6312 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6313 lu.cfg.SetDiskID(top_disk, node)
6314 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6315 msg = result.fail_msg
6316 if msg:
6317 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6318 disk.iv_name, node, msg)
6319 if ((node == instance.primary_node and not ignore_primary) or
6320 (node != instance.primary_node and not result.offline)):
6321 all_result = False
6323 return all_result
6325 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6326 """Checks if a node has enough free memory.
6328 This function checks if a given node has the needed amount of free
6329 memory. In case the node has less memory or we cannot get the
6330 information from the node, this function raises an OpPrereqError
6331 exception.
6333 @type lu: C{LogicalUnit}
6334 @param lu: a logical unit from which we get configuration data
6335 @type node: C{str}
6336 @param node: the node to check
6337 @type reason: C{str}
6338 @param reason: string to use in the error message
6339 @type requested: C{int}
6340 @param requested: the amount of memory in MiB to check for
6341 @type hypervisor_name: C{str}
6342 @param hypervisor_name: the hypervisor to ask for memory stats
6343 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6344 we cannot check the node
6347 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6348 nodeinfo[node].Raise("Can't get data from node %s" % node,
6349 prereq=True, ecode=errors.ECODE_ENVIRON)
6350 (_, _, (hv_info, )) = nodeinfo[node].payload
6352 free_mem = hv_info.get("memory_free", None)
6353 if not isinstance(free_mem, int):
6354 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6355 " was '%s'" % (node, free_mem),
6356 errors.ECODE_ENVIRON)
6357 if requested > free_mem:
6358 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6359 " needed %s MiB, available %s MiB" %
6360 (node, reason, requested, free_mem),
6361 errors.ECODE_NORES)
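# Example (editorial sketch): a typical call checks the primary node
# before starting an instance, using its maximum memory setting:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MAXMEM], instance.hypervisor)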
6364 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6365 """Checks if nodes have enough free disk space in all the VGs.
6367 This function checks if all given nodes have the needed amount of
6368 free disk. In case any node has less disk or we cannot get the
6369 information from the node, this function raises an OpPrereqError
6370 exception.
6372 @type lu: C{LogicalUnit}
6373 @param lu: a logical unit from which we get configuration data
6374 @type nodenames: C{list}
6375 @param nodenames: the list of node names to check
6376 @type req_sizes: C{dict}
6377 @param req_sizes: the hash of vg and corresponding amount of disk in
6378 MiB to check for
6379 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6380 or we cannot check the node
6383 for vg, req_size in req_sizes.items():
6384 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
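# Example (editorial, values hypothetical): req_sizes maps each volume
# group to the total MiB required on it, e.g.:
#
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "altvg": 2048})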
6387 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6388 """Checks if nodes have enough free disk space in the specified VG.
6390 This function checks if all given nodes have the needed amount of
6391 free disk. In case any node has less disk or we cannot get the
6392 information from the node, this function raises an OpPrereqError
6393 exception.
6395 @type lu: C{LogicalUnit}
6396 @param lu: a logical unit from which we get configuration data
6397 @type nodenames: C{list}
6398 @param nodenames: the list of node names to check
6399 @type vg: C{str}
6400 @param vg: the volume group to check
6401 @type requested: C{int}
6402 @param requested: the amount of disk in MiB to check for
6403 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6404 or we cannot check the node
6407 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6408 for node in nodenames:
6409 info = nodeinfo[node]
6410 info.Raise("Cannot get current information from node %s" % node,
6411 prereq=True, ecode=errors.ECODE_ENVIRON)
6412 (_, (vg_info, ), _) = info.payload
6413 vg_free = vg_info.get("vg_free", None)
6414 if not isinstance(vg_free, int):
6415 raise errors.OpPrereqError("Can't compute free disk space on node"
6416 " %s for vg %s, result was '%s'" %
6417 (node, vg, vg_free), errors.ECODE_ENVIRON)
6418 if requested > vg_free:
6419 raise errors.OpPrereqError("Not enough disk space on target node %s"
6420 " vg %s: required %d MiB, available %d MiB" %
6421 (node, vg, requested, vg_free),
6422 errors.ECODE_NORES)
6425 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6426 """Checks if nodes have enough physical CPUs
6428 This function checks if all given nodes have the needed number of
6429 physical CPUs. In case any node has fewer CPUs or we cannot get the
6430 information from the node, this function raises an OpPrereqError
6431 exception.
6433 @type lu: C{LogicalUnit}
6434 @param lu: a logical unit from which we get configuration data
6435 @type nodenames: C{list}
6436 @param nodenames: the list of node names to check
6437 @type requested: C{int}
6438 @param requested: the minimum acceptable number of physical CPUs
6439 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6440 or we cannot check the node
6443 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6444 for node in nodenames:
6445 info = nodeinfo[node]
6446 info.Raise("Cannot get current information from node %s" % node,
6447 prereq=True, ecode=errors.ECODE_ENVIRON)
6448 (_, _, (hv_info, )) = info.payload
6449 num_cpus = hv_info.get("cpu_total", None)
6450 if not isinstance(num_cpus, int):
6451 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6452 " on node %s, result was '%s'" %
6453 (node, num_cpus), errors.ECODE_ENVIRON)
6454 if requested > num_cpus:
6455 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6456 "required" % (node, num_cpus, requested),
6457 errors.ECODE_NORES)
6460 class LUInstanceStartup(LogicalUnit):
6461 """Starts an instance.
6464 HPATH = "instance-start"
6465 HTYPE = constants.HTYPE_INSTANCE
6468 def CheckArguments(self):
6470 if self.op.beparams:
6471 # fill the beparams dict
6472 objects.UpgradeBeParams(self.op.beparams)
6473 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6475 def ExpandNames(self):
6476 self._ExpandAndLockInstance()
6478 def BuildHooksEnv(self):
6481 This runs on master, primary and secondary nodes of the instance.
6484 env = {
6485 "FORCE": self.op.force,
6486 }
6488 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6490 return env
6492 def BuildHooksNodes(self):
6493 """Build hooks nodes.
6496 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6497 return (nl, nl)
6499 def CheckPrereq(self):
6500 """Check prerequisites.
6502 This checks that the instance is in the cluster.
6505 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6506 assert self.instance is not None, \
6507 "Cannot retrieve locked instance %s" % self.op.instance_name
6510 if self.op.hvparams:
6511 # check hypervisor parameter syntax (locally)
6512 cluster = self.cfg.GetClusterInfo()
6513 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6514 filled_hvp = cluster.FillHV(instance)
6515 filled_hvp.update(self.op.hvparams)
6516 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6517 hv_type.CheckParameterSyntax(filled_hvp)
6518 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6520 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6522 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6524 if self.primary_offline and self.op.ignore_offline_nodes:
6525 self.proc.LogWarning("Ignoring offline primary node")
6527 if self.op.hvparams or self.op.beparams:
6528 self.proc.LogWarning("Overridden parameters are ignored")
6529 else:
6530 _CheckNodeOnline(self, instance.primary_node)
6532 bep = self.cfg.GetClusterInfo().FillBE(instance)
6534 # check bridges existence
6535 _CheckInstanceBridgesExist(self, instance)
6537 remote_info = self.rpc.call_instance_info(instance.primary_node,
6538 instance.name,
6539 instance.hypervisor)
6540 remote_info.Raise("Error checking node %s" % instance.primary_node,
6541 prereq=True, ecode=errors.ECODE_ENVIRON)
6542 if not remote_info.payload: # not running already
6543 _CheckNodeFreeMemory(self, instance.primary_node,
6544 "starting instance %s" % instance.name,
6545 bep[constants.BE_MAXMEM], instance.hypervisor)
6547 def Exec(self, feedback_fn):
6548 """Start the instance.
6551 instance = self.instance
6552 force = self.op.force
6554 if not self.op.no_remember:
6555 self.cfg.MarkInstanceUp(instance.name)
6557 if self.primary_offline:
6558 assert self.op.ignore_offline_nodes
6559 self.proc.LogInfo("Primary node offline, marked instance as started")
6560 else:
6561 node_current = instance.primary_node
6563 _StartInstanceDisks(self, instance, force)
6565 result = \
6566 self.rpc.call_instance_start(node_current,
6567 (instance, self.op.hvparams,
6568 self.op.beparams),
6569 self.op.startup_paused)
6570 msg = result.fail_msg
6571 if msg:
6572 _ShutdownInstanceDisks(self, instance)
6573 raise errors.OpExecError("Could not start instance: %s" % msg)
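# Example (editorial sketch): the opcode-level equivalent of this LU,
# optionally overriding parameters for this start only:
#
#   op = opcodes.OpInstanceStartup(instance_name="inst1.example.com",
#                                  force=False)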
6576 class LUInstanceReboot(LogicalUnit):
6577 """Reboot an instance.
6580 HPATH = "instance-reboot"
6581 HTYPE = constants.HTYPE_INSTANCE
6584 def ExpandNames(self):
6585 self._ExpandAndLockInstance()
6587 def BuildHooksEnv(self):
6590 This runs on master, primary and secondary nodes of the instance.
6593 env = {
6594 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6595 "REBOOT_TYPE": self.op.reboot_type,
6596 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6597 }
6599 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6601 return env
6603 def BuildHooksNodes(self):
6604 """Build hooks nodes.
6607 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6608 return (nl, nl)
6610 def CheckPrereq(self):
6611 """Check prerequisites.
6613 This checks that the instance is in the cluster.
6616 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6617 assert self.instance is not None, \
6618 "Cannot retrieve locked instance %s" % self.op.instance_name
6619 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6620 _CheckNodeOnline(self, instance.primary_node)
6622 # check bridges existence
6623 _CheckInstanceBridgesExist(self, instance)
6625 def Exec(self, feedback_fn):
6626 """Reboot the instance.
6629 instance = self.instance
6630 ignore_secondaries = self.op.ignore_secondaries
6631 reboot_type = self.op.reboot_type
6633 remote_info = self.rpc.call_instance_info(instance.primary_node,
6634 instance.name,
6635 instance.hypervisor)
6636 remote_info.Raise("Error checking node %s" % instance.primary_node)
6637 instance_running = bool(remote_info.payload)
6639 node_current = instance.primary_node
6641 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6642 constants.INSTANCE_REBOOT_HARD]:
6643 for disk in instance.disks:
6644 self.cfg.SetDiskID(disk, node_current)
6645 result = self.rpc.call_instance_reboot(node_current, instance,
6646 reboot_type,
6647 self.op.shutdown_timeout)
6648 result.Raise("Could not reboot instance")
6649 else:
6650 if instance_running:
6651 result = self.rpc.call_instance_shutdown(node_current, instance,
6652 self.op.shutdown_timeout)
6653 result.Raise("Could not shutdown instance for full reboot")
6654 _ShutdownInstanceDisks(self, instance)
6655 else:
6656 self.LogInfo("Instance %s was already stopped, starting now",
6657 instance.name)
6658 _StartInstanceDisks(self, instance, ignore_secondaries)
6659 result = self.rpc.call_instance_start(node_current,
6660 (instance, None, None), False)
6661 msg = result.fail_msg
6662 if msg:
6663 _ShutdownInstanceDisks(self, instance)
6664 raise errors.OpExecError("Could not start instance for"
6665 " full reboot: %s" % msg)
6667 self.cfg.MarkInstanceUp(instance.name)
6670 class LUInstanceShutdown(LogicalUnit):
6671 """Shutdown an instance.
6674 HPATH = "instance-stop"
6675 HTYPE = constants.HTYPE_INSTANCE
6678 def ExpandNames(self):
6679 self._ExpandAndLockInstance()
6681 def BuildHooksEnv(self):
6684 This runs on master, primary and secondary nodes of the instance.
6687 env = _BuildInstanceHookEnvByObject(self, self.instance)
6688 env["TIMEOUT"] = self.op.timeout
6689 return env
6691 def BuildHooksNodes(self):
6692 """Build hooks nodes.
6695 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6696 return (nl, nl)
6698 def CheckPrereq(self):
6699 """Check prerequisites.
6701 This checks that the instance is in the cluster.
6704 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6705 assert self.instance is not None, \
6706 "Cannot retrieve locked instance %s" % self.op.instance_name
6708 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6710 self.primary_offline = \
6711 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6713 if self.primary_offline and self.op.ignore_offline_nodes:
6714 self.proc.LogWarning("Ignoring offline primary node")
6716 _CheckNodeOnline(self, self.instance.primary_node)
6718 def Exec(self, feedback_fn):
6719 """Shutdown the instance.
6722 instance = self.instance
6723 node_current = instance.primary_node
6724 timeout = self.op.timeout
6726 if not self.op.no_remember:
6727 self.cfg.MarkInstanceDown(instance.name)
6729 if self.primary_offline:
6730 assert self.op.ignore_offline_nodes
6731 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6733 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6734 msg = result.fail_msg
6735 if msg:
6736 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6738 _ShutdownInstanceDisks(self, instance)
6741 class LUInstanceReinstall(LogicalUnit):
6742 """Reinstall an instance.
6745 HPATH = "instance-reinstall"
6746 HTYPE = constants.HTYPE_INSTANCE
6749 def ExpandNames(self):
6750 self._ExpandAndLockInstance()
6752 def BuildHooksEnv(self):
6755 This runs on master, primary and secondary nodes of the instance.
6758 return _BuildInstanceHookEnvByObject(self, self.instance)
6760 def BuildHooksNodes(self):
6761 """Build hooks nodes.
6764 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6765 return (nl, nl)
6767 def CheckPrereq(self):
6768 """Check prerequisites.
6770 This checks that the instance is in the cluster and is not running.
6773 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6774 assert instance is not None, \
6775 "Cannot retrieve locked instance %s" % self.op.instance_name
6776 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6777 " offline, cannot reinstall")
6778 for node in instance.secondary_nodes:
6779 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6780 " cannot reinstall")
6782 if instance.disk_template == constants.DT_DISKLESS:
6783 raise errors.OpPrereqError("Instance '%s' has no disks" %
6784 self.op.instance_name,
6785 errors.ECODE_INVAL)
6786 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6788 if self.op.os_type is not None:
6790 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6791 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6792 instance_os = self.op.os_type
6793 else:
6794 instance_os = instance.os
6796 nodelist = list(instance.all_nodes)
6798 if self.op.osparams:
6799 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6800 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6801 self.os_inst = i_osdict # the new dict (without defaults)
6802 else:
6803 self.os_inst = {}
6805 self.instance = instance
6807 def Exec(self, feedback_fn):
6808 """Reinstall the instance.
6811 inst = self.instance
6813 if self.op.os_type is not None:
6814 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6815 inst.os = self.op.os_type
6816 # Write to configuration
6817 self.cfg.Update(inst, feedback_fn)
6819 _StartInstanceDisks(self, inst, None)
6820 try:
6821 feedback_fn("Running the instance OS create scripts...")
6822 # FIXME: pass debug option from opcode to backend
6823 result = self.rpc.call_instance_os_add(inst.primary_node,
6824 (inst, self.os_inst), True,
6825 self.op.debug_level)
6826 result.Raise("Could not install OS for instance %s on node %s" %
6827 (inst.name, inst.primary_node))
6828 finally:
6829 _ShutdownInstanceDisks(self, inst)
6832 class LUInstanceRecreateDisks(LogicalUnit):
6833 """Recreate an instance's missing disks.
6836 HPATH = "instance-recreate-disks"
6837 HTYPE = constants.HTYPE_INSTANCE
6840 def CheckArguments(self):
6841 # normalise the disk list
6842 self.op.disks = sorted(frozenset(self.op.disks))
6844 def ExpandNames(self):
6845 self._ExpandAndLockInstance()
6846 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6847 if self.op.nodes:
6848 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6849 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6850 else:
6851 self.needed_locks[locking.LEVEL_NODE] = []
6853 def DeclareLocks(self, level):
6854 if level == locking.LEVEL_NODE:
6855 # if we replace the nodes, we only need to lock the old primary,
6856 # otherwise we need to lock all nodes for disk re-creation
6857 primary_only = bool(self.op.nodes)
6858 self._LockInstancesNodes(primary_only=primary_only)
6859 elif level == locking.LEVEL_NODE_RES:
6861 self.needed_locks[locking.LEVEL_NODE_RES] = \
6862 self.needed_locks[locking.LEVEL_NODE][:]
6864 def BuildHooksEnv(self):
6867 This runs on master, primary and secondary nodes of the instance.
6870 return _BuildInstanceHookEnvByObject(self, self.instance)
6872 def BuildHooksNodes(self):
6873 """Build hooks nodes.
6876 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6877 return (nl, nl)
6879 def CheckPrereq(self):
6880 """Check prerequisites.
6882 This checks that the instance is in the cluster and is not running.
6885 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6886 assert instance is not None, \
6887 "Cannot retrieve locked instance %s" % self.op.instance_name
6888 if self.op.nodes:
6889 if len(self.op.nodes) != len(instance.all_nodes):
6890 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6891 " %d replacement nodes were specified" %
6892 (instance.name, len(instance.all_nodes),
6893 len(self.op.nodes)),
6894 errors.ECODE_INVAL)
6895 assert instance.disk_template != constants.DT_DRBD8 or \
6896 len(self.op.nodes) == 2
6897 assert instance.disk_template != constants.DT_PLAIN or \
6898 len(self.op.nodes) == 1
6899 primary_node = self.op.nodes[0]
6900 else:
6901 primary_node = instance.primary_node
6902 _CheckNodeOnline(self, primary_node)
6904 if instance.disk_template == constants.DT_DISKLESS:
6905 raise errors.OpPrereqError("Instance '%s' has no disks" %
6906 self.op.instance_name, errors.ECODE_INVAL)
6907 # if we replace nodes *and* the old primary is offline, we don't
6908 # check the instance state
6909 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6910 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6911 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6912 if not (self.op.nodes and old_pnode.offline):
6913 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6914 msg="cannot recreate disks")
6916 if not self.op.disks:
6917 self.op.disks = range(len(instance.disks))
6919 for idx in self.op.disks:
6920 if idx >= len(instance.disks):
6921 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6922 errors.ECODE_INVAL)
6923 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6924 raise errors.OpPrereqError("Can't recreate disks partially and"
6925 " change the nodes at the same time",
6926 errors.ECODE_INVAL)
6927 self.instance = instance
6929 def Exec(self, feedback_fn):
6930 """Recreate the disks.
6933 instance = self.instance
6935 assert (self.owned_locks(locking.LEVEL_NODE) ==
6936 self.owned_locks(locking.LEVEL_NODE_RES))
6938 to_skip = []
6939 mods = [] # keeps track of needed logical_id changes
6941 for idx, disk in enumerate(instance.disks):
6942 if idx not in self.op.disks: # disk idx has not been passed in
6943 to_skip.append(idx)
6944 continue
6945 # update secondaries for disks, if needed
6946 if self.op.nodes:
6947 if disk.dev_type == constants.LD_DRBD8:
6948 # need to update the nodes and minors
6949 assert len(self.op.nodes) == 2
6950 assert len(disk.logical_id) == 6 # otherwise disk internals have changed
6952 (_, _, old_port, _, _, old_secret) = disk.logical_id
6953 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6954 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6955 new_minors[0], new_minors[1], old_secret)
6956 assert len(disk.logical_id) == len(new_id)
6957 mods.append((idx, new_id))
6959 # now that we have passed all asserts above, we can apply the mods
6960 # in a single run (to avoid partial changes)
6961 for idx, new_id in mods:
6962 instance.disks[idx].logical_id = new_id
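# Example (editorial, values hypothetical): a DRBD8 logical_id is the
# 6-tuple (nodeA, nodeB, port, minorA, minorB, secret); the loop above
# replaces only the nodes and minors, e.g.:
#
#   old: ("node1", "node2", 11000, 0, 1, "secret")
#   new: ("node3", "node4", 11000, 2, 5, "secret")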
6964 # change primary node, if needed
6965 if self.op.nodes:
6966 instance.primary_node = self.op.nodes[0]
6967 self.LogWarning("Changing the instance's nodes, you will have to"
6968 " remove any disks left on the older nodes manually")
6971 self.cfg.Update(instance, feedback_fn)
6973 _CreateDisks(self, instance, to_skip=to_skip)
6976 class LUInstanceRename(LogicalUnit):
6977 """Rename an instance.
6980 HPATH = "instance-rename"
6981 HTYPE = constants.HTYPE_INSTANCE
6983 def CheckArguments(self):
6987 if self.op.ip_check and not self.op.name_check:
6988 # TODO: make the ip check more flexible and not depend on the name check
6989 raise errors.OpPrereqError("IP address check requires a name check",
6990 errors.ECODE_INVAL)
6992 def BuildHooksEnv(self):
6995 This runs on master, primary and secondary nodes of the instance.
6998 env = _BuildInstanceHookEnvByObject(self, self.instance)
6999 env["INSTANCE_NEW_NAME"] = self.op.new_name
7000 return env
7002 def BuildHooksNodes(self):
7003 """Build hooks nodes.
7006 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7007 return (nl, nl)
7009 def CheckPrereq(self):
7010 """Check prerequisites.
7012 This checks that the instance is in the cluster and is not running.
7015 self.op.instance_name = _ExpandInstanceName(self.cfg,
7016 self.op.instance_name)
7017 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7018 assert instance is not None
7019 _CheckNodeOnline(self, instance.primary_node)
7020 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7021 msg="cannot rename")
7022 self.instance = instance
7024 new_name = self.op.new_name
7025 if self.op.name_check:
7026 hostname = netutils.GetHostname(name=new_name)
7027 if hostname.name != new_name:
7028 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7029 hostname.name)
7030 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7031 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7032 " same as given hostname '%s'") %
7033 (hostname.name, self.op.new_name),
7034 errors.ECODE_INVAL)
7035 new_name = self.op.new_name = hostname.name
7036 if (self.op.ip_check and
7037 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7038 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7039 (hostname.ip, new_name),
7040 errors.ECODE_NOTUNIQUE)
7042 instance_list = self.cfg.GetInstanceList()
7043 if new_name in instance_list and new_name != instance.name:
7044 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7045 new_name, errors.ECODE_EXISTS)
7047 def Exec(self, feedback_fn):
7048 """Rename the instance.
7051 inst = self.instance
7052 old_name = inst.name
7054 rename_file_storage = False
7055 if (inst.disk_template in constants.DTS_FILEBASED and
7056 self.op.new_name != inst.name):
7057 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7058 rename_file_storage = True
7060 self.cfg.RenameInstance(inst.name, self.op.new_name)
7061 # Change the instance lock. This is definitely safe while we hold the BGL.
7062 # Otherwise the new lock would have to be added in acquired mode.
7064 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7065 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7067 # re-read the instance from the configuration after rename
7068 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7070 if rename_file_storage:
7071 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7072 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7073 old_file_storage_dir,
7074 new_file_storage_dir)
7075 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7076 " (but the instance has been renamed in Ganeti)" %
7077 (inst.primary_node, old_file_storage_dir,
7078 new_file_storage_dir))
7080 _StartInstanceDisks(self, inst, None)
7081 try:
7082 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7083 old_name, self.op.debug_level)
7084 msg = result.fail_msg
7085 if msg:
7086 msg = ("Could not run OS rename script for instance %s on node %s"
7087 " (but the instance has been renamed in Ganeti): %s" %
7088 (inst.name, inst.primary_node, msg))
7089 self.proc.LogWarning(msg)
7090 finally:
7091 _ShutdownInstanceDisks(self, inst)
7093 return inst.name
7096 class LUInstanceRemove(LogicalUnit):
7097 """Remove an instance.
7100 HPATH = "instance-remove"
7101 HTYPE = constants.HTYPE_INSTANCE
7104 def ExpandNames(self):
7105 self._ExpandAndLockInstance()
7106 self.needed_locks[locking.LEVEL_NODE] = []
7107 self.needed_locks[locking.LEVEL_NODE_RES] = []
7108 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7110 def DeclareLocks(self, level):
7111 if level == locking.LEVEL_NODE:
7112 self._LockInstancesNodes()
7113 elif level == locking.LEVEL_NODE_RES:
7115 self.needed_locks[locking.LEVEL_NODE_RES] = \
7116 self.needed_locks[locking.LEVEL_NODE][:]
7118 def BuildHooksEnv(self):
7121 This runs on master, primary and secondary nodes of the instance.
7124 env = _BuildInstanceHookEnvByObject(self, self.instance)
7125 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7126 return env
7128 def BuildHooksNodes(self):
7129 """Build hooks nodes.
7132 nl = [self.cfg.GetMasterNode()]
7133 nl_post = list(self.instance.all_nodes) + nl
7134 return (nl, nl_post)
7136 def CheckPrereq(self):
7137 """Check prerequisites.
7139 This checks that the instance is in the cluster.
7142 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7143 assert self.instance is not None, \
7144 "Cannot retrieve locked instance %s" % self.op.instance_name
7146 def Exec(self, feedback_fn):
7147 """Remove the instance.
7150 instance = self.instance
7151 logging.info("Shutting down instance %s on node %s",
7152 instance.name, instance.primary_node)
7154 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7155 self.op.shutdown_timeout)
7156 msg = result.fail_msg
7157 if msg:
7158 if self.op.ignore_failures:
7159 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7160 else:
7161 raise errors.OpExecError("Could not shutdown instance %s on"
7162 " node %s: %s" %
7163 (instance.name, instance.primary_node, msg))
7165 assert (self.owned_locks(locking.LEVEL_NODE) ==
7166 self.owned_locks(locking.LEVEL_NODE_RES))
7167 assert not (set(instance.all_nodes) -
7168 self.owned_locks(locking.LEVEL_NODE)), \
7169 "Not owning correct locks"
7171 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7174 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7175 """Utility function to remove an instance.
7178 logging.info("Removing block devices for instance %s", instance.name)
7180 if not _RemoveDisks(lu, instance):
7181 if not ignore_failures:
7182 raise errors.OpExecError("Can't remove instance's disks")
7183 feedback_fn("Warning: can't remove instance's disks")
7185 logging.info("Removing instance %s out of cluster config", instance.name)
7187 lu.cfg.RemoveInstance(instance.name)
7189 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7190 "Instance lock removal conflict"
7192 # Remove lock for the instance
7193 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7196 class LUInstanceQuery(NoHooksLU):
7197 """Logical unit for querying instances.
7200 # pylint: disable=W0142
7203 def CheckArguments(self):
7204 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7205 self.op.output_fields, self.op.use_locking)
7207 def ExpandNames(self):
7208 self.iq.ExpandNames(self)
7210 def DeclareLocks(self, level):
7211 self.iq.DeclareLocks(self, level)
7213 def Exec(self, feedback_fn):
7214 return self.iq.OldStyleQuery(self)
7217 class LUInstanceFailover(LogicalUnit):
7218 """Failover an instance.
7221 HPATH = "instance-failover"
7222 HTYPE = constants.HTYPE_INSTANCE
7225 def CheckArguments(self):
7226 """Check the arguments.
7229 self.iallocator = getattr(self.op, "iallocator", None)
7230 self.target_node = getattr(self.op, "target_node", None)
7232 def ExpandNames(self):
7233 self._ExpandAndLockInstance()
7235 if self.op.target_node is not None:
7236 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7238 self.needed_locks[locking.LEVEL_NODE] = []
7239 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7241 ignore_consistency = self.op.ignore_consistency
7242 shutdown_timeout = self.op.shutdown_timeout
7243 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7244 cleanup=False,
7245 failover=True,
7246 ignore_consistency=ignore_consistency,
7247 shutdown_timeout=shutdown_timeout,
7248 ignore_ipolicy=self.op.ignore_ipolicy)
7249 self.tasklets = [self._migrater]
7251 def DeclareLocks(self, level):
7252 if level == locking.LEVEL_NODE:
7253 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7254 if instance.disk_template in constants.DTS_EXT_MIRROR:
7255 if self.op.target_node is None:
7256 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7257 else:
7258 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7259 self.op.target_node]
7260 del self.recalculate_locks[locking.LEVEL_NODE]
7261 else:
7262 self._LockInstancesNodes()
7264 def BuildHooksEnv(self):
7267 This runs on master, primary and secondary nodes of the instance.
7270 instance = self._migrater.instance
7271 source_node = instance.primary_node
7272 target_node = self.op.target_node
7273 env = {
7274 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7275 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7276 "OLD_PRIMARY": source_node,
7277 "NEW_PRIMARY": target_node,
7278 }
7280 if instance.disk_template in constants.DTS_INT_MIRROR:
7281 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7282 env["NEW_SECONDARY"] = source_node
7283 else:
7284 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7286 env.update(_BuildInstanceHookEnvByObject(self, instance))
7288 return env
7290 def BuildHooksNodes(self):
7291 """Build hooks nodes.
7294 instance = self._migrater.instance
7295 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7296 return (nl, nl + [instance.primary_node])
7299 class LUInstanceMigrate(LogicalUnit):
7300 """Migrate an instance.
7302 This is migration without shutting down, compared to the failover,
7303 which is done with shutdown.
7306 HPATH = "instance-migrate"
7307 HTYPE = constants.HTYPE_INSTANCE
7310 def ExpandNames(self):
7311 self._ExpandAndLockInstance()
7313 if self.op.target_node is not None:
7314 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7316 self.needed_locks[locking.LEVEL_NODE] = []
7317 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7319 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7320 cleanup=self.op.cleanup,
7322 fallback=self.op.allow_failover,
7323 ignore_ipolicy=self.op.ignore_ipolicy)
7324 self.tasklets = [self._migrater]
7326 def DeclareLocks(self, level):
7327 if level == locking.LEVEL_NODE:
7328 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7329 if instance.disk_template in constants.DTS_EXT_MIRROR:
7330 if self.op.target_node is None:
7331 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7332 else:
7333 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7334 self.op.target_node]
7335 del self.recalculate_locks[locking.LEVEL_NODE]
7336 else:
7337 self._LockInstancesNodes()
7339 def BuildHooksEnv(self):
7342 This runs on master, primary and secondary nodes of the instance.
7345 instance = self._migrater.instance
7346 source_node = instance.primary_node
7347 target_node = self.op.target_node
7348 env = _BuildInstanceHookEnvByObject(self, instance)
7349 env.update({
7350 "MIGRATE_LIVE": self._migrater.live,
7351 "MIGRATE_CLEANUP": self.op.cleanup,
7352 "OLD_PRIMARY": source_node,
7353 "NEW_PRIMARY": target_node,
7354 })
7356 if instance.disk_template in constants.DTS_INT_MIRROR:
7357 env["OLD_SECONDARY"] = target_node
7358 env["NEW_SECONDARY"] = source_node
7359 else:
7360 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7362 return env
7364 def BuildHooksNodes(self):
7365 """Build hooks nodes.
7368 instance = self._migrater.instance
7369 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7370 return (nl, nl + [instance.primary_node])
7373 class LUInstanceMove(LogicalUnit):
7374 """Move an instance by data-copying.
7377 HPATH = "instance-move"
7378 HTYPE = constants.HTYPE_INSTANCE
7381 def ExpandNames(self):
7382 self._ExpandAndLockInstance()
7383 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7384 self.op.target_node = target_node
7385 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7386 self.needed_locks[locking.LEVEL_NODE_RES] = []
7387 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7389 def DeclareLocks(self, level):
7390 if level == locking.LEVEL_NODE:
7391 self._LockInstancesNodes(primary_only=True)
7392 elif level == locking.LEVEL_NODE_RES:
7394 self.needed_locks[locking.LEVEL_NODE_RES] = \
7395 self.needed_locks[locking.LEVEL_NODE][:]
7397 def BuildHooksEnv(self):
7400 This runs on master, primary and secondary nodes of the instance.
7403 env = {
7404 "TARGET_NODE": self.op.target_node,
7405 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7406 }
7407 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7409 return env
7410 def BuildHooksNodes(self):
7411 """Build hooks nodes.
7414 nl = [
7415 self.cfg.GetMasterNode(),
7416 self.instance.primary_node,
7417 self.op.target_node,
7418 ]
7419 return (nl, nl)
7421 def CheckPrereq(self):
7422 """Check prerequisites.
7424 This checks that the instance is in the cluster.
7427 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7428 assert self.instance is not None, \
7429 "Cannot retrieve locked instance %s" % self.op.instance_name
7431 node = self.cfg.GetNodeInfo(self.op.target_node)
7432 assert node is not None, \
7433 "Cannot retrieve locked node %s" % self.op.target_node
7435 self.target_node = target_node = node.name
7437 if target_node == instance.primary_node:
7438 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7439 (instance.name, target_node),
7440 errors.ECODE_STATE)
7442 bep = self.cfg.GetClusterInfo().FillBE(instance)
7444 for idx, dsk in enumerate(instance.disks):
7445 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7446 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7447 " cannot copy" % idx, errors.ECODE_STATE)
7449 _CheckNodeOnline(self, target_node)
7450 _CheckNodeNotDrained(self, target_node)
7451 _CheckNodeVmCapable(self, target_node)
7453 if instance.admin_state == constants.ADMINST_UP:
7454 # check memory requirements on the secondary node
7455 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7456 instance.name, bep[constants.BE_MAXMEM],
7457 instance.hypervisor)
7458 else:
7459 self.LogInfo("Not checking memory on the secondary node as"
7460 " instance will not be started")
7462 # check bridge existence
7463 _CheckInstanceBridgesExist(self, instance, node=target_node)
7465 def Exec(self, feedback_fn):
7466 """Move an instance.
7468 The move is done by shutting it down on its present node, copying
7469 the data over (slow) and starting it on the new node.
7472 instance = self.instance
7474 source_node = instance.primary_node
7475 target_node = self.target_node
7477 self.LogInfo("Shutting down instance %s on source node %s",
7478 instance.name, source_node)
7480 assert (self.owned_locks(locking.LEVEL_NODE) ==
7481 self.owned_locks(locking.LEVEL_NODE_RES))
7483 result = self.rpc.call_instance_shutdown(source_node, instance,
7484 self.op.shutdown_timeout)
7485 msg = result.fail_msg
7486 if msg:
7487 if self.op.ignore_consistency:
7488 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7489 " Proceeding anyway. Please make sure node"
7490 " %s is down. Error details: %s",
7491 instance.name, source_node, source_node, msg)
7492 else:
7493 raise errors.OpExecError("Could not shutdown instance %s on"
7494 " node %s: %s" %
7495 (instance.name, source_node, msg))
7497 # create the target disks
7498 try:
7499 _CreateDisks(self, instance, target_node=target_node)
7500 except errors.OpExecError:
7501 self.LogWarning("Device creation failed, reverting...")
7502 try:
7503 _RemoveDisks(self, instance, target_node=target_node)
7504 finally:
7505 self.cfg.ReleaseDRBDMinors(instance.name)
7506 raise
7508 cluster_name = self.cfg.GetClusterInfo().cluster_name
7510 errs = []
7511 # activate, get path, copy the data over
7512 for idx, disk in enumerate(instance.disks):
7513 self.LogInfo("Copying data for disk %d", idx)
7514 result = self.rpc.call_blockdev_assemble(target_node, disk,
7515 instance.name, True, idx)
7516 if result.fail_msg:
7517 self.LogWarning("Can't assemble newly created disk %d: %s",
7518 idx, result.fail_msg)
7519 errs.append(result.fail_msg)
7520 break
7521 dev_path = result.payload
7522 result = self.rpc.call_blockdev_export(source_node, disk,
7523 target_node, dev_path,
7524 cluster_name)
7525 if result.fail_msg:
7526 self.LogWarning("Can't copy data over for disk %d: %s",
7527 idx, result.fail_msg)
7528 errs.append(result.fail_msg)
7529 break
7531 if errs:
7532 self.LogWarning("Some disks failed to copy, aborting")
7533 try:
7534 _RemoveDisks(self, instance, target_node=target_node)
7535 finally:
7536 self.cfg.ReleaseDRBDMinors(instance.name)
7537 raise errors.OpExecError("Errors during disk copy: %s" %
7538 (",".join(errs),))
7540 instance.primary_node = target_node
7541 self.cfg.Update(instance, feedback_fn)
7543 self.LogInfo("Removing the disks on the original node")
7544 _RemoveDisks(self, instance, target_node=source_node)
7546 # Only start the instance if it's marked as up
7547 if instance.admin_state == constants.ADMINST_UP:
7548 self.LogInfo("Starting instance %s on node %s",
7549 instance.name, target_node)
7551 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7552 ignore_secondaries=True)
7554 _ShutdownInstanceDisks(self, instance)
7555 raise errors.OpExecError("Can't activate the instance's disks")
7557 result = self.rpc.call_instance_start(target_node,
7558 (instance, None, None), False)
7559 msg = result.fail_msg
7561 _ShutdownInstanceDisks(self, instance)
7562 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7563 (instance.name, target_node, msg))
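# Editor's sketch (not part of Ganeti): the copy loop above collects failures
# in "errs" instead of aborting on the first one, so that the revert happens
# exactly once. The same pattern in isolation, with hypothetical copy_fn and
# cleanup_fn callables standing in for the RPC calls:
def _ExampleCopyAllOrRevert(disks, copy_fn, cleanup_fn):
  """Copy every disk, collecting errors, and revert if any copy failed."""
  errs = []
  for idx, disk in enumerate(disks):
    err = copy_fn(idx, disk)  # None on success, an error message on failure
    if err:
      errs.append(err)
  if errs:
    cleanup_fn()  # remove the partially created target disks
    raise errors.OpExecError("Errors during disk copy: %s" % ",".join(errs))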
7566 class LUNodeMigrate(LogicalUnit):
7567 """Migrate all instances from a node.
7570 HPATH = "node-migrate"
7571 HTYPE = constants.HTYPE_NODE
7574 def CheckArguments(self):
7577 def ExpandNames(self):
7578 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7580 self.share_locks = _ShareAll()
7581 self.needed_locks = {
7582 locking.LEVEL_NODE: [self.op.node_name],
7585 def BuildHooksEnv(self):
7588 This runs on the master, the primary and all the secondaries.
7592 "NODE_NAME": self.op.node_name,
7595 def BuildHooksNodes(self):
7596 """Build hooks nodes.
7599 nl = [self.cfg.GetMasterNode()]
7602 def CheckPrereq(self):
7605 def Exec(self, feedback_fn):
7606 # Prepare jobs for migrating instances
7608 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7611 iallocator=self.op.iallocator,
7612 target_node=self.op.target_node)]
7613 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7616 # TODO: Run iallocator in this opcode and pass correct placement options to
7617 # OpInstanceMigrate. Since other jobs can modify the cluster between
7618 # running the iallocator and the actual migration, a good consistency model
7619 # will have to be found.
7621 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7622 frozenset([self.op.node_name]))
7624 return ResultWithJobs(jobs)
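# Editor's sketch (not part of Ganeti): the jobs built above form a list of
# lists, one single-opcode job per primary instance, which ResultWithJobs
# hands over for submission. The shape in isolation, with a hypothetical
# make_opcode factory:
def _ExampleJobsPerInstance(instance_names, make_opcode):
  """Build a list of single-opcode jobs, one per instance name."""
  return [[make_opcode(name)] for name in instance_names]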
7627 class TLMigrateInstance(Tasklet):
7628 """Tasklet class for instance migration.
7631 @ivar live: whether the migration will be done live or non-live;
7632 this variable is initialized only after CheckPrereq has run
7633 @type cleanup: boolean
7634 @ivar cleanup: Whether we clean up from a failed migration
7635 @type iallocator: string
7636 @ivar iallocator: The iallocator used to determine target_node
7637 @type target_node: string
7638 @ivar target_node: If given, the target_node to reallocate the instance to
7639 @type failover: boolean
7640 @ivar failover: Whether operation results in failover or migration
7641 @type fallback: boolean
7642 @ivar fallback: Whether fallback to failover is allowed if migration is not
7643 possible
7644 @type ignore_consistency: boolean
7645 @ivar ignore_consistency: Whether we should ignore consistency between source
7646 and target node
7647 @type shutdown_timeout: int
7648 @ivar shutdown_timeout: in case of failover, the timeout to use for the shutdown
7649 @type ignore_ipolicy: bool
7650 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7655 _MIGRATION_POLL_INTERVAL = 1 # seconds
7656 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7658 def __init__(self, lu, instance_name, cleanup=False,
7659 failover=False, fallback=False,
7660 ignore_consistency=False,
7661 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7662 ignore_ipolicy=False):
7663 """Initializes this class.
7666 Tasklet.__init__(self, lu)
7669 self.instance_name = instance_name
7670 self.cleanup = cleanup
7671 self.live = False # will be overridden later
7672 self.failover = failover
7673 self.fallback = fallback
7674 self.ignore_consistency = ignore_consistency
7675 self.shutdown_timeout = shutdown_timeout
7676 self.ignore_ipolicy = ignore_ipolicy
7678 def CheckPrereq(self):
7679 """Check prerequisites.
7681 This checks that the instance is in the cluster.
7684 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7685 instance = self.cfg.GetInstanceInfo(instance_name)
7686 assert instance is not None
7687 self.instance = instance
7688 cluster = self.cfg.GetClusterInfo()
7690 if (not self.cleanup and
7691 instance.admin_state != constants.ADMINST_UP and
7692 not self.failover and self.fallback):
7693 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7694 " switching to failover")
7695 self.failover = True
7697 if instance.disk_template not in constants.DTS_MIRRORED:
7702 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7703 " %s" % (instance.disk_template, text),
7706 if instance.disk_template in constants.DTS_EXT_MIRROR:
7707 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7709 if self.lu.op.iallocator:
7710 self._RunAllocator()
7712 # We set self.target_node as it is required by
7714 self.target_node = self.lu.op.target_node
7716 # Check that the target node is correct in terms of instance policy
7717 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7718 ipolicy = _CalculateGroupIPolicy(cluster, nodeinfo.group)
7719 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7720 ignore=self.ignore_ipolicy)
7722 # self.target_node is already populated, either directly or by the
7723 # iallocator run
7724 target_node = self.target_node
7725 if self.target_node == instance.primary_node:
7726 raise errors.OpPrereqError("Cannot migrate instance %s"
7727 " to its primary (%s)" %
7728 (instance.name, instance.primary_node))
7730 if len(self.lu.tasklets) == 1:
7731 # It is safe to release locks only when we're the only tasklet
7732 # in this LU
7733 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7734 keep=[instance.primary_node, self.target_node])
7737 secondary_nodes = instance.secondary_nodes
7738 if not secondary_nodes:
7739 raise errors.ConfigurationError("No secondary node but using"
7740 " %s disk template" %
7741 instance.disk_template)
7742 target_node = secondary_nodes[0]
7743 if self.lu.op.iallocator or (self.lu.op.target_node and
7744 self.lu.op.target_node != target_node):
7746 text = "failed over"
7749 raise errors.OpPrereqError("Instances with disk template %s cannot"
7750 " be %s to arbitrary nodes"
7751 " (neither an iallocator nor a target"
7752 " node can be passed)" %
7753 (instance.disk_template, text),
7755 nodeinfo = self.cfg.GetNodeInfo(target_node)
7756 ipolicy = _CalculateGroupIPolicy(cluster, nodeinfo.group)
7757 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7758 ignore=self.ignore_ipolicy)
7760 i_be = cluster.FillBE(instance)
7762 # check memory requirements on the target node
7763 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7764 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7765 instance.name, i_be[constants.BE_MAXMEM],
7766 instance.hypervisor)
7768 self.lu.LogInfo("Not checking memory on the target node as the"
7769 " instance will not be started")
7771 # check if failover must be forced instead of migration
7772 if (not self.cleanup and not self.failover and
7773 i_be[constants.BE_ALWAYS_FAILOVER]):
7774 if self.fallback:
7775 self.lu.LogInfo("Instance configured to always failover; fallback"
7776 " to failover")
7777 self.failover = True
7778 else:
7779 raise errors.OpPrereqError("This instance has been configured to"
7780 " always failover, please allow failover",
7781 errors.ECODE_STATE)
7783 # check bridge existence
7784 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7786 if not self.cleanup:
7787 _CheckNodeNotDrained(self.lu, target_node)
7788 if not self.failover:
7789 result = self.rpc.call_instance_migratable(instance.primary_node,
7791 if result.fail_msg and self.fallback:
7792 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7793 " failover")
7794 self.failover = True
7796 result.Raise("Can't migrate, please use failover",
7797 prereq=True, ecode=errors.ECODE_STATE)
7799 assert not (self.failover and self.cleanup)
7801 if not self.failover:
7802 if self.lu.op.live is not None and self.lu.op.mode is not None:
7803 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7804 " parameters are accepted",
7806 if self.lu.op.live is not None:
7808 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7810 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7811 # reset the 'live' parameter to None so that repeated
7812 # invocations of CheckPrereq do not raise an exception
7813 self.lu.op.live = None
7814 elif self.lu.op.mode is None:
7815 # read the default value from the hypervisor
7816 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7817 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7819 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7821 # Failover is never live
7824 def _RunAllocator(self):
7825 """Run the allocator based on input opcode.
7828 # FIXME: add a self.ignore_ipolicy option
7829 ial = IAllocator(self.cfg, self.rpc,
7830 mode=constants.IALLOCATOR_MODE_RELOC,
7831 name=self.instance_name,
7832 # TODO See why hail breaks with a single node below
7833 relocate_from=[self.instance.primary_node,
7834 self.instance.primary_node],
7837 ial.Run(self.lu.op.iallocator)
7840 raise errors.OpPrereqError("Can't compute nodes using"
7841 " iallocator '%s': %s" %
7842 (self.lu.op.iallocator, ial.info),
7844 if len(ial.result) != ial.required_nodes:
7845 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7846 " of nodes (%s), required %s" %
7847 (self.lu.op.iallocator, len(ial.result),
7848 ial.required_nodes), errors.ECODE_FAULT)
7849 self.target_node = ial.result[0]
7850 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7851 self.instance_name, self.lu.op.iallocator,
7852 utils.CommaJoin(ial.result))
7854 def _WaitUntilSync(self):
7855 """Poll with custom rpc for disk sync.
7857 This uses our own step-based rpc call.
7860 self.feedback_fn("* wait until resync is done")
7864 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7866 self.instance.disks)
7868 for node, nres in result.items():
7869 nres.Raise("Cannot resync disks on node %s" % node)
7870 node_done, node_percent = nres.payload
7871 all_done = all_done and node_done
7872 if node_percent is not None:
7873 min_percent = min(min_percent, node_percent)
7875 if min_percent < 100:
7876 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7879 def _EnsureSecondary(self, node):
7880 """Demote a node to secondary.
7883 self.feedback_fn("* switching node %s to secondary mode" % node)
7885 for dev in self.instance.disks:
7886 self.cfg.SetDiskID(dev, node)
7888 result = self.rpc.call_blockdev_close(node, self.instance.name,
7889 self.instance.disks)
7890 result.Raise("Cannot change disk to secondary on node %s" % node)
7892 def _GoStandalone(self):
7893 """Disconnect from the network.
7896 self.feedback_fn("* changing into standalone mode")
7897 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7898 self.instance.disks)
7899 for node, nres in result.items():
7900 nres.Raise("Cannot disconnect disks on node %s" % node)
7902 def _GoReconnect(self, multimaster):
7903 """Reconnect to the network.
7909 msg = "single-master"
7910 self.feedback_fn("* changing disks into %s mode" % msg)
7911 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7912 self.instance.disks,
7913 self.instance.name, multimaster)
7914 for node, nres in result.items():
7915 nres.Raise("Cannot change disks config on node %s" % node)
7917 def _ExecCleanup(self):
7918 """Try to cleanup after a failed migration.
7920 The cleanup is done by:
7921 - check that the instance is running only on one node
7922 (and update the config if needed)
7923 - change disks on its secondary node to secondary
7924 - wait until disks are fully synchronized
7925 - disconnect from the network
7926 - change disks into single-master mode
7927 - wait again until disks are fully synchronized
7930 instance = self.instance
7931 target_node = self.target_node
7932 source_node = self.source_node
7934 # check running on only one node
7935 self.feedback_fn("* checking where the instance actually runs"
7936 " (if this hangs, the hypervisor might be in"
7938 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7939 for node, result in ins_l.items():
7940 result.Raise("Can't contact node %s" % node)
7942 runningon_source = instance.name in ins_l[source_node].payload
7943 runningon_target = instance.name in ins_l[target_node].payload
7945 if runningon_source and runningon_target:
7946 raise errors.OpExecError("Instance seems to be running on two nodes,"
7947 " or the hypervisor is confused; you will have"
7948 " to ensure manually that it runs only on one"
7949 " and restart this operation")
7951 if not (runningon_source or runningon_target):
7952 raise errors.OpExecError("Instance does not seem to be running at all;"
7953 " in this case it's safer to repair by"
7954 " running 'gnt-instance stop' to ensure disk"
7955 " shutdown, and then restarting it")
7957 if runningon_target:
7958 # the migration has actually succeeded, we need to update the config
7959 self.feedback_fn("* instance running on secondary node (%s),"
7960 " updating config" % target_node)
7961 instance.primary_node = target_node
7962 self.cfg.Update(instance, self.feedback_fn)
7963 demoted_node = source_node
7965 self.feedback_fn("* instance confirmed to be running on its"
7966 " primary node (%s)" % source_node)
7967 demoted_node = target_node
7969 if instance.disk_template in constants.DTS_INT_MIRROR:
7970 self._EnsureSecondary(demoted_node)
7972 self._WaitUntilSync()
7973 except errors.OpExecError:
7974 # we ignore errors here, since if the device is standalone, it
7975 # won't be able to sync
7977 self._GoStandalone()
7978 self._GoReconnect(False)
7979 self._WaitUntilSync()
7981 self.feedback_fn("* done")
7983 def _RevertDiskStatus(self):
7984 """Try to revert the disk status after a failed migration.
7987 target_node = self.target_node
7988 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7992 self._EnsureSecondary(target_node)
7993 self._GoStandalone()
7994 self._GoReconnect(False)
7995 self._WaitUntilSync()
7996 except errors.OpExecError, err:
7997 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7998 " please try to recover the instance manually;"
7999 " error '%s'" % str(err))
8001 def _AbortMigration(self):
8002 """Call the hypervisor code to abort a started migration.
8005 instance = self.instance
8006 target_node = self.target_node
8007 source_node = self.source_node
8008 migration_info = self.migration_info
8010 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8014 abort_msg = abort_result.fail_msg
8016 logging.error("Aborting migration failed on target node %s: %s",
8017 target_node, abort_msg)
8018 # Don't raise an exception here, as we still have to try to revert the
8019 # disk status, even if this step failed.
8021 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8022 instance, False, self.live)
8023 abort_msg = abort_result.fail_msg
8025 logging.error("Aborting migration failed on source node %s: %s",
8026 source_node, abort_msg)
8028 def _ExecMigration(self):
8029 """Migrate an instance.
8031 The migrate is done by:
8032 - change the disks into dual-master mode
8033 - wait until disks are fully synchronized again
8034 - migrate the instance
8035 - change disks on the new secondary node (the old primary) to secondary
8036 - wait until disks are fully synchronized
8037 - change disks into single-master mode
8040 instance = self.instance
8041 target_node = self.target_node
8042 source_node = self.source_node
8044 # Check for hypervisor version mismatch and warn the user.
8045 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8046 None, [self.instance.hypervisor])
8047 for ninfo in nodeinfo.values():
8048 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8050 (_, _, (src_info, )) = nodeinfo[source_node].payload
8051 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8053 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8054 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8055 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8056 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8057 if src_version != dst_version:
8058 self.feedback_fn("* warning: hypervisor version mismatch between"
8059 " source (%s) and target (%s) node" %
8060 (src_version, dst_version))
8062 self.feedback_fn("* checking disk consistency between source and target")
8063 for dev in instance.disks:
8064 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8065 raise errors.OpExecError("Disk %s is degraded or not fully"
8066 " synchronized on target node,"
8067 " aborting migration" % dev.iv_name)
8069 # First get the migration information from the remote node
8070 result = self.rpc.call_migration_info(source_node, instance)
8071 msg = result.fail_msg
8073 log_err = ("Failed fetching source migration information from %s: %s" %
8075 logging.error(log_err)
8076 raise errors.OpExecError(log_err)
8078 self.migration_info = migration_info = result.payload
8080 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8081 # Then switch the disks to master/master mode
8082 self._EnsureSecondary(target_node)
8083 self._GoStandalone()
8084 self._GoReconnect(True)
8085 self._WaitUntilSync()
8087 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8088 result = self.rpc.call_accept_instance(target_node,
8091 self.nodes_ip[target_node])
8093 msg = result.fail_msg
8095 logging.error("Instance pre-migration failed, trying to revert"
8096 " disk status: %s", msg)
8097 self.feedback_fn("Pre-migration failed, aborting")
8098 self._AbortMigration()
8099 self._RevertDiskStatus()
8100 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8101 (instance.name, msg))
8103 self.feedback_fn("* migrating instance to %s" % target_node)
8104 result = self.rpc.call_instance_migrate(source_node, instance,
8105 self.nodes_ip[target_node],
8107 msg = result.fail_msg
8109 logging.error("Instance migration failed, trying to revert"
8110 " disk status: %s", msg)
8111 self.feedback_fn("Migration failed, aborting")
8112 self._AbortMigration()
8113 self._RevertDiskStatus()
8114 raise errors.OpExecError("Could not migrate instance %s: %s" %
8115 (instance.name, msg))
8117 self.feedback_fn("* starting memory transfer")
8118 last_feedback = time.time()
8120 result = self.rpc.call_instance_get_migration_status(source_node,
8122 msg = result.fail_msg
8123 ms = result.payload # MigrationStatus instance
8124 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8125 logging.error("Instance migration failed, trying to revert"
8126 " disk status: %s", msg)
8127 self.feedback_fn("Migration failed, aborting")
8128 self._AbortMigration()
8129 self._RevertDiskStatus()
8130 raise errors.OpExecError("Could not migrate instance %s: %s" %
8131 (instance.name, msg))
8133 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8134 self.feedback_fn("* memory transfer complete")
8137 if (utils.TimeoutExpired(last_feedback,
8138 self._MIGRATION_FEEDBACK_INTERVAL) and
8139 ms.transferred_ram is not None):
8140 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8141 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8142 last_feedback = time.time()
8144 time.sleep(self._MIGRATION_POLL_INTERVAL)
8146 result = self.rpc.call_instance_finalize_migration_src(source_node,
8150 msg = result.fail_msg
8152 logging.error("Instance migration succeeded, but finalization failed"
8153 " on the source node: %s", msg)
8154 raise errors.OpExecError("Could not finalize instance migration: %s" %
8157 instance.primary_node = target_node
8159 # distribute new instance config to the other nodes
8160 self.cfg.Update(instance, self.feedback_fn)
8162 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8166 msg = result.fail_msg
8168 logging.error("Instance migration succeeded, but finalization failed"
8169 " on the target node: %s", msg)
8170 raise errors.OpExecError("Could not finalize instance migration: %s" %
8173 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8174 self._EnsureSecondary(source_node)
8175 self._WaitUntilSync()
8176 self._GoStandalone()
8177 self._GoReconnect(False)
8178 self._WaitUntilSync()
8180 self.feedback_fn("* done")
8182 def _ExecFailover(self):
8183 """Failover an instance.
8185 The failover is done by shutting it down on its present node and
8186 starting it on the secondary.
8189 instance = self.instance
8190 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8192 source_node = instance.primary_node
8193 target_node = self.target_node
8195 if instance.admin_state == constants.ADMINST_UP:
8196 self.feedback_fn("* checking disk consistency between source and target")
8197 for dev in instance.disks:
8198 # for drbd, these are drbd over lvm
8199 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8200 if primary_node.offline:
8201 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8203 (primary_node.name, dev.iv_name, target_node))
8204 elif not self.ignore_consistency:
8205 raise errors.OpExecError("Disk %s is degraded on target node,"
8206 " aborting failover" % dev.iv_name)
8208 self.feedback_fn("* not checking disk consistency as instance is not"
8211 self.feedback_fn("* shutting down instance on source node")
8212 logging.info("Shutting down instance %s on node %s",
8213 instance.name, source_node)
8215 result = self.rpc.call_instance_shutdown(source_node, instance,
8216 self.shutdown_timeout)
8217 msg = result.fail_msg
8219 if self.ignore_consistency or primary_node.offline:
8220 self.lu.LogWarning("Could not shut down instance %s on node %s,"
8221 " proceeding anyway; please make sure node"
8222 " %s is down; error details: %s",
8223 instance.name, source_node, source_node, msg)
8225 raise errors.OpExecError("Could not shut down instance %s on"
8226 " node %s: %s" %
8227 (instance.name, source_node, msg))
8229 self.feedback_fn("* deactivating the instance's disks on source node")
8230 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8231 raise errors.OpExecError("Can't shut down the instance's disks")
8233 instance.primary_node = target_node
8234 # distribute new instance config to the other nodes
8235 self.cfg.Update(instance, self.feedback_fn)
8237 # Only start the instance if it's marked as up
8238 if instance.admin_state == constants.ADMINST_UP:
8239 self.feedback_fn("* activating the instance's disks on target node %s" %
8241 logging.info("Starting instance %s on node %s",
8242 instance.name, target_node)
8244 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8245 ignore_secondaries=True)
8247 _ShutdownInstanceDisks(self.lu, instance)
8248 raise errors.OpExecError("Can't activate the instance's disks")
8250 self.feedback_fn("* starting the instance on the target node %s" %
8252 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8254 msg = result.fail_msg
8256 _ShutdownInstanceDisks(self.lu, instance)
8257 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8258 (instance.name, target_node, msg))
8260 def Exec(self, feedback_fn):
8261 """Perform the migration.
8264 self.feedback_fn = feedback_fn
8265 self.source_node = self.instance.primary_node
8267 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8268 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8269 self.target_node = self.instance.secondary_nodes[0]
8270 # Otherwise self.target_node has been populated either
8271 # directly, or through an iallocator.
8273 self.all_nodes = [self.source_node, self.target_node]
8274 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8275 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8278 feedback_fn("Failover instance %s" % self.instance.name)
8279 self._ExecFailover()
8281 feedback_fn("Migrating instance %s" % self.instance.name)
8284 return self._ExecCleanup()
8286 return self._ExecMigration()
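# Editor's sketch (not part of Ganeti): the live/mode resolution performed in
# TLMigrateInstance.CheckPrereq above reduces to a small decision table; the
# same precedence in isolation: failover is never live, an explicit "live"
# flag wins over "mode", and the hypervisor default is used when neither is
# given.
def _ExampleResolveLiveMigration(live, mode, hv_default, failover):
  """Return True if the migration should be performed live."""
  if failover:
    # Failover is never live
    return False
  if live is not None and mode is not None:
    raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                               " parameters are accepted",
                               errors.ECODE_INVAL)
  if live is not None:
    if live:
      mode = constants.HT_MIGRATION_LIVE
    else:
      mode = constants.HT_MIGRATION_NONLIVE
  elif mode is None:
    # read the default value from the hypervisor
    mode = hv_default
  return mode == constants.HT_MIGRATION_LIVE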
8289 def _CreateBlockDev(lu, node, instance, device, force_create,
8290 info, force_open):
8291 """Create a tree of block devices on a given node.
8293 If this device type has to be created on secondaries, create it and
8294 all its children.
8296 If not, just recurse to children keeping the same 'force' value.
8298 @param lu: the lu on whose behalf we execute
8299 @param node: the node on which to create the device
8300 @type instance: L{objects.Instance}
8301 @param instance: the instance which owns the device
8302 @type device: L{objects.Disk}
8303 @param device: the device to create
8304 @type force_create: boolean
8305 @param force_create: whether to force creation of this device; this
8306 will be changed to True whenever we find a device for which
8307 CreateOnSecondary() returns True
8308 @param info: the extra 'metadata' we should attach to the device
8309 (this will be represented as a LVM tag)
8310 @type force_open: boolean
8311 @param force_open: this parameter will be passed to the
8312 L{backend.BlockdevCreate} function where it specifies
8313 whether we run on primary or not, and it affects both
8314 the child assembly and the device's own Open() execution
8317 if device.CreateOnSecondary():
8318 force_create = True
8320 if device.children:
8321 for child in device.children:
8322 _CreateBlockDev(lu, node, instance, child, force_create,
8323 info, force_open)
8325 if not force_create:
8326 return
8328 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
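# Editor's sketch (not part of Ganeti): the recursion above turns the force
# flag on as soon as a device must exist on secondaries (e.g. DRBD), creates
# the children with the updated flag, and only then creates the device
# itself. The same traversal over plain dicts instead of L{objects.Disk}:
def _ExampleForcePropagation(device, force):
  """Yield the devices that would be created, depth-first."""
  if device.get("create_on_secondary"):
    force = True
  for child in device.get("children", []):
    for dev in _ExampleForcePropagation(child, force):
      yield dev
  if force:
    yield device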
8331 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8332 """Create a single block device on a given node.
8334 This will not recurse over children of the device, so they must be
8337 @param lu: the lu on whose behalf we execute
8338 @param node: the node on which to create the device
8339 @type instance: L{objects.Instance}
8340 @param instance: the instance which owns the device
8341 @type device: L{objects.Disk}
8342 @param device: the device to create
8343 @param info: the extra 'metadata' we should attach to the device
8344 (this will be represented as a LVM tag)
8345 @type force_open: boolean
8346 @param force_open: this parameter will be passed to the
8347 L{backend.BlockdevCreate} function where it specifies
8348 whether we run on primary or not, and it affects both
8349 the child assembly and the device's own Open() execution
8352 lu.cfg.SetDiskID(device, node)
8353 result = lu.rpc.call_blockdev_create(node, device, device.size,
8354 instance.name, force_open, info)
8355 result.Raise("Can't create block device %s on"
8356 " node %s for instance %s" % (device, node, instance.name))
8357 if device.physical_id is None:
8358 device.physical_id = result.payload
8361 def _GenerateUniqueNames(lu, exts):
8362 """Generate a suitable LV name.
8364 This will generate a logical volume name for the given instance.
8369 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8370 results.append("%s%s" % (new_id, val))
8374 def _ComputeLDParams(disk_template, disk_params):
8375 """Computes Logical Disk parameters from Disk Template parameters.
8377 @type disk_template: string
8378 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8379 @type disk_params: dict
8380 @param disk_params: disk template parameters; dict(template_name -> parameters)
8382 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8383 contains the LD parameters of the node. The tree is flattened in-order.
8386 if disk_template not in constants.DISK_TEMPLATES:
8387 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8390 dt_params = disk_params[disk_template]
8391 if disk_template == constants.DT_DRBD8:
8393 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8394 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8395 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8396 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8397 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8398 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8399 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8400 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8401 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8402 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8403 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8404 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8408 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8411 result.append(drbd_params)
8415 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8418 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8420 result.append(data_params)
8424 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8427 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8429 result.append(meta_params)
8431 elif (disk_template == constants.DT_FILE or
8432 disk_template == constants.DT_SHARED_FILE):
8433 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8435 elif disk_template == constants.DT_PLAIN:
8437 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8440 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8442 result.append(params)
8444 elif disk_template == constants.DT_BLOCK:
8445 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
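# Editor's note, usage sketch (not part of Ganeti): the result is flattened
# in-order, so for DRBD8 it unpacks into the parameters of the DRBD device
# and of its data and metadata LVs, while DT_PLAIN yields a single element;
# disk_params would normally come from the cluster or node group:
#
#   drbd_params, data_params, meta_params = \
#     _ComputeLDParams(constants.DT_DRBD8, disk_params)
#
#   (lv_params, ) = _ComputeLDParams(constants.DT_PLAIN, disk_params)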
8450 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8451 iv_name, p_minor, s_minor, drbd_params, data_params,
8453 """Generate a drbd8 device complete with its children.
8456 assert len(vgnames) == len(names) == 2
8457 port = lu.cfg.AllocatePort()
8458 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8460 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8461 logical_id=(vgnames[0], names[0]),
8463 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8464 logical_id=(vgnames[1], names[1]),
8466 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8467 logical_id=(primary, secondary, port,
8470 children=[dev_data, dev_meta],
8471 iv_name=iv_name, params=drbd_params)
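# Editor's sketch (not part of Ganeti): the branch generated above is a DRBD8
# device with exactly two LV children, data and metadata, the latter fixed at
# DRBD_META_SIZE. The hierarchy as plain data:
def _ExampleDrbd8Tree(size):
  """Return a nested dict mirroring the generated DRBD8 disk hierarchy."""
  return {
    "dev_type": constants.LD_DRBD8,
    "size": size,
    "children": [
      {"dev_type": constants.LD_LV, "size": size},            # data LV
      {"dev_type": constants.LD_LV, "size": DRBD_META_SIZE},  # metadata LV
    ],
  }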
8475 def _GenerateDiskTemplate(lu, template_name,
8476 instance_name, primary_node,
8477 secondary_nodes, disk_info,
8478 file_storage_dir, file_driver,
8479 base_index, feedback_fn, disk_params):
8480 """Generate the entire disk layout for a given template type.
8483 #TODO: compute space requirements
8485 vgname = lu.cfg.GetVGName()
8486 disk_count = len(disk_info)
8488 ld_params = _ComputeLDParams(template_name, disk_params)
8489 if template_name == constants.DT_DISKLESS:
8491 elif template_name == constants.DT_PLAIN:
8492 if len(secondary_nodes) != 0:
8493 raise errors.ProgrammerError("Wrong template configuration")
8495 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8496 for i in range(disk_count)])
8497 for idx, disk in enumerate(disk_info):
8498 disk_index = idx + base_index
8499 vg = disk.get(constants.IDISK_VG, vgname)
8500 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8501 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8502 size=disk[constants.IDISK_SIZE],
8503 logical_id=(vg, names[idx]),
8504 iv_name="disk/%d" % disk_index,
8505 mode=disk[constants.IDISK_MODE],
8506 params=ld_params[0])
8507 disks.append(disk_dev)
8508 elif template_name == constants.DT_DRBD8:
8509 drbd_params, data_params, meta_params = ld_params
8510 if len(secondary_nodes) != 1:
8511 raise errors.ProgrammerError("Wrong template configuration")
8512 remote_node = secondary_nodes[0]
8513 minors = lu.cfg.AllocateDRBDMinor(
8514 [primary_node, remote_node] * len(disk_info), instance_name)
8517 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8518 for i in range(disk_count)]):
8519 names.append(lv_prefix + "_data")
8520 names.append(lv_prefix + "_meta")
8521 for idx, disk in enumerate(disk_info):
8522 disk_index = idx + base_index
8523 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8524 data_vg = disk.get(constants.IDISK_VG, vgname)
8525 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8526 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8527 disk[constants.IDISK_SIZE],
8529 names[idx * 2:idx * 2 + 2],
8530 "disk/%d" % disk_index,
8531 minors[idx * 2], minors[idx * 2 + 1],
8532 drbd_params, data_params, meta_params)
8533 disk_dev.mode = disk[constants.IDISK_MODE]
8534 disks.append(disk_dev)
8535 elif template_name == constants.DT_FILE:
8536 if len(secondary_nodes) != 0:
8537 raise errors.ProgrammerError("Wrong template configuration")
8539 opcodes.RequireFileStorage()
8541 for idx, disk in enumerate(disk_info):
8542 disk_index = idx + base_index
8543 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8544 size=disk[constants.IDISK_SIZE],
8545 iv_name="disk/%d" % disk_index,
8546 logical_id=(file_driver,
8547 "%s/disk%d" % (file_storage_dir,
8549 mode=disk[constants.IDISK_MODE],
8550 params=ld_params[0])
8551 disks.append(disk_dev)
8552 elif template_name == constants.DT_SHARED_FILE:
8553 if len(secondary_nodes) != 0:
8554 raise errors.ProgrammerError("Wrong template configuration")
8556 opcodes.RequireSharedFileStorage()
8558 for idx, disk in enumerate(disk_info):
8559 disk_index = idx + base_index
8560 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8561 size=disk[constants.IDISK_SIZE],
8562 iv_name="disk/%d" % disk_index,
8563 logical_id=(file_driver,
8564 "%s/disk%d" % (file_storage_dir,
8566 mode=disk[constants.IDISK_MODE],
8567 params=ld_params[0])
8568 disks.append(disk_dev)
8569 elif template_name == constants.DT_BLOCK:
8570 if len(secondary_nodes) != 0:
8571 raise errors.ProgrammerError("Wrong template configuration")
8573 for idx, disk in enumerate(disk_info):
8574 disk_index = idx + base_index
8575 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8576 size=disk[constants.IDISK_SIZE],
8577 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8578 disk[constants.IDISK_ADOPT]),
8579 iv_name="disk/%d" % disk_index,
8580 mode=disk[constants.IDISK_MODE],
8581 params=ld_params[0])
8582 disks.append(disk_dev)
8585 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
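# Editor's note, naming sketch (not part of Ganeti): for DRBD8, each disk
# gets one unique prefix which is expanded into a data and a metadata LV
# name, so disk idx ends up with names[idx * 2] and names[idx * 2 + 1]:
#
#   prefixes = ["<uuid0>.disk0", "<uuid1>.disk1"]  # hypothetical unique IDs
#   names = []
#   for p in prefixes:
#     names.append(p + "_data")
#     names.append(p + "_meta")
#   # names == ["<uuid0>.disk0_data", "<uuid0>.disk0_meta",
#   #           "<uuid1>.disk1_data", "<uuid1>.disk1_meta"]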
8589 def _GetInstanceInfoText(instance):
8590 """Compute that text that should be added to the disk's metadata.
8593 return "originstname+%s" % instance.name
8596 def _CalcEta(time_taken, written, total_size):
8597 """Calculates the ETA based on size written and total size.
8599 @param time_taken: The time taken so far
8600 @param written: amount written so far
8601 @param total_size: The total size of data to be written
8602 @return: The remaining time in seconds
8605 avg_time = time_taken / float(written)
8606 return (total_size - written) * avg_time
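# Editor's note, worked example (not part of Ganeti): after 100s with 512 out
# of 2048 units written, the average is 100 / 512.0 seconds per unit, so the
# remaining 1536 units take (2048 - 512) * (100 / 512.0) = 300.0 seconds:
#
#   _CalcEta(100, 512, 2048) == 300.0
#
# Note that written must be non-zero, otherwise the division fails.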
8609 def _WipeDisks(lu, instance):
8610 """Wipes instance disks.
8612 @type lu: L{LogicalUnit}
8613 @param lu: the logical unit on whose behalf we execute
8614 @type instance: L{objects.Instance}
8615 @param instance: the instance whose disks we should create
8616 @return: the success of the wipe
8619 node = instance.primary_node
8621 for device in instance.disks:
8622 lu.cfg.SetDiskID(device, node)
8624 logging.info("Pause sync of instance %s disks", instance.name)
8625 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8627 for idx, success in enumerate(result.payload):
8629 logging.warn("pause-sync of instance %s for disks %d failed",
8633 for idx, device in enumerate(instance.disks):
8634 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8635 # MAX_WIPE_CHUNK at max
8636 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8637 constants.MIN_WIPE_CHUNK_PERCENT)
8638 # we _must_ make this an int, otherwise rounding errors will
8639 # occur
8640 wipe_chunk_size = int(wipe_chunk_size)
8642 lu.LogInfo("* Wiping disk %d", idx)
8643 logging.info("Wiping disk %d for instance %s, node %s using"
8644 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8649 start_time = time.time()
8651 while offset < size:
8652 wipe_size = min(wipe_chunk_size, size - offset)
8653 logging.debug("Wiping disk %d, offset %s, chunk %s",
8654 idx, offset, wipe_size)
8655 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8656 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8657 (idx, offset, wipe_size))
8660 if now - last_output >= 60:
8661 eta = _CalcEta(now - start_time, offset, size)
8662 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8663 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8666 logging.info("Resume sync of instance %s disks", instance.name)
8668 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8670 for idx, success in enumerate(result.payload):
8672 lu.LogWarning("Resume sync of disk %d failed, please have a"
8673 " look at the status and troubleshoot the issue", idx)
8674 logging.warn("resume-sync of instance %s for disks %d failed",
8678 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8679 """Create all disks for an instance.
8681 This abstracts away some work from AddInstance.
8683 @type lu: L{LogicalUnit}
8684 @param lu: the logical unit on whose behalf we execute
8685 @type instance: L{objects.Instance}
8686 @param instance: the instance whose disks we should create
8688 @param to_skip: list of indices to skip
8689 @type target_node: string
8690 @param target_node: if passed, overrides the target node for creation
8692 @return: the success of the creation
8695 info = _GetInstanceInfoText(instance)
8696 if target_node is None:
8697 pnode = instance.primary_node
8698 all_nodes = instance.all_nodes
8703 if instance.disk_template in constants.DTS_FILEBASED:
8704 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8705 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8707 result.Raise("Failed to create directory '%s' on"
8708 " node %s" % (file_storage_dir, pnode))
8710 # Note: this needs to be kept in sync with adding of disks in
8711 # LUInstanceSetParams
8712 for idx, device in enumerate(instance.disks):
8713 if to_skip and idx in to_skip:
8715 logging.info("Creating volume %s for instance %s",
8716 device.iv_name, instance.name)
8718 for node in all_nodes:
8719 f_create = node == pnode
8720 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8723 def _RemoveDisks(lu, instance, target_node=None):
8724 """Remove all disks for an instance.
8726 This abstracts away some work from `AddInstance()` and
8727 `RemoveInstance()`. Note that in case some of the devices couldn't
8728 be removed, the removal will continue with the other ones (compare
8729 with `_CreateDisks()`).
8731 @type lu: L{LogicalUnit}
8732 @param lu: the logical unit on whose behalf we execute
8733 @type instance: L{objects.Instance}
8734 @param instance: the instance whose disks we should remove
8735 @type target_node: string
8736 @param target_node: used to override the node on which to remove the disks
8738 @return: the success of the removal
8741 logging.info("Removing block devices for instance %s", instance.name)
8744 for device in instance.disks:
8746 edata = [(target_node, device)]
8748 edata = device.ComputeNodeTree(instance.primary_node)
8749 for node, disk in edata:
8750 lu.cfg.SetDiskID(disk, node)
8751 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8753 lu.LogWarning("Could not remove block device %s on node %s,"
8754 " continuing anyway: %s", device.iv_name, node, msg)
8757 # if this is a DRBD disk, return its port to the pool
8758 if device.dev_type in constants.LDS_DRBD:
8759 tcp_port = device.logical_id[2]
8760 lu.cfg.AddTcpUdpPort(tcp_port)
8762 if instance.disk_template == constants.DT_FILE:
8763 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8767 tgt = instance.primary_node
8768 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8770 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8771 file_storage_dir, instance.primary_node, result.fail_msg)
8777 def _ComputeDiskSizePerVG(disk_template, disks):
8778 """Compute disk size requirements in the volume group
8781 def _compute(disks, payload):
8782 """Universal algorithm.
8787 vgs[disk[constants.IDISK_VG]] = \
8788 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8792 # Required free disk space per volume group, as a function of the disk sizes
8794 constants.DT_DISKLESS: {},
8795 constants.DT_PLAIN: _compute(disks, 0),
8796 # 128 MB are added for drbd metadata for each disk
8797 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8798 constants.DT_FILE: {},
8799 constants.DT_SHARED_FILE: {},
8802 if disk_template not in req_size_dict:
8803 raise errors.ProgrammerError("Disk template '%s' size requirement"
8804 " is unknown" % disk_template)
8806 return req_size_dict[disk_template]
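# Editor's note, usage sketch (not part of Ganeti): two DRBD disks on
# different volume groups yield per-VG totals, each including DRBD_META_SIZE
# for the metadata device:
#
#   disks = [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
#            {constants.IDISK_VG: "fastvg", constants.IDISK_SIZE: 512}]
#   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)
#   # => {"xenvg": 1024 + DRBD_META_SIZE, "fastvg": 512 + DRBD_META_SIZE}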
8809 def _ComputeDiskSize(disk_template, disks):
8810 """Compute disk size requirements in the volume group
8813 # Required free disk space as a function of the requested disk sizes
8815 constants.DT_DISKLESS: None,
8816 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8817 # 128 MB are added for drbd metadata for each disk
8819 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8820 constants.DT_FILE: None,
8821 constants.DT_SHARED_FILE: 0,
8822 constants.DT_BLOCK: 0,
8825 if disk_template not in req_size_dict:
8826 raise errors.ProgrammerError("Disk template '%s' size requirement"
8827 " is unknown" % disk_template)
8829 return req_size_dict[disk_template]
8832 def _FilterVmNodes(lu, nodenames):
8833 """Filters out non-vm_capable nodes from a list.
8835 @type lu: L{LogicalUnit}
8836 @param lu: the logical unit for which we check
8837 @type nodenames: list
8838 @param nodenames: the list of nodes on which we should check
8840 @return: the list of vm-capable nodes
8843 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8844 return [name for name in nodenames if name not in non_vm_nodes]
8847 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8848 """Hypervisor parameter validation.
8850 This function abstracts the hypervisor parameter validation to be
8851 used in both instance create and instance modify.
8853 @type lu: L{LogicalUnit}
8854 @param lu: the logical unit for which we check
8855 @type nodenames: list
8856 @param nodenames: the list of nodes on which we should check
8857 @type hvname: string
8858 @param hvname: the name of the hypervisor we should use
8859 @type hvparams: dict
8860 @param hvparams: the parameters which we need to check
8861 @raise errors.OpPrereqError: if the parameters are not valid
8864 nodenames = _FilterVmNodes(lu, nodenames)
8866 cluster = lu.cfg.GetClusterInfo()
8867 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8869 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8870 for node in nodenames:
8871 info = hvinfo[node]
8872 if info.offline:
8873 continue
8874 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8877 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8878 """OS parameters validation.
8880 @type lu: L{LogicalUnit}
8881 @param lu: the logical unit for which we check
8882 @type required: boolean
8883 @param required: whether the validation should fail if the OS is not
8884 found
8885 @type nodenames: list
8886 @param nodenames: the list of nodes on which we should check
8887 @type osname: string
8888 @param osname: the name of the OS we should use
8889 @type osparams: dict
8890 @param osparams: the parameters which we need to check
8891 @raise errors.OpPrereqError: if the parameters are not valid
8894 nodenames = _FilterVmNodes(lu, nodenames)
8895 result = lu.rpc.call_os_validate(nodenames, required, osname,
8896 [constants.OS_VALIDATE_PARAMETERS],
8898 for node, nres in result.items():
8899 # we don't check for offline cases since this should be run only
8900 # against the master node and/or an instance's nodes
8901 nres.Raise("OS Parameters validation failed on node %s" % node)
8902 if not nres.payload:
8903 lu.LogInfo("OS %s not found on node %s, validation skipped",
8907 class LUInstanceCreate(LogicalUnit):
8908 """Create an instance.
8911 HPATH = "instance-add"
8912 HTYPE = constants.HTYPE_INSTANCE
8915 def CheckArguments(self):
8919 # do not require name_check to ease forward/backward compatibility
8921 if self.op.no_install and self.op.start:
8922 self.LogInfo("No-installation mode selected, disabling startup")
8923 self.op.start = False
8924 # validate/normalize the instance name
8925 self.op.instance_name = \
8926 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8928 if self.op.ip_check and not self.op.name_check:
8929 # TODO: make the ip check more flexible and not depend on the name check
8930 raise errors.OpPrereqError("Cannot do IP address check without a name"
8931 " check", errors.ECODE_INVAL)
8933 # check nics' parameter names
8934 for nic in self.op.nics:
8935 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8937 # check disks. parameter names and consistent adopt/no-adopt strategy
8938 has_adopt = has_no_adopt = False
8939 for disk in self.op.disks:
8940 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8941 if constants.IDISK_ADOPT in disk:
8945 if has_adopt and has_no_adopt:
8946 raise errors.OpPrereqError("Either all disks are adopted or none is",
8949 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8950 raise errors.OpPrereqError("Disk adoption is not supported for the"
8951 " '%s' disk template" %
8952 self.op.disk_template,
8954 if self.op.iallocator is not None:
8955 raise errors.OpPrereqError("Disk adoption not allowed with an"
8956 " iallocator script", errors.ECODE_INVAL)
8957 if self.op.mode == constants.INSTANCE_IMPORT:
8958 raise errors.OpPrereqError("Disk adoption not allowed for"
8959 " instance import", errors.ECODE_INVAL)
8961 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8962 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8963 " but no 'adopt' parameter given" %
8964 self.op.disk_template,
8967 self.adopt_disks = has_adopt
8969 # instance name verification
8970 if self.op.name_check:
8971 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8972 self.op.instance_name = self.hostname1.name
8973 # used in CheckPrereq for ip ping check
8974 self.check_ip = self.hostname1.ip
8976 self.check_ip = None
8978 # file storage checks
8979 if (self.op.file_driver and
8980 self.op.file_driver not in constants.FILE_DRIVER):
8981 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8982 self.op.file_driver, errors.ECODE_INVAL)
8984 if self.op.disk_template == constants.DT_FILE:
8985 opcodes.RequireFileStorage()
8986 elif self.op.disk_template == constants.DT_SHARED_FILE:
8987 opcodes.RequireSharedFileStorage()
8989 ### Node/iallocator related checks
8990 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8992 if self.op.pnode is not None:
8993 if self.op.disk_template in constants.DTS_INT_MIRROR:
8994 if self.op.snode is None:
8995 raise errors.OpPrereqError("The networked disk templates need"
8996 " a mirror node", errors.ECODE_INVAL)
8998 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8999 " template")
9000 self.op.snode = None
9002 self._cds = _GetClusterDomainSecret()
9004 if self.op.mode == constants.INSTANCE_IMPORT:
9005 # On import force_variant must be True, because if we forced it at
9006 # initial install, our only chance when importing it back is that it
9008 self.op.force_variant = True
9010 if self.op.no_install:
9011 self.LogInfo("No-installation mode has no effect during import")
9013 elif self.op.mode == constants.INSTANCE_CREATE:
9014 if self.op.os_type is None:
9015 raise errors.OpPrereqError("No guest OS specified",
9017 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9018 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9019 " installation" % self.op.os_type,
9021 if self.op.disk_template is None:
9022 raise errors.OpPrereqError("No disk template specified",
9025 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9026 # Check handshake to ensure both clusters have the same domain secret
9027 src_handshake = self.op.source_handshake
9028 if not src_handshake:
9029 raise errors.OpPrereqError("Missing source handshake",
9032 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9035 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9038 # Load and check source CA
9039 self.source_x509_ca_pem = self.op.source_x509_ca
9040 if not self.source_x509_ca_pem:
9041 raise errors.OpPrereqError("Missing source X509 CA",
9045 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9047 except OpenSSL.crypto.Error, err:
9048 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9049 (err, ), errors.ECODE_INVAL)
9051 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9052 if errcode is not None:
9053 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9056 self.source_x509_ca = cert
9058 src_instance_name = self.op.source_instance_name
9059 if not src_instance_name:
9060 raise errors.OpPrereqError("Missing source instance name",
9063 self.source_instance_name = \
9064 netutils.GetHostname(name=src_instance_name).name
9067 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9068 self.op.mode, errors.ECODE_INVAL)
9070 def ExpandNames(self):
9071 """ExpandNames for CreateInstance.
9073 Figure out the right locks for instance creation.
9076 self.needed_locks = {}
9078 instance_name = self.op.instance_name
9079 # this is just a preventive check, but someone might still add this
9080 # instance in the meantime, and creation will fail at lock-add time
9081 if instance_name in self.cfg.GetInstanceList():
9082 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9083 instance_name, errors.ECODE_EXISTS)
9085 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9087 if self.op.iallocator:
9088 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9089 # specifying a group on instance creation and then selecting nodes from
9090 # that group
9091 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9092 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9094 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9095 nodelist = [self.op.pnode]
9096 if self.op.snode is not None:
9097 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9098 nodelist.append(self.op.snode)
9099 self.needed_locks[locking.LEVEL_NODE] = nodelist
9100 # Lock resources of instance's primary and secondary nodes (copy to
9101 # prevent accidental modification)
9102 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9104 # in case of import lock the source node too
9105 if self.op.mode == constants.INSTANCE_IMPORT:
9106 src_node = self.op.src_node
9107 src_path = self.op.src_path
9109 if src_path is None:
9110 self.op.src_path = src_path = self.op.instance_name
9112 if src_node is None:
9113 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9114 self.op.src_node = None
9115 if os.path.isabs(src_path):
9116 raise errors.OpPrereqError("Importing an instance from a path"
9117 " requires a source node option",
9120 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9121 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9122 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9123 if not os.path.isabs(src_path):
9124 self.op.src_path = src_path = \
9125 utils.PathJoin(constants.EXPORT_DIR, src_path)
9127 def _RunAllocator(self):
9128 """Run the allocator based on input opcode.
9131 nics = [n.ToDict() for n in self.nics]
9132 ial = IAllocator(self.cfg, self.rpc,
9133 mode=constants.IALLOCATOR_MODE_ALLOC,
9134 name=self.op.instance_name,
9135 disk_template=self.op.disk_template,
9138 vcpus=self.be_full[constants.BE_VCPUS],
9139 memory=self.be_full[constants.BE_MAXMEM],
9142 hypervisor=self.op.hypervisor,
9145 ial.Run(self.op.iallocator)
9148 raise errors.OpPrereqError("Can't compute nodes using"
9149 " iallocator '%s': %s" %
9150 (self.op.iallocator, ial.info),
9152 if len(ial.result) != ial.required_nodes:
9153 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9154 " of nodes (%s), required %s" %
9155 (self.op.iallocator, len(ial.result),
9156 ial.required_nodes), errors.ECODE_FAULT)
9157 self.op.pnode = ial.result[0]
9158 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9159 self.op.instance_name, self.op.iallocator,
9160 utils.CommaJoin(ial.result))
9161 if ial.required_nodes == 2:
9162 self.op.snode = ial.result[1]
9164 def BuildHooksEnv(self):
9167 This runs on master, primary and secondary nodes of the instance.
9171 "ADD_MODE": self.op.mode,
9173 if self.op.mode == constants.INSTANCE_IMPORT:
9174 env["SRC_NODE"] = self.op.src_node
9175 env["SRC_PATH"] = self.op.src_path
9176 env["SRC_IMAGES"] = self.src_images
9178 env.update(_BuildInstanceHookEnv(
9179 name=self.op.instance_name,
9180 primary_node=self.op.pnode,
9181 secondary_nodes=self.secondaries,
9182 status=self.op.start,
9183 os_type=self.op.os_type,
9184 minmem=self.be_full[constants.BE_MINMEM],
9185 maxmem=self.be_full[constants.BE_MAXMEM],
9186 vcpus=self.be_full[constants.BE_VCPUS],
9187 nics=_NICListToTuple(self, self.nics),
9188 disk_template=self.op.disk_template,
9189 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9190 for d in self.disks],
9193 hypervisor_name=self.op.hypervisor,
9199 def BuildHooksNodes(self):
9200 """Build hooks nodes.
9203 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9206 def _ReadExportInfo(self):
9207 """Reads the export information from disk.
9209 It will override the opcode source node and path with the actual
9210 information, if these two were not specified before.
9212 @return: the export information
9215 assert self.op.mode == constants.INSTANCE_IMPORT
9217 src_node = self.op.src_node
9218 src_path = self.op.src_path
9220 if src_node is None:
9221 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9222 exp_list = self.rpc.call_export_list(locked_nodes)
9224 for node in exp_list:
9225 if exp_list[node].fail_msg:
9227 if src_path in exp_list[node].payload:
9229 self.op.src_node = src_node = node
9230 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9234 raise errors.OpPrereqError("No export found for relative path %s" %
9235 src_path, errors.ECODE_INVAL)
9237 _CheckNodeOnline(self, src_node)
9238 result = self.rpc.call_export_info(src_node, src_path)
9239 result.Raise("No export or invalid export found in dir %s" % src_path)
9241 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9242 if not export_info.has_section(constants.INISECT_EXP):
9243 raise errors.ProgrammerError("Corrupted export config",
9244 errors.ECODE_ENVIRON)
9246 ei_version = export_info.get(constants.INISECT_EXP, "version")
9247 if (int(ei_version) != constants.EXPORT_VERSION):
9248 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9249 (ei_version, constants.EXPORT_VERSION),
9250 errors.ECODE_ENVIRON)
9253 def _ReadExportParams(self, einfo):
9254 """Use export parameters as defaults.
9256 In case the opcode doesn't specify (i.e. override) some instance
9257 parameters, then try to use them from the export information, if
9258 the export declares them.
9261 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9263 if self.op.disk_template is None:
9264 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9265 self.op.disk_template = einfo.get(constants.INISECT_INS,
9267 if self.op.disk_template not in constants.DISK_TEMPLATES:
9268 raise errors.OpPrereqError("Disk template specified in configuration"
9269 " file is not one of the allowed values:"
9270 " %s" % " ".join(constants.DISK_TEMPLATES))
9272 raise errors.OpPrereqError("No disk template specified and the export"
9273 " is missing the disk_template information",
9276 if not self.op.disks:
9278 # TODO: import the disk iv_name too
9279 for idx in range(constants.MAX_DISKS):
9280 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9281 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9282 disks.append({constants.IDISK_SIZE: disk_sz})
9283 self.op.disks = disks
9284 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9285 raise errors.OpPrereqError("No disk info specified and the export"
9286 " is missing the disk information",
9289 if not self.op.nics:
9291 for idx in range(constants.MAX_NICS):
9292 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9294 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9295 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9302 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9303 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9305 if (self.op.hypervisor is None and
9306 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9307 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9309 if einfo.has_section(constants.INISECT_HYP):
9310 # use the export parameters but do not override the ones
9311 # specified by the user
9312 for name, value in einfo.items(constants.INISECT_HYP):
9313 if name not in self.op.hvparams:
9314 self.op.hvparams[name] = value
9316 if einfo.has_section(constants.INISECT_BEP):
9317 # use the parameters, without overriding
9318 for name, value in einfo.items(constants.INISECT_BEP):
9319 if name not in self.op.beparams:
9320 self.op.beparams[name] = value
9321 # Compatibility for the old "memory" be param
9322 if name == constants.BE_MEMORY:
9323 if constants.BE_MAXMEM not in self.op.beparams:
9324 self.op.beparams[constants.BE_MAXMEM] = value
9325 if constants.BE_MINMEM not in self.op.beparams:
9326 self.op.beparams[constants.BE_MINMEM] = value
9327 else:
9328 # try to read the parameters old style, from the main section
9329 for name in constants.BES_PARAMETERS:
9330 if (name not in self.op.beparams and
9331 einfo.has_option(constants.INISECT_INS, name)):
9332 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9334 if einfo.has_section(constants.INISECT_OSP):
9335 # use the parameters, without overriding
9336 for name, value in einfo.items(constants.INISECT_OSP):
9337 if name not in self.op.osparams:
9338 self.op.osparams[name] = value
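# Editor's sketch: the merge pattern used above for hvparams/beparams/osparams
# is "the export value fills in only what the user did not specify", i.e. a
# setdefault-style merge. Hypothetical values:
#
#   op_params = {"kernel_path": "/vmlinuz-custom"}      # user-specified
#   export_params = {"kernel_path": "/vmlinuz", "root_path": "/dev/vda1"}
#   for name, value in export_params.items():
#     if name not in op_params:
#       op_params[name] = value
#   # op_params == {"kernel_path": "/vmlinuz-custom", "root_path": "/dev/vda1"}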
9340 def _RevertToDefaults(self, cluster):
9341 """Revert the instance parameters to the default values.
9345 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9346 for name in self.op.hvparams.keys():
9347 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9348 del self.op.hvparams[name]
9350 be_defs = cluster.SimpleFillBE({})
9351 for name in self.op.beparams.keys():
9352 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9353 del self.op.beparams[name]
9355 nic_defs = cluster.SimpleFillNIC({})
9356 for nic in self.op.nics:
9357 for name in constants.NICS_PARAMETERS:
9358 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9359 del nic[name]
9361 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9362 for name in self.op.osparams.keys():
9363 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9364 del self.op.osparams[name]
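# Editor's sketch: _RevertToDefaults keeps only genuine overrides, so that an
# import with identify_defaults does not freeze the current cluster defaults
# into the instance. Hypothetical values:
#
#   defaults = {"acpi": True, "boot_order": "cd"}
#   specified = {"acpi": True, "boot_order": "dc"}
#   for name in specified.keys():
#     if name in defaults and defaults[name] == specified[name]:
#       del specified[name]
#   # specified == {"boot_order": "dc"}; "acpi" now tracks the cluster default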
9366 def _CalculateFileStorageDir(self):
9367 """Calculate final instance file storage dir.
9370 # file storage dir calculation/check
9371 self.instance_file_storage_dir = None
9372 if self.op.disk_template in constants.DTS_FILEBASED:
9373 # build the full file storage dir path
9374 joinargs = []
9376 if self.op.disk_template == constants.DT_SHARED_FILE:
9377 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9378 else:
9379 get_fsd_fn = self.cfg.GetFileStorageDir
9381 cfg_storagedir = get_fsd_fn()
9382 if not cfg_storagedir:
9383 raise errors.OpPrereqError("Cluster file storage dir not defined")
9384 joinargs.append(cfg_storagedir)
9386 if self.op.file_storage_dir is not None:
9387 joinargs.append(self.op.file_storage_dir)
9389 joinargs.append(self.op.instance_name)
9391 # pylint: disable=W0142
9392 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
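# Editor's sketch: the final path is cluster_dir[/user_subdir]/instance_name,
# e.g. with hypothetical values:
#
#   joinargs = ["/srv/ganeti/file-storage", "web", "instance1.example.com"]
#   # utils.PathJoin would yield
#   # "/srv/ganeti/file-storage/web/instance1.example.com"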
9394 def CheckPrereq(self):
9395 """Check prerequisites.
9398 self._CalculateFileStorageDir()
9400 if self.op.mode == constants.INSTANCE_IMPORT:
9401 export_info = self._ReadExportInfo()
9402 self._ReadExportParams(export_info)
9404 if (not self.cfg.GetVGName() and
9405 self.op.disk_template not in constants.DTS_NOT_LVM):
9406 raise errors.OpPrereqError("Cluster does not support lvm-based"
9407 " instances", errors.ECODE_STATE)
9409 if (self.op.hypervisor is None or
9410 self.op.hypervisor == constants.VALUE_AUTO):
9411 self.op.hypervisor = self.cfg.GetHypervisorType()
9413 cluster = self.cfg.GetClusterInfo()
9414 enabled_hvs = cluster.enabled_hypervisors
9415 if self.op.hypervisor not in enabled_hvs:
9416 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9417 " cluster (%s)" % (self.op.hypervisor,
9418 ",".join(enabled_hvs)),
9421 # Check tag validity
9422 for tag in self.op.tags:
9423 objects.TaggableObject.ValidateTag(tag)
9425 # check hypervisor parameter syntax (locally)
9426 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9427 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9428 self.op.hvparams)
9429 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9430 hv_type.CheckParameterSyntax(filled_hvp)
9431 self.hv_full = filled_hvp
9432 # check that we don't specify global parameters on an instance
9433 _CheckGlobalHvParams(self.op.hvparams)
9435 # fill and remember the beparams dict
9436 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9437 for param, value in self.op.beparams.iteritems():
9438 if value == constants.VALUE_AUTO:
9439 self.op.beparams[param] = default_beparams[param]
9440 objects.UpgradeBeParams(self.op.beparams)
9441 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9442 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9444 # build os parameters
9445 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9447 # now that hvp/bep are in final format, let's reset to defaults,
9448 # if told to do so
9449 if self.op.identify_defaults:
9450 self._RevertToDefaults(cluster)
9452 # NIC buildup
9453 self.nics = []
9454 for idx, nic in enumerate(self.op.nics):
9455 nic_mode_req = nic.get(constants.INIC_MODE, None)
9456 nic_mode = nic_mode_req
9457 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9458 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9460 # in routed mode, for the first nic, the default ip is 'auto'
9461 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9462 default_ip_mode = constants.VALUE_AUTO
9464 default_ip_mode = constants.VALUE_NONE
9466 # ip validity checks
9467 ip = nic.get(constants.INIC_IP, default_ip_mode)
9468 if ip is None or ip.lower() == constants.VALUE_NONE:
9469 nic_ip = None
9470 elif ip.lower() == constants.VALUE_AUTO:
9471 if not self.op.name_check:
9472 raise errors.OpPrereqError("IP address set to auto but name checks"
9473 " have been skipped",
9475 nic_ip = self.hostname1.ip
9477 if not netutils.IPAddress.IsValid(ip):
9478 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9482 # TODO: check the ip address for uniqueness
9483 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9484 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9487 # MAC address verification
9488 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9489 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9490 mac = utils.NormalizeAndValidateMac(mac)
9492 try:
9493 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9494 except errors.ReservationError:
9495 raise errors.OpPrereqError("MAC address %s already in use"
9496 " in cluster" % mac,
9497 errors.ECODE_NOTUNIQUE)
9499 # Build nic parameters
9500 link = nic.get(constants.INIC_LINK, None)
9501 if link == constants.VALUE_AUTO:
9502 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9503 nicparams = {}
9504 if nic_mode_req:
9505 nicparams[constants.NIC_MODE] = nic_mode
9506 if link:
9507 nicparams[constants.NIC_LINK] = link
9509 check_params = cluster.SimpleFillNIC(nicparams)
9510 objects.NIC.CheckParameterSyntax(check_params)
9511 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
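# Editor's sketch of the NIC resolution above, with hypothetical inputs: a
# request of {"mode": "auto", "mac": "auto"} ends up with the cluster-default
# mode/link, a generated-later MAC, and (for routed mode on the first NIC)
# the name-resolved IP; an explicit {"ip": "none"} yields nic_ip = None:
#
#   nic_req = {constants.INIC_MODE: "auto", constants.INIC_MAC: "auto"}
#   mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]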
9513 # disk checks/pre-build
9514 default_vg = self.cfg.GetVGName()
9515 self.disks = []
9516 for disk in self.op.disks:
9517 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9518 if mode not in constants.DISK_ACCESS_SET:
9519 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9520 mode, errors.ECODE_INVAL)
9521 size = disk.get(constants.IDISK_SIZE, None)
9523 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9526 except (TypeError, ValueError):
9527 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9530 data_vg = disk.get(constants.IDISK_VG, default_vg)
9531 new_disk = {
9532 constants.IDISK_SIZE: size,
9533 constants.IDISK_MODE: mode,
9534 constants.IDISK_VG: data_vg,
9535 }
9536 if constants.IDISK_METAVG in disk:
9537 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9538 if constants.IDISK_ADOPT in disk:
9539 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9540 self.disks.append(new_disk)
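# Editor's sketch: each user disk specification is normalised into a dict
# like the following (hypothetical values):
#
#   {constants.IDISK_SIZE: 10240,      # MiB, after int() conversion
#    constants.IDISK_MODE: "rw",       # validated against DISK_ACCESS_SET
#    constants.IDISK_VG: "xenvg"}      # default VG unless overridden
#   # plus IDISK_METAVG/IDISK_ADOPT when present in the input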
9542 if self.op.mode == constants.INSTANCE_IMPORT:
9543 disk_images = []
9544 for idx in range(len(self.disks)):
9545 option = "disk%d_dump" % idx
9546 if export_info.has_option(constants.INISECT_INS, option):
9547 # FIXME: are the old os-es, disk sizes, etc. useful?
9548 export_name = export_info.get(constants.INISECT_INS, option)
9549 image = utils.PathJoin(self.op.src_path, export_name)
9550 disk_images.append(image)
9551 else:
9552 disk_images.append(False)
9554 self.src_images = disk_images
9556 old_name = export_info.get(constants.INISECT_INS, "name")
9557 if self.op.instance_name == old_name:
9558 for idx, nic in enumerate(self.nics):
9559 if nic.mac == constants.VALUE_AUTO:
9560 nic_mac_ini = "nic%d_mac" % idx
9561 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9563 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9565 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9566 if self.op.ip_check:
9567 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9568 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9569 (self.check_ip, self.op.instance_name),
9570 errors.ECODE_NOTUNIQUE)
9572 #### mac address generation
9573 # By generating here the mac address both the allocator and the hooks get
9574 # the real final mac address rather than the 'auto' or 'generate' value.
9575 # There is a race condition between the generation and the instance object
9576 # creation, which means that we know the mac is valid now, but we're not
9577 # sure it will be when we actually add the instance. If things go bad
9578 # adding the instance will abort because of a duplicate mac, and the
9579 # creation job will fail.
9580 for nic in self.nics:
9581 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9582 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9586 if self.op.iallocator is not None:
9587 self._RunAllocator()
9589 # Release all unneeded node locks
9590 _ReleaseLocks(self, locking.LEVEL_NODE,
9591 keep=filter(None, [self.op.pnode, self.op.snode,
9592 self.op.src_node]))
9594 #### node related checks
9596 # check primary node
9597 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9598 assert self.pnode is not None, \
9599 "Cannot retrieve locked node %s" % self.op.pnode
9601 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9602 pnode.name, errors.ECODE_STATE)
9604 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9605 pnode.name, errors.ECODE_STATE)
9606 if not pnode.vm_capable:
9607 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9608 " '%s'" % pnode.name, errors.ECODE_STATE)
9610 self.secondaries = []
9612 # mirror node verification
9613 if self.op.disk_template in constants.DTS_INT_MIRROR:
9614 if self.op.snode == pnode.name:
9615 raise errors.OpPrereqError("The secondary node cannot be the"
9616 " primary node", errors.ECODE_INVAL)
9617 _CheckNodeOnline(self, self.op.snode)
9618 _CheckNodeNotDrained(self, self.op.snode)
9619 _CheckNodeVmCapable(self, self.op.snode)
9620 self.secondaries.append(self.op.snode)
9622 snode = self.cfg.GetNodeInfo(self.op.snode)
9623 if pnode.group != snode.group:
9624 self.LogWarning("The primary and secondary nodes are in two"
9625 " different node groups; the disk parameters"
9626 " from the first disk's node group will be"
9629 nodenames = [pnode.name] + self.secondaries
9631 # disk parameters (not customizable at instance or node level)
9632 # just use the primary node parameters, ignoring the secondary.
9633 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9635 if not self.adopt_disks:
9636 # Check lv size requirements, if not adopting
9637 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9638 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9640 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9641 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9642 disk[constants.IDISK_ADOPT])
9643 for disk in self.disks])
9644 if len(all_lvs) != len(self.disks):
9645 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9647 for lv_name in all_lvs:
9649 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9650 # to ReserveLV uses the same syntax
9651 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9652 except errors.ReservationError:
9653 raise errors.OpPrereqError("LV named %s used by another instance" %
9654 lv_name, errors.ECODE_NOTUNIQUE)
9656 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9657 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9659 node_lvs = self.rpc.call_lv_list([pnode.name],
9660 vg_names.payload.keys())[pnode.name]
9661 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9662 node_lvs = node_lvs.payload
9664 delta = all_lvs.difference(node_lvs.keys())
9666 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9667 utils.CommaJoin(delta),
9669 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9671 raise errors.OpPrereqError("Online logical volumes found, cannot"
9672 " adopt: %s" % utils.CommaJoin(online_lvs),
9674 # update the size of disk based on what is found
9675 for dsk in self.disks:
9676 dsk[constants.IDISK_SIZE] = \
9677 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9678 dsk[constants.IDISK_ADOPT])][0]))
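# Editor's note (sketch): the lv_list payload maps "vg/lv" names to a tuple
# whose first field is the size in MiB and whose third field is the online
# flag, which is what the [0] and [2] indexing above relies on, e.g.
# (hypothetical) node_lvs == {"xenvg/disk0": (10240.00, False, False)}.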
9680 elif self.op.disk_template == constants.DT_BLOCK:
9681 # Normalize and de-duplicate device paths
9682 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9683 for disk in self.disks])
9684 if len(all_disks) != len(self.disks):
9685 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9687 baddisks = [d for d in all_disks
9688 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9690 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9691 " cannot be adopted" %
9692 (", ".join(baddisks),
9693 constants.ADOPTABLE_BLOCKDEV_ROOT),
9694 errors.ECODE_INVAL)
9696 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9697 list(all_disks))[pnode.name]
9698 node_disks.Raise("Cannot get block device information from node %s" %
9699 pnode.name)
9700 node_disks = node_disks.payload
9701 delta = all_disks.difference(node_disks.keys())
9703 raise errors.OpPrereqError("Missing block device(s): %s" %
9704 utils.CommaJoin(delta),
9706 for dsk in self.disks:
9707 dsk[constants.IDISK_SIZE] = \
9708 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9710 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9712 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9713 # check OS parameters (remotely)
9714 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9716 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9718 # memory check on primary node
9719 #TODO(dynmem): use MINMEM for checking
9720 if self.op.start:
9721 _CheckNodeFreeMemory(self, self.pnode.name,
9722 "creating instance %s" % self.op.instance_name,
9723 self.be_full[constants.BE_MAXMEM],
9724 self.op.hypervisor)
9726 self.dry_run_result = list(nodenames)
9728 def Exec(self, feedback_fn):
9729 """Create and add the instance to the cluster.
9732 instance = self.op.instance_name
9733 pnode_name = self.pnode.name
9735 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9736 self.owned_locks(locking.LEVEL_NODE)), \
9737 "Node locks differ from node resource locks"
9739 ht_kind = self.op.hypervisor
9740 if ht_kind in constants.HTS_REQ_PORT:
9741 network_port = self.cfg.AllocatePort()
9742 else:
9743 network_port = None
9745 disks = _GenerateDiskTemplate(self,
9746 self.op.disk_template,
9747 instance, pnode_name,
9748 self.secondaries,
9749 self.disks,
9750 self.instance_file_storage_dir,
9751 self.op.file_driver,
9752 0,
9753 feedback_fn,
9754 self.diskparams)
9756 iobj = objects.Instance(name=instance, os=self.op.os_type,
9757 primary_node=pnode_name,
9758 nics=self.nics, disks=disks,
9759 disk_template=self.op.disk_template,
9760 admin_state=constants.ADMINST_DOWN,
9761 network_port=network_port,
9762 beparams=self.op.beparams,
9763 hvparams=self.op.hvparams,
9764 hypervisor=self.op.hypervisor,
9765 osparams=self.op.osparams,
9766 )
9768 if self.op.tags:
9769 for tag in self.op.tags:
9770 iobj.AddTag(tag)
9772 if self.adopt_disks:
9773 if self.op.disk_template == constants.DT_PLAIN:
9774 # rename LVs to the newly-generated names; we need to construct
9775 # 'fake' LV disks with the old data, plus the new unique_id
9776 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9777 rename_to = []
9778 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9779 rename_to.append(t_dsk.logical_id)
9780 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9781 self.cfg.SetDiskID(t_dsk, pnode_name)
9782 result = self.rpc.call_blockdev_rename(pnode_name,
9783 zip(tmp_disks, rename_to))
9784 result.Raise("Failed to rename adoped LVs")
9786 feedback_fn("* creating instance disks...")
9787 try:
9788 _CreateDisks(self, iobj)
9789 except errors.OpExecError:
9790 self.LogWarning("Device creation failed, reverting...")
9791 try:
9792 _RemoveDisks(self, iobj)
9793 finally:
9794 self.cfg.ReleaseDRBDMinors(instance)
9795 raise
9797 feedback_fn("adding instance %s to cluster config" % instance)
9799 self.cfg.AddInstance(iobj, self.proc.GetECId())
9801 # Declare that we don't want to remove the instance lock anymore, as we've
9802 # added the instance to the config
9803 del self.remove_locks[locking.LEVEL_INSTANCE]
9805 if self.op.mode == constants.INSTANCE_IMPORT:
9806 # Release unused nodes
9807 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9808 else:
9809 # Release all nodes
9810 _ReleaseLocks(self, locking.LEVEL_NODE)
9812 disk_abort = False
9813 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9814 feedback_fn("* wiping instance disks...")
9815 try:
9816 _WipeDisks(self, iobj)
9817 except errors.OpExecError, err:
9818 logging.exception("Wiping disks failed")
9819 self.LogWarning("Wiping instance disks failed (%s)", err)
9823 # Something is already wrong with the disks, don't do anything else
9825 elif self.op.wait_for_sync:
9826 disk_abort = not _WaitForSync(self, iobj)
9827 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9828 # make sure the disks are not degraded (still sync-ing is ok)
9829 feedback_fn("* checking mirrors status")
9830 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9831 else:
9832 disk_abort = False
9834 if disk_abort:
9835 _RemoveDisks(self, iobj)
9836 self.cfg.RemoveInstance(iobj.name)
9837 # Make sure the instance lock gets removed
9838 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9839 raise errors.OpExecError("There are some degraded disks for"
9842 # Release all node resource locks
9843 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9845 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9846 if self.op.mode == constants.INSTANCE_CREATE:
9847 if not self.op.no_install:
9848 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9849 not self.op.wait_for_sync)
9851 feedback_fn("* pausing disk sync to install instance OS")
9852 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9854 for idx, success in enumerate(result.payload):
9856 logging.warn("pause-sync of instance %s for disk %d failed",
9859 feedback_fn("* running the instance OS create scripts...")
9860 # FIXME: pass debug option from opcode to backend
9861 os_add_result = \
9862 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9863 self.op.debug_level)
9865 feedback_fn("* resuming disk sync")
9866 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9868 for idx, success in enumerate(result.payload):
9870 logging.warn("resume-sync of instance %s for disk %d failed",
9873 os_add_result.Raise("Could not add os for instance %s"
9874 " on node %s" % (instance, pnode_name))
9876 elif self.op.mode == constants.INSTANCE_IMPORT:
9877 feedback_fn("* running the instance OS import scripts...")
9879 transfers = []
9881 for idx, image in enumerate(self.src_images):
9882 if not image:
9883 continue
9885 # FIXME: pass debug option from opcode to backend
9886 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9887 constants.IEIO_FILE, (image, ),
9888 constants.IEIO_SCRIPT,
9889 (iobj.disks[idx], idx),
9890 None)
9891 transfers.append(dt)
9893 import_result = \
9894 masterd.instance.TransferInstanceData(self, feedback_fn,
9895 self.op.src_node, pnode_name,
9896 self.pnode.secondary_ip,
9897 iobj, transfers)
9898 if not compat.all(import_result):
9899 self.LogWarning("Some disks for instance %s on node %s were not"
9900 " imported successfully" % (instance, pnode_name))
9902 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9903 feedback_fn("* preparing remote import...")
9904 # The source cluster will stop the instance before attempting to make a
9905 # connection. In some cases stopping an instance can take a long time,
9906 # hence the shutdown timeout is added to the connection timeout.
9907 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9908 self.op.source_shutdown_timeout)
9909 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9911 assert iobj.primary_node == self.pnode.name
9912 disk_results = \
9913 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9914 self.source_x509_ca,
9915 self._cds, timeouts)
9916 if not compat.all(disk_results):
9917 # TODO: Should the instance still be started, even if some disks
9918 # failed to import (valid for local imports, too)?
9919 self.LogWarning("Some disks for instance %s on node %s were not"
9920 " imported successfully" % (instance, pnode_name))
9922 # Run rename script on newly imported instance
9923 assert iobj.name == instance
9924 feedback_fn("Running rename script for %s" % instance)
9925 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9926 self.source_instance_name,
9927 self.op.debug_level)
9929 self.LogWarning("Failed to run rename script for %s on node"
9930 " %s: %s" % (instance, pnode_name, result.fail_msg))
9932 else:
9933 # also checked in the prereq part
9934 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9935 % self.op.mode)
9937 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9939 if self.op.start:
9940 iobj.admin_state = constants.ADMINST_UP
9941 self.cfg.Update(iobj, feedback_fn)
9942 logging.info("Starting instance %s on node %s", instance, pnode_name)
9943 feedback_fn("* starting instance...")
9944 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9945 False)
9946 result.Raise("Could not start instance")
9948 return list(iobj.all_nodes)
9951 class LUInstanceConsole(NoHooksLU):
9952 """Connect to an instance's console.
9954 This is somewhat special in that it returns the command line that
9955 you need to run on the master node in order to connect to the
9956 console.
9958 """
9959 REQ_BGL = False
9961 def ExpandNames(self):
9962 self.share_locks = _ShareAll()
9963 self._ExpandAndLockInstance()
9965 def CheckPrereq(self):
9966 """Check prerequisites.
9968 This checks that the instance is in the cluster.
9971 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9972 assert self.instance is not None, \
9973 "Cannot retrieve locked instance %s" % self.op.instance_name
9974 _CheckNodeOnline(self, self.instance.primary_node)
9976 def Exec(self, feedback_fn):
9977 """Connect to the console of an instance
9980 instance = self.instance
9981 node = instance.primary_node
9983 node_insts = self.rpc.call_instance_list([node],
9984 [instance.hypervisor])[node]
9985 node_insts.Raise("Can't get node information from %s" % node)
9987 if instance.name not in node_insts.payload:
9988 if instance.admin_state == constants.ADMINST_UP:
9989 state = constants.INSTST_ERRORDOWN
9990 elif instance.admin_state == constants.ADMINST_DOWN:
9991 state = constants.INSTST_ADMINDOWN
9993 state = constants.INSTST_ADMINOFFLINE
9994 raise errors.OpExecError("Instance %s is not running (state %s)" %
9995 (instance.name, state))
9997 logging.debug("Connecting to console of %s on %s", instance.name, node)
9999 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10002 def _GetInstanceConsole(cluster, instance):
10003 """Returns console information for an instance.
10005 @type cluster: L{objects.Cluster}
10006 @type instance: L{objects.Instance}
10010 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10011 # beparams and hvparams are passed separately, to avoid editing the
10012 # instance and then saving the defaults in the instance itself.
10013 hvparams = cluster.FillHV(instance)
10014 beparams = cluster.FillBE(instance)
10015 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10017 assert console.instance == instance.name
10018 assert console.Validate()
10020 return console.ToDict()
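# Editor's sketch: a caller of _GetInstanceConsole gets back a plain dict;
# the exact keys are defined by objects.InstanceConsole, the values below are
# hypothetical:
#
#   console = _GetInstanceConsole(cluster, instance)
#   # e.g. {"instance": "instance1.example.com", "kind": "ssh",
#   #       "host": "node1.example.com", ...}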
10023 class LUInstanceReplaceDisks(LogicalUnit):
10024 """Replace the disks of an instance.
10027 HPATH = "mirrors-replace"
10028 HTYPE = constants.HTYPE_INSTANCE
10031 def CheckArguments(self):
10032 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10033 self.op.iallocator)
10035 def ExpandNames(self):
10036 self._ExpandAndLockInstance()
10038 assert locking.LEVEL_NODE not in self.needed_locks
10039 assert locking.LEVEL_NODE_RES not in self.needed_locks
10040 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10042 assert self.op.iallocator is None or self.op.remote_node is None, \
10043 "Conflicting options"
10045 if self.op.remote_node is not None:
10046 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10048 # Warning: do not remove the locking of the new secondary here
10049 # unless DRBD8.AddChildren is changed to work in parallel;
10050 # currently it doesn't since parallel invocations of
10051 # FindUnusedMinor will conflict
10052 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10053 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10054 else:
10055 self.needed_locks[locking.LEVEL_NODE] = []
10056 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10058 if self.op.iallocator is not None:
10059 # iallocator will select a new node in the same group
10060 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10062 self.needed_locks[locking.LEVEL_NODE_RES] = []
10064 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10065 self.op.iallocator, self.op.remote_node,
10066 self.op.disks, False, self.op.early_release)
10068 self.tasklets = [self.replacer]
10070 def DeclareLocks(self, level):
10071 if level == locking.LEVEL_NODEGROUP:
10072 assert self.op.remote_node is None
10073 assert self.op.iallocator is not None
10074 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10076 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10077 # Lock all groups used by instance optimistically; this requires going
10078 # via the node before it's locked, requiring verification later on
10079 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10080 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10082 elif level == locking.LEVEL_NODE:
10083 if self.op.iallocator is not None:
10084 assert self.op.remote_node is None
10085 assert not self.needed_locks[locking.LEVEL_NODE]
10087 # Lock member nodes of all locked groups
10088 self.needed_locks[locking.LEVEL_NODE] = [node_name
10089 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10090 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10092 self._LockInstancesNodes()
10093 elif level == locking.LEVEL_NODE_RES:
10095 self.needed_locks[locking.LEVEL_NODE_RES] = \
10096 self.needed_locks[locking.LEVEL_NODE]
10098 def BuildHooksEnv(self):
10099 """Build hooks env.
10101 This runs on the master, the primary and all the secondaries.
10104 instance = self.replacer.instance
10106 "MODE": self.op.mode,
10107 "NEW_SECONDARY": self.op.remote_node,
10108 "OLD_SECONDARY": instance.secondary_nodes[0],
10110 env.update(_BuildInstanceHookEnvByObject(self, instance))
10113 def BuildHooksNodes(self):
10114 """Build hooks nodes.
10117 instance = self.replacer.instance
10118 nl = [
10119 self.cfg.GetMasterNode(),
10120 instance.primary_node,
10121 ]
10122 if self.op.remote_node is not None:
10123 nl.append(self.op.remote_node)
10125 return nl, nl
10126 def CheckPrereq(self):
10127 """Check prerequisites.
10130 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10131 self.op.iallocator is None)
10133 # Verify if node group locks are still correct
10134 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10135 if owned_groups:
10136 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10138 return LogicalUnit.CheckPrereq(self)
10141 class TLReplaceDisks(Tasklet):
10142 """Replaces disks for an instance.
10144 Note: Locking is not within the scope of this class.
10147 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10148 disks, delay_iallocator, early_release):
10149 """Initializes this class.
10152 Tasklet.__init__(self, lu)
10155 self.instance_name = instance_name
10156 self.mode = mode
10157 self.iallocator_name = iallocator_name
10158 self.remote_node = remote_node
10159 self.disks = disks
10160 self.delay_iallocator = delay_iallocator
10161 self.early_release = early_release
10164 self.instance = None
10165 self.new_node = None
10166 self.target_node = None
10167 self.other_node = None
10168 self.remote_node_info = None
10169 self.node_secondary_ip = None
10171 @staticmethod
10172 def CheckArguments(mode, remote_node, iallocator):
10173 """Helper function for users of this class.
10176 # check for valid parameter combination
10177 if mode == constants.REPLACE_DISK_CHG:
10178 if remote_node is None and iallocator is None:
10179 raise errors.OpPrereqError("When changing the secondary either an"
10180 " iallocator script must be used or the"
10181 " new node given", errors.ECODE_INVAL)
10183 if remote_node is not None and iallocator is not None:
10184 raise errors.OpPrereqError("Give either the iallocator or the new"
10185 " secondary, not both", errors.ECODE_INVAL)
10187 elif remote_node is not None or iallocator is not None:
10188 # Not replacing the secondary
10189 raise errors.OpPrereqError("The iallocator and new node options can"
10190 " only be used when changing the"
10191 " secondary node", errors.ECODE_INVAL)
10193 @staticmethod
10194 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10195 """Compute a new secondary node using an IAllocator.
10198 ial = IAllocator(lu.cfg, lu.rpc,
10199 mode=constants.IALLOCATOR_MODE_RELOC,
10200 name=instance_name,
10201 relocate_from=list(relocate_from))
10203 ial.Run(iallocator_name)
10205 if not ial.success:
10206 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10207 " %s" % (iallocator_name, ial.info),
10208 errors.ECODE_NORES)
10210 if len(ial.result) != ial.required_nodes:
10211 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10212 " of nodes (%s), required %s" %
10214 len(ial.result), ial.required_nodes),
10215 errors.ECODE_FAULT)
10217 remote_node_name = ial.result[0]
10219 lu.LogInfo("Selected new secondary for instance '%s': %s",
10220 instance_name, remote_node_name)
10222 return remote_node_name
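# Editor's sketch: the relocation request above asks the iallocator for
# exactly one replacement secondary; a (hypothetical) successful run yields
# ial.success == True, ial.required_nodes == 1 and ial.result == ["node4"],
# of which only result[0] is used.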
10224 def _FindFaultyDisks(self, node_name):
10225 """Wrapper for L{_FindFaultyInstanceDisks}.
10228 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10229 node_name, True)
10231 def _CheckDisksActivated(self, instance):
10232 """Checks if the instance disks are activated.
10234 @param instance: The instance to check disks
10235 @return: True if they are activated, False otherwise
10238 nodes = instance.all_nodes
10240 for idx, dev in enumerate(instance.disks):
10242 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10243 self.cfg.SetDiskID(dev, node)
10245 result = self.rpc.call_blockdev_find(node, dev)
10247 if result.offline:
10248 continue
10249 elif result.fail_msg or not result.payload:
10250 return False
10252 return True
10254 def CheckPrereq(self):
10255 """Check prerequisites.
10257 This checks that the instance is in the cluster.
10260 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10261 assert instance is not None, \
10262 "Cannot retrieve locked instance %s" % self.instance_name
10264 if instance.disk_template != constants.DT_DRBD8:
10265 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10266 " instances", errors.ECODE_INVAL)
10268 if len(instance.secondary_nodes) != 1:
10269 raise errors.OpPrereqError("The instance has a strange layout,"
10270 " expected one secondary but found %d" %
10271 len(instance.secondary_nodes),
10272 errors.ECODE_FAULT)
10274 if not self.delay_iallocator:
10275 self._CheckPrereq2()
10277 def _CheckPrereq2(self):
10278 """Check prerequisites, second part.
10280 This function should always be part of CheckPrereq. It was separated and is
10281 now called from Exec because during node evacuation iallocator was only
10282 called with an unmodified cluster model, not taking planned changes into
10283 account.
10285 """
10286 instance = self.instance
10287 secondary_node = instance.secondary_nodes[0]
10289 if self.iallocator_name is None:
10290 remote_node = self.remote_node
10291 else:
10292 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10293 instance.name, instance.secondary_nodes)
10295 if remote_node is None:
10296 self.remote_node_info = None
10297 else:
10298 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10299 "Remote node '%s' is not locked" % remote_node
10301 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10302 assert self.remote_node_info is not None, \
10303 "Cannot retrieve locked node %s" % remote_node
10305 if remote_node == self.instance.primary_node:
10306 raise errors.OpPrereqError("The specified node is the primary node of"
10307 " the instance", errors.ECODE_INVAL)
10309 if remote_node == secondary_node:
10310 raise errors.OpPrereqError("The specified node is already the"
10311 " secondary node of the instance",
10312 errors.ECODE_INVAL)
10314 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10315 constants.REPLACE_DISK_CHG):
10316 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10317 errors.ECODE_INVAL)
10319 if self.mode == constants.REPLACE_DISK_AUTO:
10320 if not self._CheckDisksActivated(instance):
10321 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10322 " first" % self.instance_name,
10323 errors.ECODE_STATE)
10324 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10325 faulty_secondary = self._FindFaultyDisks(secondary_node)
10327 if faulty_primary and faulty_secondary:
10328 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10329 " one node and can not be repaired"
10330 " automatically" % self.instance_name,
10331 errors.ECODE_STATE)
10333 if faulty_primary:
10334 self.disks = faulty_primary
10335 self.target_node = instance.primary_node
10336 self.other_node = secondary_node
10337 check_nodes = [self.target_node, self.other_node]
10338 elif faulty_secondary:
10339 self.disks = faulty_secondary
10340 self.target_node = secondary_node
10341 self.other_node = instance.primary_node
10342 check_nodes = [self.target_node, self.other_node]
10343 else:
10344 self.disks = []
10345 check_nodes = []
10347 else:
10348 # Non-automatic modes
10349 if self.mode == constants.REPLACE_DISK_PRI:
10350 self.target_node = instance.primary_node
10351 self.other_node = secondary_node
10352 check_nodes = [self.target_node, self.other_node]
10354 elif self.mode == constants.REPLACE_DISK_SEC:
10355 self.target_node = secondary_node
10356 self.other_node = instance.primary_node
10357 check_nodes = [self.target_node, self.other_node]
10359 elif self.mode == constants.REPLACE_DISK_CHG:
10360 self.new_node = remote_node
10361 self.other_node = instance.primary_node
10362 self.target_node = secondary_node
10363 check_nodes = [self.new_node, self.other_node]
10365 _CheckNodeNotDrained(self.lu, remote_node)
10366 _CheckNodeVmCapable(self.lu, remote_node)
10368 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10369 assert old_node_info is not None
10370 if old_node_info.offline and not self.early_release:
10371 # doesn't make sense to delay the release
10372 self.early_release = True
10373 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10374 " early-release mode", secondary_node)
10377 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10380 # If not specified all disks should be replaced
10381 if not self.disks:
10382 self.disks = range(len(self.instance.disks))
10384 # TODO: compute disk parameters
10385 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10386 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10387 if primary_node_info.group != secondary_node_info.group:
10388 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10389 " different node groups; the disk parameters of the"
10390 " primary node's group will be applied.")
10392 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10394 for node in check_nodes:
10395 _CheckNodeOnline(self.lu, node)
10397 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10398 self.other_node,
10399 self.target_node]
10400 if node_name is not None)
10402 # Release unneeded node and node resource locks
10403 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10404 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10406 # Release any owned node group
10407 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10408 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10410 # Check whether disks are valid
10411 for disk_idx in self.disks:
10412 instance.FindDisk(disk_idx)
10414 # Get secondary node IP addresses
10415 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10416 in self.cfg.GetMultiNodeInfo(touched_nodes))
10418 def Exec(self, feedback_fn):
10419 """Execute disk replacement.
10421 This dispatches the disk replacement to the appropriate handler.
10424 if self.delay_iallocator:
10425 self._CheckPrereq2()
10427 if __debug__:
10428 # Verify owned locks before starting operation
10429 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10430 assert set(owned_nodes) == set(self.node_secondary_ip), \
10431 ("Incorrect node locks, owning %s, expected %s" %
10432 (owned_nodes, self.node_secondary_ip.keys()))
10433 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10434 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10436 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10437 assert list(owned_instances) == [self.instance_name], \
10438 "Instance '%s' not locked" % self.instance_name
10440 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10441 "Should not own any node group lock at this point"
10444 feedback_fn("No disks need replacement")
10447 feedback_fn("Replacing disk(s) %s for %s" %
10448 (utils.CommaJoin(self.disks), self.instance.name))
10450 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10452 # Activate the instance disks if we're replacing them on a down instance
10453 if activate_disks:
10454 _StartInstanceDisks(self.lu, self.instance, True)
10456 try:
10457 # Should we replace the secondary node?
10458 if self.new_node is not None:
10459 fn = self._ExecDrbd8Secondary
10460 else:
10461 fn = self._ExecDrbd8DiskOnly
10463 result = fn(feedback_fn)
10464 finally:
10465 # Deactivate the instance disks if we're replacing them on a
10466 # down instance
10467 if activate_disks:
10468 _SafeShutdownInstanceDisks(self.lu, self.instance)
10470 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10472 if __debug__:
10473 # Verify owned locks
10474 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10475 nodes = frozenset(self.node_secondary_ip)
10476 assert ((self.early_release and not owned_nodes) or
10477 (not self.early_release and not (set(owned_nodes) - nodes))), \
10478 ("Not owning the correct locks, early_release=%s, owned=%r,"
10479 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10483 def _CheckVolumeGroup(self, nodes):
10484 self.lu.LogInfo("Checking volume groups")
10486 vgname = self.cfg.GetVGName()
10488 # Make sure volume group exists on all involved nodes
10489 results = self.rpc.call_vg_list(nodes)
10491 raise errors.OpExecError("Can't list volume groups on the nodes")
10494 res = results[node]
10495 res.Raise("Error checking node %s" % node)
10496 if vgname not in res.payload:
10497 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10500 def _CheckDisksExistence(self, nodes):
10501 # Check disk existence
10502 for idx, dev in enumerate(self.instance.disks):
10503 if idx not in self.disks:
10504 continue
10506 for node in nodes:
10507 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10508 self.cfg.SetDiskID(dev, node)
10510 result = self.rpc.call_blockdev_find(node, dev)
10512 msg = result.fail_msg
10513 if msg or not result.payload:
10515 msg = "disk not found"
10516 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10519 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10520 for idx, dev in enumerate(self.instance.disks):
10521 if idx not in self.disks:
10522 continue
10524 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10525 (idx, node_name))
10527 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10528 ldisk=ldisk):
10529 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10530 " replace disks for instance %s" %
10531 (node_name, self.instance.name))
10533 def _CreateNewStorage(self, node_name):
10534 """Create new storage on the primary or secondary node.
10536 This is only used for same-node replaces, not for changing the
10537 secondary node, hence we don't want to modify the existing disk.
10539 """
10540 iv_names = {}
10542 for idx, dev in enumerate(self.instance.disks):
10543 if idx not in self.disks:
10544 continue
10546 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10548 self.cfg.SetDiskID(dev, node_name)
10550 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10551 names = _GenerateUniqueNames(self.lu, lv_names)
10553 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10555 vg_data = dev.children[0].logical_id[0]
10556 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10557 logical_id=(vg_data, names[0]), params=data_p)
10558 vg_meta = dev.children[1].logical_id[0]
10559 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10560 logical_id=(vg_meta, names[1]), params=meta_p)
10562 new_lvs = [lv_data, lv_meta]
10563 old_lvs = [child.Copy() for child in dev.children]
10564 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10566 # we pass force_create=True to force the LVM creation
10567 for new_lv in new_lvs:
10568 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10569 _GetInstanceInfoText(self.instance), False)
10571 return iv_names
10573 def _CheckDevices(self, node_name, iv_names):
10574 for name, (dev, _, _) in iv_names.iteritems():
10575 self.cfg.SetDiskID(dev, node_name)
10577 result = self.rpc.call_blockdev_find(node_name, dev)
10579 msg = result.fail_msg
10580 if msg or not result.payload:
10582 msg = "disk not found"
10583 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10586 if result.payload.is_degraded:
10587 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10589 def _RemoveOldStorage(self, node_name, iv_names):
10590 for name, (_, old_lvs, _) in iv_names.iteritems():
10591 self.lu.LogInfo("Remove logical volumes for %s" % name)
10593 for lv in old_lvs:
10594 self.cfg.SetDiskID(lv, node_name)
10596 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10598 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10599 hint="remove unused LVs manually")
10601 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10602 """Replace a disk on the primary or secondary for DRBD 8.
10604 The algorithm for replace is quite complicated:
10606 1. for each disk to be replaced:
10608 1. create new LVs on the target node with unique names
10609 1. detach old LVs from the drbd device
10610 1. rename old LVs to name_replaced.<time_t>
10611 1. rename new LVs to old LVs
10612 1. attach the new LVs (with the old names now) to the drbd device
10614 1. wait for sync across all devices
10616 1. for each modified disk:
10618 1. remove old LVs (which have the name name_replaced.<time_t>)
10620 Failures are not very well handled.
10622 """
10623 steps_total = 6
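# Editor's sketch of the per-disk LV swap performed below, using hypothetical
# names for one disk (the real names come from _GenerateUniqueNames):
#
#   old data LV : xenvg/<uuid-old>.disk0_data -> renamed to *_replaced-<time>
#   new data LV : xenvg/<uuid-new>.disk0_data -> renamed to the old LV's name
#   drbd device : removechildren(old LVs) ... addchildren(renamed new LVs)
#
# so the DRBD device keeps its logical name while its backing storage is
# swapped underneath it.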
10625 # Step: check device activation
10626 self.lu.LogStep(1, steps_total, "Check device existence")
10627 self._CheckDisksExistence([self.other_node, self.target_node])
10628 self._CheckVolumeGroup([self.target_node, self.other_node])
10630 # Step: check other node consistency
10631 self.lu.LogStep(2, steps_total, "Check peer consistency")
10632 self._CheckDisksConsistency(self.other_node,
10633 self.other_node == self.instance.primary_node,
10636 # Step: create new storage
10637 self.lu.LogStep(3, steps_total, "Allocate new storage")
10638 iv_names = self._CreateNewStorage(self.target_node)
10640 # Step: for each lv, detach+rename*2+attach
10641 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10642 for dev, old_lvs, new_lvs in iv_names.itervalues():
10643 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10645 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10646 old_lvs)
10647 result.Raise("Can't detach drbd from local storage on node"
10648 " %s for device %s" % (self.target_node, dev.iv_name))
10650 #cfg.Update(instance)
10652 # ok, we created the new LVs, so now we know we have the needed
10653 # storage; as such, we proceed on the target node to rename
10654 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10655 # using the assumption that logical_id == physical_id (which in
10656 # turn is the unique_id on that node)
10658 # FIXME(iustin): use a better name for the replaced LVs
10659 temp_suffix = int(time.time())
10660 ren_fn = lambda d, suff: (d.physical_id[0],
10661 d.physical_id[1] + "_replaced-%s" % suff)
10663 # Build the rename list based on what LVs exist on the node
10664 rename_old_to_new = []
10665 for to_ren in old_lvs:
10666 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10667 if not result.fail_msg and result.payload:
10669 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10671 self.lu.LogInfo("Renaming the old LVs on the target node")
10672 result = self.rpc.call_blockdev_rename(self.target_node,
10673 rename_old_to_new)
10674 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10676 # Now we rename the new LVs to the old LVs
10677 self.lu.LogInfo("Renaming the new LVs on the target node")
10678 rename_new_to_old = [(new, old.physical_id)
10679 for old, new in zip(old_lvs, new_lvs)]
10680 result = self.rpc.call_blockdev_rename(self.target_node,
10681 rename_new_to_old)
10682 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10684 # Intermediate steps of in memory modifications
10685 for old, new in zip(old_lvs, new_lvs):
10686 new.logical_id = old.logical_id
10687 self.cfg.SetDiskID(new, self.target_node)
10689 # We need to modify old_lvs so that removal later removes the
10690 # right LVs, not the newly added ones; note that old_lvs is a
10691 # copy here
10692 for disk in old_lvs:
10693 disk.logical_id = ren_fn(disk, temp_suffix)
10694 self.cfg.SetDiskID(disk, self.target_node)
10696 # Now that the new lvs have the old name, we can add them to the device
10697 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10698 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10699 new_lvs)
10700 msg = result.fail_msg
10701 if msg:
10702 for new_lv in new_lvs:
10703 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10704 new_lv).fail_msg
10705 if msg2:
10706 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10707 hint=("cleanup manually the unused logical"
10708 " volumes"))
10709 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10711 cstep = itertools.count(5)
10713 if self.early_release:
10714 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10715 self._RemoveOldStorage(self.target_node, iv_names)
10716 # TODO: Check if releasing locks early still makes sense
10717 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10718 else:
10719 # Release all resource locks except those used by the instance
10720 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10721 keep=self.node_secondary_ip.keys())
10723 # Release all node locks while waiting for sync
10724 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10726 # TODO: Can the instance lock be downgraded here? Take the optional disk
10727 # shutdown in the caller into consideration.
10730 # This can fail as the old devices are degraded and _WaitForSync
10731 # does a combined result over all disks, so we don't check its return value
10732 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10733 _WaitForSync(self.lu, self.instance)
10735 # Check all devices manually
10736 self._CheckDevices(self.instance.primary_node, iv_names)
10738 # Step: remove old storage
10739 if not self.early_release:
10740 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10741 self._RemoveOldStorage(self.target_node, iv_names)
10743 def _ExecDrbd8Secondary(self, feedback_fn):
10744 """Replace the secondary node for DRBD 8.
10746 The algorithm for replace is quite complicated:
10747 - for all disks of the instance:
10748 - create new LVs on the new node with same names
10749 - shutdown the drbd device on the old secondary
10750 - disconnect the drbd network on the primary
10751 - create the drbd device on the new secondary
10752 - network attach the drbd on the primary, using an artifice:
10753 the drbd code for Attach() will connect to the network if it
10754 finds a device which is connected to the good local disks but
10755 not network enabled
10756 - wait for sync across all devices
10757 - remove all disks from the old secondary
10759 Failures are not very well handled.
10761 """
10762 steps_total = 6
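# Editor's sketch of the DRBD8 logical_id rewrite performed below, with
# hypothetical nodes/minors: an existing disk carries
#   ("node1", "node2", 11000, 0, 1, "secret")
# i.e. (nodeA, nodeB, port, minorA, minorB, secret); for the new secondary
# "node3" two IDs are built: new_alone_id with port=None (activate without
# networking) and new_net_id with the original port (attached in step 4).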
10764 pnode = self.instance.primary_node
10766 # Step: check device activation
10767 self.lu.LogStep(1, steps_total, "Check device existence")
10768 self._CheckDisksExistence([self.instance.primary_node])
10769 self._CheckVolumeGroup([self.instance.primary_node])
10771 # Step: check other node consistency
10772 self.lu.LogStep(2, steps_total, "Check peer consistency")
10773 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10775 # Step: create new storage
10776 self.lu.LogStep(3, steps_total, "Allocate new storage")
10777 for idx, dev in enumerate(self.instance.disks):
10778 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10779 (self.new_node, idx))
10780 # we pass force_create=True to force LVM creation
10781 for new_lv in dev.children:
10782 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10783 _GetInstanceInfoText(self.instance), False)
10785 # Step 4: drbd minors and drbd setup changes
10786 # after this, we must manually remove the drbd minors on both the
10787 # error and the success paths
10788 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10789 minors = self.cfg.AllocateDRBDMinor([self.new_node
10790 for dev in self.instance.disks],
10791 self.instance.name)
10792 logging.debug("Allocated minors %r", minors)
10794 iv_names = {}
10795 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10796 self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
10797 (self.new_node, idx))
10798 # create new devices on new_node; note that we create two IDs:
10799 # one without port, so the drbd will be activated without
10800 # networking information on the new node at this stage, and one
10801 # with network, for the later activation in step 4
10802 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10803 if self.instance.primary_node == o_node1:
10804 p_minor = o_minor1
10805 else:
10806 assert self.instance.primary_node == o_node2, "Three-node instance?"
10807 p_minor = o_minor2
10809 new_alone_id = (self.instance.primary_node, self.new_node, None,
10810 p_minor, new_minor, o_secret)
10811 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10812 p_minor, new_minor, o_secret)
10814 iv_names[idx] = (dev, dev.children, new_net_id)
10815 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10817 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10818 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10819 logical_id=new_alone_id,
10820 children=dev.children,
10821 size=dev.size,
10822 params=drbd_params)
10823 try:
10824 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10825 _GetInstanceInfoText(self.instance), False)
10826 except errors.GenericError:
10827 self.cfg.ReleaseDRBDMinors(self.instance.name)
10828 raise
10830 # We have new devices, shutdown the drbd on the old secondary
10831 for idx, dev in enumerate(self.instance.disks):
10832 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10833 self.cfg.SetDiskID(dev, self.target_node)
10834 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10836 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10837 "node: %s" % (idx, msg),
10838 hint=("Please cleanup this device manually as"
10839 " soon as possible"))
10841 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10842 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10843 self.instance.disks)[pnode]
10845 msg = result.fail_msg
10846 if msg:
10847 # detaches didn't succeed (unlikely)
10848 self.cfg.ReleaseDRBDMinors(self.instance.name)
10849 raise errors.OpExecError("Can't detach the disks from the network on"
10850 " old node: %s" % (msg,))
10852 # if we managed to detach at least one, we update all the disks of
10853 # the instance to point to the new secondary
10854 self.lu.LogInfo("Updating instance configuration")
10855 for dev, _, new_logical_id in iv_names.itervalues():
10856 dev.logical_id = new_logical_id
10857 self.cfg.SetDiskID(dev, self.instance.primary_node)
10859 self.cfg.Update(self.instance, feedback_fn)
10861 # Release all node locks (the configuration has been updated)
10862 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10864 # and now perform the drbd attach
10865 self.lu.LogInfo("Attaching primary drbds to new secondary"
10866 " (standalone => connected)")
10867 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10868 self.new_node],
10869 self.node_secondary_ip,
10870 self.instance.disks,
10871 self.instance.name,
10872 False)
10873 for to_node, to_result in result.items():
10874 msg = to_result.fail_msg
10876 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10878 hint=("please do a gnt-instance info to see the"
10879 " status of disks"))
10881 cstep = itertools.count(5)
10883 if self.early_release:
10884 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10885 self._RemoveOldStorage(self.target_node, iv_names)
10886 # TODO: Check if releasing locks early still makes sense
10887 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10888 else:
10889 # Release all resource locks except those used by the instance
10890 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10891 keep=self.node_secondary_ip.keys())
10893 # TODO: Can the instance lock be downgraded here? Take the optional disk
10894 # shutdown in the caller into consideration.
10897 # This can fail as the old devices are degraded and _WaitForSync
10898 # does a combined result over all disks, so we don't check its return value
10899 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10900 _WaitForSync(self.lu, self.instance)
10902 # Check all devices manually
10903 self._CheckDevices(self.instance.primary_node, iv_names)
10905 # Step: remove old storage
10906 if not self.early_release:
10907 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10908 self._RemoveOldStorage(self.target_node, iv_names)
10911 class LURepairNodeStorage(NoHooksLU):
10912 """Repairs the volume group on a node.
10917 def CheckArguments(self):
10918 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10920 storage_type = self.op.storage_type
10922 if (constants.SO_FIX_CONSISTENCY not in
10923 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10924 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10925 " repaired" % storage_type,
10926 errors.ECODE_INVAL)
10928 def ExpandNames(self):
10929 self.needed_locks = {
10930 locking.LEVEL_NODE: [self.op.node_name],
10931 }
10933 def _CheckFaultyDisks(self, instance, node_name):
10934 """Ensure faulty disks abort the opcode or at least warn."""
10935 try:
10936 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10937 node_name, True):
10938 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10939 " node '%s'" % (instance.name, node_name),
10940 errors.ECODE_STATE)
10941 except errors.OpPrereqError, err:
10942 if self.op.ignore_consistency:
10943 self.proc.LogWarning(str(err.args[0]))
10944 else:
10945 raise
10947 def CheckPrereq(self):
10948 """Check prerequisites.
10951 # Check whether any instance on this node has faulty disks
10952 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10953 if inst.admin_state != constants.ADMINST_UP:
10954 continue
10955 check_nodes = set(inst.all_nodes)
10956 check_nodes.discard(self.op.node_name)
10957 for inst_node_name in check_nodes:
10958 self._CheckFaultyDisks(inst, inst_node_name)
10960 def Exec(self, feedback_fn):
10961 feedback_fn("Repairing storage unit '%s' on %s ..." %
10962 (self.op.name, self.op.node_name))
10964 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10965 result = self.rpc.call_storage_execute(self.op.node_name,
10966 self.op.storage_type, st_args,
10967 self.op.name,
10968 constants.SO_FIX_CONSISTENCY)
10969 result.Raise("Failed to repair storage unit '%s' on %s" %
10970 (self.op.name, self.op.node_name))
10973 class LUNodeEvacuate(NoHooksLU):
10974 """Evacuates instances off a list of nodes.
10979 _MODE2IALLOCATOR = {
10980 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10981 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10982 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10983 }
10984 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10985 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10986 constants.IALLOCATOR_NEVAC_MODES)
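# For example, "gnt-node evacuate -p node1" arrives here with
# mode=constants.NODE_EVAC_PRI, which _MODE2IALLOCATOR translates into the
# iallocator's IALLOCATOR_NEVAC_PRI request mode before the request is built.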
10988 def CheckArguments(self):
10989 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10991 def ExpandNames(self):
10992 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10994 if self.op.remote_node is not None:
10995 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10996 assert self.op.remote_node
10998 if self.op.remote_node == self.op.node_name:
10999 raise errors.OpPrereqError("Can not use evacuated node as a new"
11000 " secondary node", errors.ECODE_INVAL)
11002 if self.op.mode != constants.NODE_EVAC_SEC:
11003 raise errors.OpPrereqError("Without the use of an iallocator only"
11004 " secondary instances can be evacuated",
11005 errors.ECODE_INVAL)
11008 self.share_locks = _ShareAll()
11009 self.needed_locks = {
11010 locking.LEVEL_INSTANCE: [],
11011 locking.LEVEL_NODEGROUP: [],
11012 locking.LEVEL_NODE: [],
11013 }
11015 # Determine nodes (via group) optimistically, needs verification once locks
11016 # have been acquired
11017 self.lock_nodes = self._DetermineNodes()
11019 def _DetermineNodes(self):
11020 """Gets the list of nodes to operate on.
11022 """
11023 if self.op.remote_node is None:
11024 # Iallocator will choose any node(s) in the same group
11025 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11026 else:
11027 group_nodes = frozenset([self.op.remote_node])
11029 # Determine nodes to be locked
11030 return set([self.op.node_name]) | group_nodes
11032 def _DetermineInstances(self):
11033 """Builds list of instances to operate on.
11035 """
11036 assert self.op.mode in constants.NODE_EVAC_MODES
11038 if self.op.mode == constants.NODE_EVAC_PRI:
11039 # Primary instances only
11040 inst_fn = _GetNodePrimaryInstances
11041 assert self.op.remote_node is None, \
11042 "Evacuating primary instances requires iallocator"
11043 elif self.op.mode == constants.NODE_EVAC_SEC:
11044 # Secondary instances only
11045 inst_fn = _GetNodeSecondaryInstances
11046 else:
11047 # All instances
11048 assert self.op.mode == constants.NODE_EVAC_ALL
11049 inst_fn = _GetNodeInstances
11050 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11051 # instead of a list of instances
11052 raise errors.OpPrereqError("Due to an issue with the iallocator"
11053 " interface it is not possible to evacuate"
11054 " all instances at once; specify explicitly"
11055 " whether to evacuate primary or secondary"
11056 " instances",
11057 errors.ECODE_INVAL)
11059 return inst_fn(self.cfg, self.op.node_name)
11061 def DeclareLocks(self, level):
11062 if level == locking.LEVEL_INSTANCE:
11063 # Lock instances optimistically, needs verification once node and group
11064 # locks have been acquired
11065 self.needed_locks[locking.LEVEL_INSTANCE] = \
11066 set(i.name for i in self._DetermineInstances())
11068 elif level == locking.LEVEL_NODEGROUP:
11069 # Lock node groups for all potential target nodes optimistically, needs
11070 # verification once nodes have been acquired
11071 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11072 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11074 elif level == locking.LEVEL_NODE:
11075 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11077 def CheckPrereq(self):
11079 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11080 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11081 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11083 need_nodes = self._DetermineNodes()
11085 if not owned_nodes.issuperset(need_nodes):
11086 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11087 " locks were acquired, current nodes are"
11088 " '%s', used to be '%s'; retry the"
11089 " operation" %
11090 (self.op.node_name,
11091 utils.CommaJoin(need_nodes),
11092 utils.CommaJoin(owned_nodes)),
11093 errors.ECODE_STATE)
11095 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11096 if owned_groups != wanted_groups:
11097 raise errors.OpExecError("Node groups changed since locks were acquired,"
11098 " current groups are '%s', used to be '%s';"
11099 " retry the operation" %
11100 (utils.CommaJoin(wanted_groups),
11101 utils.CommaJoin(owned_groups)))
11103 # Determine affected instances
11104 self.instances = self._DetermineInstances()
11105 self.instance_names = [i.name for i in self.instances]
11107 if set(self.instance_names) != owned_instances:
11108 raise errors.OpExecError("Instances on node '%s' changed since locks"
11109 " were acquired, current instances are '%s',"
11110 " used to be '%s'; retry the operation" %
11111 (self.op.node_name,
11112 utils.CommaJoin(self.instance_names),
11113 utils.CommaJoin(owned_instances)))
11115 if self.instance_names:
11116 self.LogInfo("Evacuating instances from node '%s': %s",
11117 self.op.node_name,
11118 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11119 else:
11120 self.LogInfo("No instances to evacuate from node '%s'",
11121 self.op.node_name)
11123 if self.op.remote_node is not None:
11124 for i in self.instances:
11125 if i.primary_node == self.op.remote_node:
11126 raise errors.OpPrereqError("Node %s is the primary node of"
11127 " instance %s, cannot use it as"
11128 " secondary node" %
11129 (self.op.remote_node, i.name),
11130 errors.ECODE_INVAL)
11132 def Exec(self, feedback_fn):
11133 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11135 if not self.instance_names:
11136 # No instances to evacuate
11137 jobs = []
11139 elif self.op.iallocator is not None:
11140 # TODO: Implement relocation to other group
11141 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11142 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11143 instances=list(self.instance_names))
11145 ial.Run(self.op.iallocator)
11147 if not ial.success:
11148 raise errors.OpPrereqError("Can't compute node evacuation using"
11149 " iallocator '%s': %s" %
11150 (self.op.iallocator, ial.info),
11151 errors.ECODE_NORES)
11153 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11155 elif self.op.remote_node is not None:
11156 assert self.op.mode == constants.NODE_EVAC_SEC
11157 jobs = [
11158 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11159 remote_node=self.op.remote_node,
11160 disks=[],
11161 mode=constants.REPLACE_DISK_CHG,
11162 early_release=self.op.early_release)]
11163 for instance_name in self.instance_names
11164 ]
11166 else:
11167 raise errors.ProgrammerError("No iallocator or remote node")
11169 return ResultWithJobs(jobs)
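# A sketch of the "jobs" value built in the remote_node branch above, for two
# assumed instances inst1 and inst2: one single-opcode job per instance, i.e.
#   jobs = [
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst1", disks=[],
#                                     mode=constants.REPLACE_DISK_CHG, ...)],
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst2", disks=[],
#                                     mode=constants.REPLACE_DISK_CHG, ...)],
#     ]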
11172 def _SetOpEarlyRelease(early_release, op):
11173 """Sets C{early_release} flag on opcodes if available.
11175 """
11176 try:
11177 op.early_release = early_release
11178 except AttributeError:
11179 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11181 return op
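# Illustrative use, mirroring _LoadNodeEvacResult below: bind the flag first,
# then apply it over a list of freshly loaded opcodes:
#   set_early = compat.partial(_SetOpEarlyRelease, True)
#   ops = map(set_early, ops)
# Opcodes without an early_release slot pass through unchanged.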
11184 def _NodeEvacDest(use_nodes, group, nodes):
11185 """Returns group or nodes depending on caller's choice.
11187 """
11188 if use_nodes:
11189 return utils.CommaJoin(nodes)
11190 else:
11191 return group
11194 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11195 """Unpacks the result of change-group and node-evacuate iallocator requests.
11197 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11198 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11200 @type lu: L{LogicalUnit}
11201 @param lu: Logical unit instance
11202 @type alloc_result: tuple/list
11203 @param alloc_result: Result from iallocator
11204 @type early_release: bool
11205 @param early_release: Whether to release locks early if possible
11206 @type use_nodes: bool
11207 @param use_nodes: Whether to display node names instead of groups
11209 """
11210 (moved, failed, jobs) = alloc_result
11212 if failed:
11213 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11214 for (name, reason) in failed)
11215 lu.LogWarning("Unable to evacuate instances %s", failreason)
11216 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11218 if moved:
11219 lu.LogInfo("Instances to be moved: %s",
11220 utils.CommaJoin("%s (to %s)" %
11221 (name, _NodeEvacDest(use_nodes, group, nodes))
11222 for (name, group, nodes) in moved))
11224 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11225 map(opcodes.OpCode.LoadOpCode, ops))
11226 for ops in jobs]
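# A minimal sketch of the iallocator payload this helper consumes (values are
# assumed for illustration):
#   alloc_result = (
#     [("inst1", "target-group-uuid", ["node2", "node3"])],  # moved
#     [],                                                    # failed
#     [[serialized_op, serialized_op], [serialized_op]],     # jobs
#     )
# Each inner list of serialized opcodes is revived with OpCode.LoadOpCode and
# becomes one job in the returned list.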
11229 class LUInstanceGrowDisk(LogicalUnit):
11230 """Grow a disk of an instance.
11232 """
11233 HPATH = "disk-grow"
11234 HTYPE = constants.HTYPE_INSTANCE
11235 REQ_BGL = False
11237 def ExpandNames(self):
11238 self._ExpandAndLockInstance()
11239 self.needed_locks[locking.LEVEL_NODE] = []
11240 self.needed_locks[locking.LEVEL_NODE_RES] = []
11241 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11243 def DeclareLocks(self, level):
11244 if level == locking.LEVEL_NODE:
11245 self._LockInstancesNodes()
11246 elif level == locking.LEVEL_NODE_RES:
11247 # Copy node locks
11248 self.needed_locks[locking.LEVEL_NODE_RES] = \
11249 self.needed_locks[locking.LEVEL_NODE][:]
11251 def BuildHooksEnv(self):
11252 """Build hooks env.
11254 This runs on the master, the primary and all the secondaries.
11256 """
11257 env = {
11258 "DISK": self.op.disk,
11259 "AMOUNT": self.op.amount,
11260 }
11261 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11262 return env
11264 def BuildHooksNodes(self):
11265 """Build hooks nodes.
11267 """
11268 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11269 return (nl, nl)
11271 def CheckPrereq(self):
11272 """Check prerequisites.
11274 This checks that the instance is in the cluster.
11276 """
11277 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11278 assert instance is not None, \
11279 "Cannot retrieve locked instance %s" % self.op.instance_name
11280 nodenames = list(instance.all_nodes)
11281 for node in nodenames:
11282 _CheckNodeOnline(self, node)
11284 self.instance = instance
11286 if instance.disk_template not in constants.DTS_GROWABLE:
11287 raise errors.OpPrereqError("Instance's disk layout does not support"
11288 " growing", errors.ECODE_INVAL)
11290 self.disk = instance.FindDisk(self.op.disk)
11292 if instance.disk_template not in (constants.DT_FILE,
11293 constants.DT_SHARED_FILE):
11294 # TODO: check the free disk space for file, when that feature will be
11295 # supported
11296 _CheckNodesFreeDiskPerVG(self, nodenames,
11297 self.disk.ComputeGrowth(self.op.amount))
11299 def Exec(self, feedback_fn):
11300 """Execute disk grow.
11302 """
11303 instance = self.instance
11304 disk = self.disk
11306 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11307 assert (self.owned_locks(locking.LEVEL_NODE) ==
11308 self.owned_locks(locking.LEVEL_NODE_RES))
11310 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11311 if not disks_ok:
11312 raise errors.OpExecError("Cannot activate block device to grow")
11314 feedback_fn("Growing disk %s of instance '%s' by %s" %
11315 (self.op.disk, instance.name,
11316 utils.FormatUnit(self.op.amount, "h")))
11318 # First run all grow ops in dry-run mode
11319 for node in instance.all_nodes:
11320 self.cfg.SetDiskID(disk, node)
11321 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11322 result.Raise("Grow request failed to node %s" % node)
11324 # We know that (as far as we can test) operations across different
11325 # nodes will succeed, time to run it for real
11326 for node in instance.all_nodes:
11327 self.cfg.SetDiskID(disk, node)
11328 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11329 result.Raise("Grow request failed to node %s" % node)
11331 # TODO: Rewrite code to work properly
11332 # DRBD goes into sync mode for a short amount of time after executing the
11333 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11334 # calling "resize" in sync mode fails. Sleeping for a short amount of
11335 # time is a work-around.
11336 time.sleep(5)
11338 disk.RecordGrow(self.op.amount)
11339 self.cfg.Update(instance, feedback_fn)
11341 # Changes have been recorded, release node lock
11342 _ReleaseLocks(self, locking.LEVEL_NODE)
11344 # Downgrade lock while waiting for sync
11345 self.glm.downgrade(locking.LEVEL_INSTANCE)
11347 if self.op.wait_for_sync:
11348 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11349 if disk_abort:
11350 self.proc.LogWarning("Disk sync-ing has not returned a good"
11351 " status; please check the instance")
11352 if instance.admin_state != constants.ADMINST_UP:
11353 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11354 elif instance.admin_state != constants.ADMINST_UP:
11355 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11356 " not supposed to be running because no wait for"
11357 " sync mode was requested")
11359 assert self.owned_locks(locking.LEVEL_NODE_RES)
11360 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
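# Illustrative invocation (instance name and amount are assumed example
# values; the CLI parses the unit suffix):
#   gnt-instance grow-disk instance1.example.com 0 1G
# i.e. roughly:
#   opcodes.OpInstanceGrowDisk(instance_name="instance1.example.com",
#                              disk=0, amount=1024, wait_for_sync=True)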
11363 class LUInstanceQueryData(NoHooksLU):
11364 """Query runtime instance data.
11366 """
11367 REQ_BGL = False
11369 def ExpandNames(self):
11370 self.needed_locks = {}
11372 # Use locking if requested or when non-static information is wanted
11373 if not (self.op.static or self.op.use_locking):
11374 self.LogWarning("Non-static data requested, locks need to be acquired")
11375 self.op.use_locking = True
11377 if self.op.instances or not self.op.use_locking:
11378 # Expand instance names right here
11379 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11380 else:
11381 # Will use acquired locks
11382 self.wanted_names = None
11384 if self.op.use_locking:
11385 self.share_locks = _ShareAll()
11387 if self.wanted_names is None:
11388 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11389 else:
11390 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11392 self.needed_locks[locking.LEVEL_NODE] = []
11393 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11395 def DeclareLocks(self, level):
11396 if self.op.use_locking and level == locking.LEVEL_NODE:
11397 self._LockInstancesNodes()
11399 def CheckPrereq(self):
11400 """Check prerequisites.
11402 This only checks the optional instance list against the existing names.
11404 """
11405 if self.wanted_names is None:
11406 assert self.op.use_locking, "Locking was not used"
11407 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11409 self.wanted_instances = \
11410 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11412 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11413 """Returns the status of a block device
11415 """
11416 if self.op.static or not node:
11417 return None
11419 self.cfg.SetDiskID(dev, node)
11421 result = self.rpc.call_blockdev_find(node, dev)
11422 if result.offline:
11423 return None
11425 result.Raise("Can't compute disk status for %s" % instance_name)
11427 status = result.payload
11428 if status is None:
11429 return None
11431 return (status.dev_path, status.major, status.minor,
11432 status.sync_percent, status.estimated_time,
11433 status.is_degraded, status.ldisk_status)
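# Example of the tuple returned for a healthy, fully synced device (values
# are illustrative only):
#   ("/dev/drbd0", 147, 0, None, None, False, constants.LDS_OKAY)
# matching (dev_path, major, minor, sync_percent, estimated_time,
# is_degraded, ldisk_status).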
11435 def _ComputeDiskStatus(self, instance, snode, dev):
11436 """Compute block device status.
11438 """
11439 if dev.dev_type in constants.LDS_DRBD:
11440 # we change the snode then (otherwise we use the one passed in)
11441 if dev.logical_id[0] == instance.primary_node:
11442 snode = dev.logical_id[1]
11443 else:
11444 snode = dev.logical_id[0]
11446 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11447 instance.name, dev)
11448 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11450 if dev.children:
11451 dev_children = map(compat.partial(self._ComputeDiskStatus,
11452 instance, snode),
11453 dev.children)
11454 else:
11455 dev_children = []
11457 return {
11458 "iv_name": dev.iv_name,
11459 "dev_type": dev.dev_type,
11460 "logical_id": dev.logical_id,
11461 "physical_id": dev.physical_id,
11462 "pstatus": dev_pstatus,
11463 "sstatus": dev_sstatus,
11464 "children": dev_children,
11465 "mode": dev.mode,
11466 "size": dev.size,
11467 }
11469 def Exec(self, feedback_fn):
11470 """Gather and return data"""
11471 result = {}
11473 cluster = self.cfg.GetClusterInfo()
11475 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11476 for i in self.wanted_instances)
11477 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11478 if self.op.static or pnode.offline:
11479 remote_state = None
11480 if pnode.offline:
11481 self.LogWarning("Primary node %s is marked offline, returning static"
11482 " information only for instance %s" %
11483 (pnode.name, instance.name))
11484 else:
11485 remote_info = self.rpc.call_instance_info(instance.primary_node,
11486 instance.name,
11487 instance.hypervisor)
11488 remote_info.Raise("Error checking node %s" % instance.primary_node)
11489 remote_info = remote_info.payload
11490 if remote_info and "state" in remote_info:
11491 remote_state = "up"
11492 else:
11493 if instance.admin_state == constants.ADMINST_UP:
11494 remote_state = "down"
11495 else:
11496 remote_state = instance.admin_state
11498 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11499 instance.disks)
11501 result[instance.name] = {
11502 "name": instance.name,
11503 "config_state": instance.admin_state,
11504 "run_state": remote_state,
11505 "pnode": instance.primary_node,
11506 "snodes": instance.secondary_nodes,
11507 "os": instance.os,
11508 # this happens to be the same format used for hooks
11509 "nics": _NICListToTuple(self, instance.nics),
11510 "disk_template": instance.disk_template,
11511 "disks": disks,
11512 "hypervisor": instance.hypervisor,
11513 "network_port": instance.network_port,
11514 "hv_instance": instance.hvparams,
11515 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11516 "be_instance": instance.beparams,
11517 "be_actual": cluster.FillBE(instance),
11518 "os_instance": instance.osparams,
11519 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11520 "serial_no": instance.serial_no,
11521 "mtime": instance.mtime,
11522 "ctime": instance.ctime,
11523 "uuid": instance.uuid,
11524 }
11526 return result
11529 class LUInstanceSetParams(LogicalUnit):
11530 """Modifies an instance's parameters.
11532 """
11533 HPATH = "instance-modify"
11534 HTYPE = constants.HTYPE_INSTANCE
11535 REQ_BGL = False
11537 def CheckArguments(self):
11538 if not (self.op.nics or self.op.disks or self.op.disk_template or
11539 self.op.hvparams or self.op.beparams or self.op.os_name or
11540 self.op.online_inst or self.op.offline_inst):
11541 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11543 if self.op.hvparams:
11544 _CheckGlobalHvParams(self.op.hvparams)
11546 # Disk validation
11547 disk_addremove = 0
11548 for disk_op, disk_dict in self.op.disks:
11549 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11550 if disk_op == constants.DDM_REMOVE:
11551 disk_addremove += 1
11552 continue
11553 elif disk_op == constants.DDM_ADD:
11554 disk_addremove += 1
11555 else:
11556 if not isinstance(disk_op, int):
11557 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11558 if not isinstance(disk_dict, dict):
11559 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11560 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11562 if disk_op == constants.DDM_ADD:
11563 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11564 if mode not in constants.DISK_ACCESS_SET:
11565 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11566 errors.ECODE_INVAL)
11567 size = disk_dict.get(constants.IDISK_SIZE, None)
11568 if size is None:
11569 raise errors.OpPrereqError("Required disk parameter size missing",
11570 errors.ECODE_INVAL)
11571 try:
11572 size = int(size)
11573 except (TypeError, ValueError), err:
11574 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11575 str(err), errors.ECODE_INVAL)
11576 disk_dict[constants.IDISK_SIZE] = size
11577 else:
11578 # modification of disk
11579 if constants.IDISK_SIZE in disk_dict:
11580 raise errors.OpPrereqError("Disk size change not possible, use"
11581 " grow-disk", errors.ECODE_INVAL)
11583 if disk_addremove > 1:
11584 raise errors.OpPrereqError("Only one disk add or remove operation"
11585 " supported at a time", errors.ECODE_INVAL)
11587 if self.op.disks and self.op.disk_template is not None:
11588 raise errors.OpPrereqError("Disk template conversion and other disk"
11589 " changes not supported at the same time",
11590 errors.ECODE_INVAL)
11592 if (self.op.disk_template and
11593 self.op.disk_template in constants.DTS_INT_MIRROR and
11594 self.op.remote_node is None):
11595 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11596 " one requires specifying a secondary node",
11597 errors.ECODE_INVAL)
11599 # NIC validation
11600 nic_addremove = 0
11601 for nic_op, nic_dict in self.op.nics:
11602 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11603 if nic_op == constants.DDM_REMOVE:
11604 nic_addremove += 1
11605 continue
11606 elif nic_op == constants.DDM_ADD:
11607 nic_addremove += 1
11608 else:
11609 if not isinstance(nic_op, int):
11610 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11611 if not isinstance(nic_dict, dict):
11612 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11613 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11615 # nic_dict should be a dict
11616 nic_ip = nic_dict.get(constants.INIC_IP, None)
11617 if nic_ip is not None:
11618 if nic_ip.lower() == constants.VALUE_NONE:
11619 nic_dict[constants.INIC_IP] = None
11620 else:
11621 if not netutils.IPAddress.IsValid(nic_ip):
11622 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11623 errors.ECODE_INVAL)
11625 nic_bridge = nic_dict.get("bridge", None)
11626 nic_link = nic_dict.get(constants.INIC_LINK, None)
11627 if nic_bridge and nic_link:
11628 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11629 " at the same time", errors.ECODE_INVAL)
11630 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11631 nic_dict["bridge"] = None
11632 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11633 nic_dict[constants.INIC_LINK] = None
11635 if nic_op == constants.DDM_ADD:
11636 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11637 if nic_mac is None:
11638 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11640 if constants.INIC_MAC in nic_dict:
11641 nic_mac = nic_dict[constants.INIC_MAC]
11642 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11643 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11645 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11646 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11647 " modifying an existing nic",
11648 errors.ECODE_INVAL)
11650 if nic_addremove > 1:
11651 raise errors.OpPrereqError("Only one NIC add or remove operation"
11652 " supported at a time", errors.ECODE_INVAL)
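# For reference, self.op.nics (and similarly self.op.disks) is a list of
# (operation, parameters) pairs where the operation is DDM_ADD, DDM_REMOVE or
# an integer index; an assumed example modifying NIC 0 and adding a NIC:
#   [(0, {constants.INIC_LINK: "br0"}),
#    (constants.DDM_ADD, {constants.INIC_MAC: constants.VALUE_AUTO})]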
11654 def ExpandNames(self):
11655 self._ExpandAndLockInstance()
11656 # Can't even acquire node locks in shared mode as upcoming changes in
11657 # Ganeti 2.6 will start to modify the node object on disk conversion
11658 self.needed_locks[locking.LEVEL_NODE] = []
11659 self.needed_locks[locking.LEVEL_NODE_RES] = []
11660 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11662 def DeclareLocks(self, level):
11663 if level == locking.LEVEL_NODE:
11664 self._LockInstancesNodes()
11665 if self.op.disk_template and self.op.remote_node:
11666 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11667 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11668 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11669 # Copy node locks
11670 self.needed_locks[locking.LEVEL_NODE_RES] = \
11671 self.needed_locks[locking.LEVEL_NODE][:]
11673 def BuildHooksEnv(self):
11674 """Build hooks env.
11676 This runs on the master, primary and secondaries.
11678 """
11679 args = dict()
11680 if constants.BE_MINMEM in self.be_new:
11681 args["minmem"] = self.be_new[constants.BE_MINMEM]
11682 if constants.BE_MAXMEM in self.be_new:
11683 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11684 if constants.BE_VCPUS in self.be_new:
11685 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11686 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11687 # information at all.
11689 args["nics"] = []
11690 nic_override = dict(self.op.nics)
11691 for idx, nic in enumerate(self.instance.nics):
11692 if idx in nic_override:
11693 this_nic_override = nic_override[idx]
11695 this_nic_override = {}
11696 if constants.INIC_IP in this_nic_override:
11697 ip = this_nic_override[constants.INIC_IP]
11698 else:
11699 ip = nic.ip
11700 if constants.INIC_MAC in this_nic_override:
11701 mac = this_nic_override[constants.INIC_MAC]
11702 else:
11703 mac = nic.mac
11704 if idx in self.nic_pnew:
11705 nicparams = self.nic_pnew[idx]
11706 else:
11707 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11708 mode = nicparams[constants.NIC_MODE]
11709 link = nicparams[constants.NIC_LINK]
11710 args["nics"].append((ip, mac, mode, link))
11711 if constants.DDM_ADD in nic_override:
11712 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11713 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11714 nicparams = self.nic_pnew[constants.DDM_ADD]
11715 mode = nicparams[constants.NIC_MODE]
11716 link = nicparams[constants.NIC_LINK]
11717 args["nics"].append((ip, mac, mode, link))
11718 elif constants.DDM_REMOVE in nic_override:
11719 del args["nics"][-1]
11721 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11722 if self.op.disk_template:
11723 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11725 return env
11727 def BuildHooksNodes(self):
11728 """Build hooks nodes.
11730 """
11731 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11732 return (nl, nl)
11734 def CheckPrereq(self):
11735 """Check prerequisites.
11737 This only checks the instance list against the existing names.
11739 """
11740 # checking the new params on the primary/secondary nodes
11742 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11743 cluster = self.cluster = self.cfg.GetClusterInfo()
11744 assert self.instance is not None, \
11745 "Cannot retrieve locked instance %s" % self.op.instance_name
11746 pnode = instance.primary_node
11747 nodelist = list(instance.all_nodes)
11748 pnode_info = self.cfg.GetNodeInfo(pnode)
11749 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11752 if self.op.os_name and not self.op.force:
11753 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11754 self.op.force_variant)
11755 instance_os = self.op.os_name
11756 else:
11757 instance_os = instance.os
11759 if self.op.disk_template:
11760 if instance.disk_template == self.op.disk_template:
11761 raise errors.OpPrereqError("Instance already has disk template %s" %
11762 instance.disk_template, errors.ECODE_INVAL)
11764 if (instance.disk_template,
11765 self.op.disk_template) not in self._DISK_CONVERSIONS:
11766 raise errors.OpPrereqError("Unsupported disk template conversion from"
11767 " %s to %s" % (instance.disk_template,
11768 self.op.disk_template),
11769 errors.ECODE_INVAL)
11770 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11771 msg="cannot change disk template")
11772 if self.op.disk_template in constants.DTS_INT_MIRROR:
11773 if self.op.remote_node == pnode:
11774 raise errors.OpPrereqError("Given new secondary node %s is the same"
11775 " as the primary node of the instance" %
11776 self.op.remote_node, errors.ECODE_STATE)
11777 _CheckNodeOnline(self, self.op.remote_node)
11778 _CheckNodeNotDrained(self, self.op.remote_node)
11779 # FIXME: here we assume that the old instance type is DT_PLAIN
11780 assert instance.disk_template == constants.DT_PLAIN
11781 disks = [{constants.IDISK_SIZE: d.size,
11782 constants.IDISK_VG: d.logical_id[0]}
11783 for d in instance.disks]
11784 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11785 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11787 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11788 if pnode_info.group != snode_info.group:
11789 self.LogWarning("The primary and secondary nodes are in two"
11790 " different node groups; the disk parameters"
11791 " from the first disk's node group will be"
11792 " used")
11794 # hvparams processing
11795 if self.op.hvparams:
11796 hv_type = instance.hypervisor
11797 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11798 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11799 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11801 # local check
11802 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11803 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11804 self.hv_proposed = self.hv_new = hv_new # the new actual values
11805 self.hv_inst = i_hvdict # the new dict (without defaults)
11806 else:
11807 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11808 instance.hvparams)
11809 self.hv_new = self.hv_inst = {}
11811 # beparams processing
11812 if self.op.beparams:
11813 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11814 use_none=True)
11815 objects.UpgradeBeParams(i_bedict)
11816 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11817 be_new = cluster.SimpleFillBE(i_bedict)
11818 self.be_proposed = self.be_new = be_new # the new actual values
11819 self.be_inst = i_bedict # the new dict (without defaults)
11820 else:
11821 self.be_new = self.be_inst = {}
11822 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11823 be_old = cluster.FillBE(instance)
11825 # CPU param validation -- checking every time a parameter is
11826 # changed to cover all cases where either CPU mask or vcpus have
11827 # changed
11828 if (constants.BE_VCPUS in self.be_proposed and
11829 constants.HV_CPU_MASK in self.hv_proposed):
11830 cpu_list = \
11831 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11832 # Verify mask is consistent with number of vCPUs. Can skip this
11833 # test if only 1 entry in the CPU mask, which means same mask
11834 # is applied to all vCPUs.
11835 if (len(cpu_list) > 1 and
11836 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11837 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11838 " CPU mask [%s]" %
11839 (self.be_proposed[constants.BE_VCPUS],
11840 self.hv_proposed[constants.HV_CPU_MASK]),
11841 errors.ECODE_INVAL)
11843 # Only perform this test if a new CPU mask is given
11844 if constants.HV_CPU_MASK in self.hv_new:
11845 # Calculate the largest CPU number requested
11846 max_requested_cpu = max(map(max, cpu_list))
11847 # Check that all of the instance's nodes have enough physical CPUs to
11848 # satisfy the requested CPU mask
11849 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11850 max_requested_cpu + 1, instance.hypervisor)
11852 # osparams processing
11853 if self.op.osparams:
11854 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11855 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11856 self.os_inst = i_osdict # the new dict (without defaults)
11857 else:
11858 self.os_inst = {}
11860 self.warn = []
11862 #TODO(dynmem): do the appropriate check involving MINMEM
11863 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11864 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11865 mem_check_list = [pnode]
11866 if be_new[constants.BE_AUTO_BALANCE]:
11867 # either we changed auto_balance to yes or it was from before
11868 mem_check_list.extend(instance.secondary_nodes)
11869 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11870 instance.hypervisor)
11871 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11872 [instance.hypervisor])
11873 pninfo = nodeinfo[pnode]
11874 msg = pninfo.fail_msg
11875 if msg:
11876 # Assume the primary node is unreachable and go ahead
11877 self.warn.append("Can't get info from primary node %s: %s" %
11878 (pnode, msg))
11879 else:
11880 (_, _, (pnhvinfo, )) = pninfo.payload
11881 if not isinstance(pnhvinfo.get("memory_free", None), int):
11882 self.warn.append("Node data from primary node %s doesn't contain"
11883 " free memory information" % pnode)
11884 elif instance_info.fail_msg:
11885 self.warn.append("Can't get instance runtime information: %s" %
11886 instance_info.fail_msg)
11887 else:
11888 if instance_info.payload:
11889 current_mem = int(instance_info.payload["memory"])
11890 else:
11891 # Assume instance not running
11892 # (there is a slight race condition here, but it's not very
11893 # probable, and we have no other way to check)
11894 # TODO: Describe race condition
11895 current_mem = 0
11896 #TODO(dynmem): do the appropriate check involving MINMEM
11897 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11898 pnhvinfo["memory_free"])
11899 if miss_mem > 0:
11900 raise errors.OpPrereqError("This change will prevent the instance"
11901 " from starting, due to %d MB of memory"
11902 " missing on its primary node" %
11903 miss_mem,
11904 errors.ECODE_NORES)
11906 if be_new[constants.BE_AUTO_BALANCE]:
11907 for node, nres in nodeinfo.items():
11908 if node not in instance.secondary_nodes:
11909 continue
11910 nres.Raise("Can't get info from secondary node %s" % node,
11911 prereq=True, ecode=errors.ECODE_STATE)
11912 (_, _, (nhvinfo, )) = nres.payload
11913 if not isinstance(nhvinfo.get("memory_free", None), int):
11914 raise errors.OpPrereqError("Secondary node %s didn't return free"
11915 " memory information" % node,
11916 errors.ECODE_STATE)
11917 #TODO(dynmem): do the appropriate check involving MINMEM
11918 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11919 raise errors.OpPrereqError("This change will prevent the instance"
11920 " from failover to its secondary node"
11921 " %s, due to not enough memory" % node,
11922 errors.ECODE_STATE)
11924 # NIC processing
11925 self.nic_pnew = {}
11926 self.nic_pinst = {}
11927 for nic_op, nic_dict in self.op.nics:
11928 if nic_op == constants.DDM_REMOVE:
11929 if not instance.nics:
11930 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11931 errors.ECODE_INVAL)
11932 continue
11933 if nic_op != constants.DDM_ADD:
11934 # an existing nic
11935 if not instance.nics:
11936 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11937 " no NICs" % nic_op,
11938 errors.ECODE_INVAL)
11939 if nic_op < 0 or nic_op >= len(instance.nics):
11940 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11941 " are 0 to %d" %
11942 (nic_op, len(instance.nics) - 1),
11943 errors.ECODE_INVAL)
11944 old_nic_params = instance.nics[nic_op].nicparams
11945 old_nic_ip = instance.nics[nic_op].ip
11946 else:
11947 old_nic_params = {}
11948 old_nic_ip = None
11950 update_params_dict = dict([(key, nic_dict[key])
11951 for key in constants.NICS_PARAMETERS
11952 if key in nic_dict])
11954 if "bridge" in nic_dict:
11955 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11957 new_nic_params = _GetUpdatedParams(old_nic_params,
11958 update_params_dict)
11959 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11960 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11961 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11962 self.nic_pinst[nic_op] = new_nic_params
11963 self.nic_pnew[nic_op] = new_filled_nic_params
11964 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11966 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11967 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11968 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11969 if msg:
11970 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11971 if self.op.force:
11972 self.warn.append(msg)
11973 else:
11974 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11975 if new_nic_mode == constants.NIC_MODE_ROUTED:
11976 if constants.INIC_IP in nic_dict:
11977 nic_ip = nic_dict[constants.INIC_IP]
11978 else:
11979 nic_ip = old_nic_ip
11980 if nic_ip is None:
11981 raise errors.OpPrereqError("Cannot set the nic ip to None"
11982 " on a routed nic", errors.ECODE_INVAL)
11983 if constants.INIC_MAC in nic_dict:
11984 nic_mac = nic_dict[constants.INIC_MAC]
11985 if nic_mac is None:
11986 raise errors.OpPrereqError("Cannot set the nic mac to None",
11987 errors.ECODE_INVAL)
11988 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11989 # otherwise generate the mac
11990 nic_dict[constants.INIC_MAC] = \
11991 self.cfg.GenerateMAC(self.proc.GetECId())
11992 else:
11993 # or validate/reserve the current one
11994 try:
11995 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11996 except errors.ReservationError:
11997 raise errors.OpPrereqError("MAC address %s already in use"
11998 " in cluster" % nic_mac,
11999 errors.ECODE_NOTUNIQUE)
12002 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12003 raise errors.OpPrereqError("Disk operations not supported for"
12004 " diskless instances",
12005 errors.ECODE_INVAL)
12006 for disk_op, _ in self.op.disks:
12007 if disk_op == constants.DDM_REMOVE:
12008 if len(instance.disks) == 1:
12009 raise errors.OpPrereqError("Cannot remove the last disk of"
12010 " an instance", errors.ECODE_INVAL)
12011 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12012 msg="cannot remove disks")
12014 if (disk_op == constants.DDM_ADD and
12015 len(instance.disks) >= constants.MAX_DISKS):
12016 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12017 " add more" % constants.MAX_DISKS,
12018 errors.ECODE_STATE)
12019 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12020 # an existing disk
12021 if disk_op < 0 or disk_op >= len(instance.disks):
12022 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12023 " are 0 to %d" %
12024 (disk_op, len(instance.disks)),
12025 errors.ECODE_INVAL)
12027 # disabling the instance
12028 if self.op.offline_inst:
12029 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12030 msg="cannot change instance state to offline")
12032 # enabling the instance
12033 if self.op.online_inst:
12034 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12035 msg="cannot make instance go online")
12037 def _ConvertPlainToDrbd(self, feedback_fn):
12038 """Converts an instance from plain to drbd.
12040 """
12041 feedback_fn("Converting template to drbd")
12042 instance = self.instance
12043 pnode = instance.primary_node
12044 snode = self.op.remote_node
12046 assert instance.disk_template == constants.DT_PLAIN
12048 # create a fake disk info for _GenerateDiskTemplate
12049 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12050 constants.IDISK_VG: d.logical_id[0]}
12051 for d in instance.disks]
12052 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12053 instance.name, pnode, [snode],
12054 disk_info, None, None, 0, feedback_fn,
12055 self.diskparams)
12056 info = _GetInstanceInfoText(instance)
12057 feedback_fn("Creating additional volumes...")
12058 # first, create the missing data and meta devices
12059 for disk in new_disks:
12060 # unfortunately this is... not too nice
12061 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12062 info, True)
12063 for child in disk.children:
12064 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12065 # at this stage, all new LVs have been created, we can rename the
12066 # old ones
12067 feedback_fn("Renaming original volumes...")
12068 rename_list = [(o, n.children[0].logical_id)
12069 for (o, n) in zip(instance.disks, new_disks)]
12070 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12071 result.Raise("Failed to rename original LVs")
12073 feedback_fn("Initializing DRBD devices...")
12074 # all child devices are in place, we can now create the DRBD devices
12075 for disk in new_disks:
12076 for node in [pnode, snode]:
12077 f_create = node == pnode
12078 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12080 # at this point, the instance has been modified
12081 instance.disk_template = constants.DT_DRBD8
12082 instance.disks = new_disks
12083 self.cfg.Update(instance, feedback_fn)
12085 # Release node locks while waiting for sync
12086 _ReleaseLocks(self, locking.LEVEL_NODE)
12088 # disks are created, waiting for sync
12089 disk_abort = not _WaitForSync(self, instance,
12090 oneshot=not self.op.wait_for_sync)
12091 if disk_abort:
12092 raise errors.OpExecError("There are some degraded disks for"
12093 " this instance, please cleanup manually")
12095 # Node resource locks will be released by caller
12097 def _ConvertDrbdToPlain(self, feedback_fn):
12098 """Converts an instance from drbd to plain.
12100 """
12101 instance = self.instance
12103 assert len(instance.secondary_nodes) == 1
12104 assert instance.disk_template == constants.DT_DRBD8
12106 pnode = instance.primary_node
12107 snode = instance.secondary_nodes[0]
12108 feedback_fn("Converting template to plain")
12110 old_disks = instance.disks
12111 new_disks = [d.children[0] for d in old_disks]
12113 # copy over size and mode
12114 for parent, child in zip(old_disks, new_disks):
12115 child.size = parent.size
12116 child.mode = parent.mode
12118 # update instance structure
12119 instance.disks = new_disks
12120 instance.disk_template = constants.DT_PLAIN
12121 self.cfg.Update(instance, feedback_fn)
12123 # Release locks in case removing disks takes a while
12124 _ReleaseLocks(self, locking.LEVEL_NODE)
12126 feedback_fn("Removing volumes on the secondary node...")
12127 for disk in old_disks:
12128 self.cfg.SetDiskID(disk, snode)
12129 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12130 if msg:
12131 self.LogWarning("Could not remove block device %s on node %s,"
12132 " continuing anyway: %s", disk.iv_name, snode, msg)
12134 feedback_fn("Removing unneeded volumes on the primary node...")
12135 for idx, disk in enumerate(old_disks):
12136 meta = disk.children[1]
12137 self.cfg.SetDiskID(meta, pnode)
12138 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12139 if msg:
12140 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12141 " continuing anyway: %s", idx, pnode, msg)
12143 # this is a DRBD disk, return its port to the pool
12144 for disk in old_disks:
12145 tcp_port = disk.logical_id[2]
12146 self.cfg.AddTcpUdpPort(tcp_port)
12148 # Node resource locks will be released by caller
12150 def Exec(self, feedback_fn):
12151 """Modifies an instance.
12153 All parameters take effect only at the next restart of the instance.
12155 """
12156 # Process here the warnings from CheckPrereq, as we don't have a
12157 # feedback_fn there.
12158 for warn in self.warn:
12159 feedback_fn("WARNING: %s" % warn)
12161 assert ((self.op.disk_template is None) ^
12162 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12163 "Not owning any node resource locks"
12165 result = []
12166 instance = self.instance
12167 # disk changes
12168 for disk_op, disk_dict in self.op.disks:
12169 if disk_op == constants.DDM_REMOVE:
12170 # remove the last disk
12171 device = instance.disks.pop()
12172 device_idx = len(instance.disks)
12173 for node, disk in device.ComputeNodeTree(instance.primary_node):
12174 self.cfg.SetDiskID(disk, node)
12175 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12176 if msg:
12177 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12178 " continuing anyway", device_idx, node, msg)
12179 result.append(("disk/%d" % device_idx, "remove"))
12181 # if this is a DRBD disk, return its port to the pool
12182 if device.dev_type in constants.LDS_DRBD:
12183 tcp_port = device.logical_id[2]
12184 self.cfg.AddTcpUdpPort(tcp_port)
12185 elif disk_op == constants.DDM_ADD:
12186 # add a new disk
12187 if instance.disk_template in (constants.DT_FILE,
12188 constants.DT_SHARED_FILE):
12189 file_driver, file_path = instance.disks[0].logical_id
12190 file_path = os.path.dirname(file_path)
12191 else:
12192 file_driver = file_path = None
12193 disk_idx_base = len(instance.disks)
12194 new_disk = _GenerateDiskTemplate(self,
12195 instance.disk_template,
12196 instance.name, instance.primary_node,
12197 instance.secondary_nodes,
12198 [disk_dict],
12199 file_path,
12200 file_driver,
12201 disk_idx_base,
12202 feedback_fn,
12203 self.diskparams)[0]
12204 instance.disks.append(new_disk)
12205 info = _GetInstanceInfoText(instance)
12207 logging.info("Creating volume %s for instance %s",
12208 new_disk.iv_name, instance.name)
12209 # Note: this needs to be kept in sync with _CreateDisks
12210 #HARDCODE
12211 for node in instance.all_nodes:
12212 f_create = node == instance.primary_node
12213 try:
12214 _CreateBlockDev(self, node, instance, new_disk,
12215 f_create, info, f_create)
12216 except errors.OpExecError, err:
12217 self.LogWarning("Failed to create volume %s (%s) on"
12218 " node %s: %s",
12219 new_disk.iv_name, new_disk, node, err)
12220 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12221 (new_disk.size, new_disk.mode)))
12222 else:
12223 # change a given disk
12224 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12225 result.append(("disk.mode/%d" % disk_op,
12226 disk_dict[constants.IDISK_MODE]))
12228 if self.op.disk_template:
12229 if __debug__:
12230 check_nodes = set(instance.all_nodes)
12231 if self.op.remote_node:
12232 check_nodes.add(self.op.remote_node)
12233 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12234 owned = self.owned_locks(level)
12235 assert not (check_nodes - owned), \
12236 ("Not owning the correct locks, owning %r, expected at least %r" %
12237 (owned, check_nodes))
12239 r_shut = _ShutdownInstanceDisks(self, instance)
12240 if not r_shut:
12241 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12242 " proceed with disk template conversion")
12243 mode = (instance.disk_template, self.op.disk_template)
12244 try:
12245 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12246 except:
12247 self.cfg.ReleaseDRBDMinors(instance.name)
12248 raise
12249 result.append(("disk_template", self.op.disk_template))
12251 assert instance.disk_template == self.op.disk_template, \
12252 ("Expected disk template '%s', found '%s'" %
12253 (self.op.disk_template, instance.disk_template))
12255 # Release node and resource locks if there are any (they might already have
12256 # been released during disk conversion)
12257 _ReleaseLocks(self, locking.LEVEL_NODE)
12258 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12260 # NIC changes
12261 for nic_op, nic_dict in self.op.nics:
12262 if nic_op == constants.DDM_REMOVE:
12263 # remove the last nic
12264 del instance.nics[-1]
12265 result.append(("nic.%d" % len(instance.nics), "remove"))
12266 elif nic_op == constants.DDM_ADD:
12267 # mac and bridge should be set, by now
12268 mac = nic_dict[constants.INIC_MAC]
12269 ip = nic_dict.get(constants.INIC_IP, None)
12270 nicparams = self.nic_pinst[constants.DDM_ADD]
12271 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12272 instance.nics.append(new_nic)
12273 result.append(("nic.%d" % (len(instance.nics) - 1),
12274 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12275 (new_nic.mac, new_nic.ip,
12276 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12277 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12278 )))
12279 else:
12280 for key in (constants.INIC_MAC, constants.INIC_IP):
12281 if key in nic_dict:
12282 setattr(instance.nics[nic_op], key, nic_dict[key])
12283 if nic_op in self.nic_pinst:
12284 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12285 for key, val in nic_dict.iteritems():
12286 result.append(("nic.%s/%d" % (key, nic_op), val))
12288 # hvparams changes
12289 if self.op.hvparams:
12290 instance.hvparams = self.hv_inst
12291 for key, val in self.op.hvparams.iteritems():
12292 result.append(("hv/%s" % key, val))
12294 # beparams changes
12295 if self.op.beparams:
12296 instance.beparams = self.be_inst
12297 for key, val in self.op.beparams.iteritems():
12298 result.append(("be/%s" % key, val))
12301 if self.op.os_name:
12302 instance.os = self.op.os_name
12305 if self.op.osparams:
12306 instance.osparams = self.os_inst
12307 for key, val in self.op.osparams.iteritems():
12308 result.append(("os/%s" % key, val))
12310 # online/offline instance
12311 if self.op.online_inst:
12312 self.cfg.MarkInstanceDown(instance.name)
12313 result.append(("admin_state", constants.ADMINST_DOWN))
12314 if self.op.offline_inst:
12315 self.cfg.MarkInstanceOffline(instance.name)
12316 result.append(("admin_state", constants.ADMINST_OFFLINE))
12318 self.cfg.Update(instance, feedback_fn)
12320 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12321 self.owned_locks(locking.LEVEL_NODE)), \
12322 "All node locks should have been released by now"
12324 return result
12326 _DISK_CONVERSIONS = {
12327 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12328 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12329 }
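# The two supported conversions correspond to "gnt-instance modify -t"; an
# illustrative plain-to-drbd conversion (node name assumed):
#   gnt-instance modify -t drbd -n node2.example.com instance1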
12332 class LUInstanceChangeGroup(LogicalUnit):
12333 HPATH = "instance-change-group"
12334 HTYPE = constants.HTYPE_INSTANCE
12335 REQ_BGL = False
12337 def ExpandNames(self):
12338 self.share_locks = _ShareAll()
12339 self.needed_locks = {
12340 locking.LEVEL_NODEGROUP: [],
12341 locking.LEVEL_NODE: [],
12342 }
12344 self._ExpandAndLockInstance()
12346 if self.op.target_groups:
12347 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12348 self.op.target_groups)
12349 else:
12350 self.req_target_uuids = None
12352 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12354 def DeclareLocks(self, level):
12355 if level == locking.LEVEL_NODEGROUP:
12356 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12358 if self.req_target_uuids:
12359 lock_groups = set(self.req_target_uuids)
12361 # Lock all groups used by instance optimistically; this requires going
12362 # via the node before it's locked, requiring verification later on
12363 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12364 lock_groups.update(instance_groups)
12365 else:
12366 # No target groups, need to lock all of them
12367 lock_groups = locking.ALL_SET
12369 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12371 elif level == locking.LEVEL_NODE:
12372 if self.req_target_uuids:
12373 # Lock all nodes used by instances
12374 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12375 self._LockInstancesNodes()
12377 # Lock all nodes in all potential target groups
12378 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12379 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12380 member_nodes = [node_name
12381 for group in lock_groups
12382 for node_name in self.cfg.GetNodeGroup(group).members]
12383 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12384 else:
12385 # Lock all nodes as all groups are potential targets
12386 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12388 def CheckPrereq(self):
12389 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12390 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12391 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12393 assert (self.req_target_uuids is None or
12394 owned_groups.issuperset(self.req_target_uuids))
12395 assert owned_instances == set([self.op.instance_name])
12397 # Get instance information
12398 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12400 # Check if node groups for locked instance are still correct
12401 assert owned_nodes.issuperset(self.instance.all_nodes), \
12402 ("Instance %s's nodes changed while we kept the lock" %
12403 self.op.instance_name)
12405 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12406 owned_groups)
12408 if self.req_target_uuids:
12409 # User requested specific target groups
12410 self.target_uuids = self.req_target_uuids
12412 # All groups except those used by the instance are potential targets
12413 self.target_uuids = owned_groups - inst_groups
12415 conflicting_groups = self.target_uuids & inst_groups
12416 if conflicting_groups:
12417 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12418 " used by the instance '%s'" %
12419 (utils.CommaJoin(conflicting_groups),
12420 self.op.instance_name),
12421 errors.ECODE_INVAL)
12423 if not self.target_uuids:
12424 raise errors.OpPrereqError("There are no possible target groups",
12425 errors.ECODE_INVAL)
12427 def BuildHooksEnv(self):
12428 """Build hooks env.
12430 """
12431 assert self.target_uuids
12433 env = {
12434 "TARGET_GROUPS": " ".join(self.target_uuids),
12435 }
12437 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12439 return env
12441 def BuildHooksNodes(self):
12442 """Build hooks nodes.
12444 """
12445 mn = self.cfg.GetMasterNode()
12446 return ([mn], [mn])
12448 def Exec(self, feedback_fn):
12449 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12451 assert instances == [self.op.instance_name], "Instance not locked"
12453 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12454 instances=instances, target_groups=list(self.target_uuids))
12456 ial.Run(self.op.iallocator)
12458 if not ial.success:
12459 raise errors.OpPrereqError("Can't compute solution for changing group of"
12460 " instance '%s' using iallocator '%s': %s" %
12461 (self.op.instance_name, self.op.iallocator,
12462 ial.info),
12463 errors.ECODE_NORES)
12465 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12467 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12468 " instance '%s'", len(jobs), self.op.instance_name)
12470 return ResultWithJobs(jobs)
12473 class LUBackupQuery(NoHooksLU):
12474 """Query the exports list
12476 """
12477 REQ_BGL = False
12479 def ExpandNames(self):
12480 self.needed_locks = {}
12481 self.share_locks[locking.LEVEL_NODE] = 1
12482 if not self.op.nodes:
12483 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12484 else:
12485 self.needed_locks[locking.LEVEL_NODE] = \
12486 _GetWantedNodes(self, self.op.nodes)
12488 def Exec(self, feedback_fn):
12489 """Compute the list of all the exported system images.
12491 @rtype: dict
12492 @return: a dictionary with the structure node->(export-list)
12493 where export-list is a list of the instances exported on
12494 that node.
12496 """
12497 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12498 rpcresult = self.rpc.call_export_list(self.nodes)
12499 result = {}
12500 for node in rpcresult:
12501 if rpcresult[node].fail_msg:
12502 result[node] = False
12503 else:
12504 result[node] = rpcresult[node].payload
12506 return result
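# Illustrative return value (assumed names); nodes whose RPC failed map to
# False instead of an export list:
#   {"node1.example.com": ["inst1.example.com"],
#    "node2.example.com": False}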
12509 class LUBackupPrepare(NoHooksLU):
12510 """Prepares an instance for an export and returns useful information.
12512 """
12513 REQ_BGL = False
12515 def ExpandNames(self):
12516 self._ExpandAndLockInstance()
12518 def CheckPrereq(self):
12519 """Check prerequisites.
12521 """
12522 instance_name = self.op.instance_name
12524 self.instance = self.cfg.GetInstanceInfo(instance_name)
12525 assert self.instance is not None, \
12526 "Cannot retrieve locked instance %s" % self.op.instance_name
12527 _CheckNodeOnline(self, self.instance.primary_node)
12529 self._cds = _GetClusterDomainSecret()
12531 def Exec(self, feedback_fn):
12532 """Prepares an instance for an export.
12534 """
12535 instance = self.instance
12537 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12538 salt = utils.GenerateSecret(8)
12540 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12541 result = self.rpc.call_x509_cert_create(instance.primary_node,
12542 constants.RIE_CERT_VALIDITY)
12543 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12545 (name, cert_pem) = result.payload
12547 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12548 cert_pem)
12550 return {
12551 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12552 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12553 salt),
12554 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12555 }
12557 return None
12560 class LUBackupExport(LogicalUnit):
12561 """Export an instance to an image in the cluster.
12563 """
12564 HPATH = "instance-export"
12565 HTYPE = constants.HTYPE_INSTANCE
12566 REQ_BGL = False
12568 def CheckArguments(self):
12569 """Check the arguments.
12571 """
12572 self.x509_key_name = self.op.x509_key_name
12573 self.dest_x509_ca_pem = self.op.destination_x509_ca
12575 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12576 if not self.x509_key_name:
12577 raise errors.OpPrereqError("Missing X509 key name for encryption",
12578 errors.ECODE_INVAL)
12580 if not self.dest_x509_ca_pem:
12581 raise errors.OpPrereqError("Missing destination X509 CA",
12582 errors.ECODE_INVAL)
12584 def ExpandNames(self):
12585 self._ExpandAndLockInstance()
12587 # Lock all nodes for local exports
12588 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12589 # FIXME: lock only instance primary and destination node
12591 # Sad but true, for now we have to lock all nodes, as we don't know where
12592 # the previous export might be, and in this LU we search for it and
12593 # remove it from its current node. In the future we could fix this by:
12594 # - making a tasklet to search (share-lock all), then create the
12595 # new one, then one to remove, after
12596 # - removing the removal operation altogether
12597 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12599 def DeclareLocks(self, level):
12600 """Last minute lock declaration."""
12601 # All nodes are locked anyway, so nothing to do here.
12603 def BuildHooksEnv(self):
12604 """Build hooks env.
12606 This will run on the master, primary node and target node.
12608 """
12609 env = {
12610 "EXPORT_MODE": self.op.mode,
12611 "EXPORT_NODE": self.op.target_node,
12612 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12613 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12614 # TODO: Generic function for boolean env variables
12615 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12616 }
12618 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12620 return env
12622 def BuildHooksNodes(self):
12623 """Build hooks nodes.
12625 """
12626 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12628 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12629 nl.append(self.op.target_node)
12631 return (nl, nl)
12633 def CheckPrereq(self):
12634 """Check prerequisites.
12636 This checks that the instance and node names are valid.
12638 """
12639 instance_name = self.op.instance_name
12641 self.instance = self.cfg.GetInstanceInfo(instance_name)
12642 assert self.instance is not None, \
12643 "Cannot retrieve locked instance %s" % self.op.instance_name
12644 _CheckNodeOnline(self, self.instance.primary_node)
12646 if (self.op.remove_instance and
12647 self.instance.admin_state == constants.ADMINST_UP and
12648 not self.op.shutdown):
12649 raise errors.OpPrereqError("Can not remove instance without shutting it"
12650 " down before")
12652 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12653 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12654 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12655 assert self.dst_node is not None
12657 _CheckNodeOnline(self, self.dst_node.name)
12658 _CheckNodeNotDrained(self, self.dst_node.name)
12660 self._cds = None
12661 self.dest_disk_info = None
12662 self.dest_x509_ca = None
12664 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12665 self.dst_node = None
12667 if len(self.op.target_node) != len(self.instance.disks):
12668 raise errors.OpPrereqError(("Received destination information for %s"
12669 " disks, but instance %s has %s disks") %
12670 (len(self.op.target_node), instance_name,
12671 len(self.instance.disks)),
12672 errors.ECODE_INVAL)
12674 cds = _GetClusterDomainSecret()
12676 # Check X509 key name
12677 try:
12678 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12679 except (TypeError, ValueError), err:
12680 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12682 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12683 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12684 errors.ECODE_INVAL)
      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12689 except OpenSSL.crypto.Error, err:
12690 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12691 (err, ), errors.ECODE_INVAL)
12693 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12694 if errcode is not None:
12695 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12696 (msg, ), errors.ECODE_INVAL)
12698 self.dest_x509_ca = cert
      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12706 except errors.GenericError, err:
12707 raise errors.OpPrereqError("Target info for disk %s: %s" %
12708 (idx, err), errors.ECODE_INVAL)
12710 disk_info.append((host, port, magic))
12712 assert len(disk_info) == len(self.op.target_node)
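      # disk_info now holds one (host, port, magic) endpoint tuple per
      # instance disk, in disk index order.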
12713 self.dest_disk_info = disk_info
    else:
      raise errors.ProgrammerError("Unhandled export mode %r" % self.op.mode)
12719 # instance disk type verification
12720 # TODO: Implement export support for file-based disks
12721 for disk in self.instance.disks:
12722 if disk.dev_type == constants.LD_FILE:
12723 raise errors.OpPrereqError("Export not supported for instances with"
12724 " file-based disks", errors.ECODE_INVAL)
12726 def _CleanupExports(self, feedback_fn):
12727 """Removes exports of current instance from all other nodes.
12729 If an instance in a cluster with nodes A..D was exported to node C, its
12730 exports will be removed from the nodes A, B and D.
12733 assert self.op.mode != constants.EXPORT_MODE_REMOTE
    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # On one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
12754 def Exec(self, feedback_fn):
12755 """Export an instance to an image in the cluster.
12758 assert self.op.mode in constants.EXPORT_MODES
12760 instance = self.instance
12761 src_node = instance.primary_node
12763 if self.op.shutdown:
12764 # shutdown the instance, but not the disks
12765 feedback_fn("Shutting down instance %s" % instance.name)
12766 result = self.rpc.call_instance_shutdown(src_node, instance,
12767 self.op.shutdown_timeout)
12768 # TODO: Maybe ignore failures if ignore_remove_failures is set
12769 result.Raise("Could not shutdown instance %s on"
12770 " node %s" % (instance.name, src_node))
12772 # set the disks ID correctly since call_instance_start needs the
12773 # correct drbd minor to create the symlinks
12774 for disk in instance.disks:
12775 self.cfg.SetDiskID(disk, src_node)
    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)
      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)
    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))
12843 # At this point, the export was successful, we can cleanup/finish
12845 # Remove instance if requested
12846 if self.op.remove_instance:
12847 feedback_fn("Removing instance %s" % instance.name)
12848 _RemoveInstance(self, feedback_fn, instance,
12849 self.op.ignore_remove_failures)
12851 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12852 self._CleanupExports(feedback_fn)
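    # fin_resu is the overall finalization status and dresults holds one
    # boolean per instance disk, e.g. (True, [True, True]) for a successful
    # export of a two-disk instance.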
12854 return fin_resu, dresults
12857 class LUBackupRemove(NoHooksLU):
12858 """Remove exports related to the named instance.
12863 def ExpandNames(self):
12864 self.needed_locks = {}
12865 # We need all nodes to be locked in order for RemoveExport to work, but we
12866 # don't need to lock the instance itself, as nothing will happen to it (and
12867 # we can remove exports also for a removed instance)
12868 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12870 def Exec(self, feedback_fn):
12871 """Remove any export.
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
12904 class LUGroupAdd(LogicalUnit):
12905 """Logical unit for creating node groups.
12908 HPATH = "group-add"
12909 HTYPE = constants.HTYPE_GROUP
12912 def ExpandNames(self):
12913 # We need the new group's UUID here so that we can create and acquire the
12914 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12915 # that it should not check whether the UUID exists in the configuration.
12916 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12917 self.needed_locks = {}
12918 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)
12937 if self.op.ndparams:
12938 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
    else:
      self.new_hv_state = None

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
    else:
      self.new_disk_state = None
    if self.op.diskparams:
      for templ in constants.DISK_TEMPLATES:
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
    else:
      self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12958 if self.op.ipolicy:
12959 cluster = self.cfg.GetClusterInfo()
12960 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
12961 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }
12971 def BuildHooksNodes(self):
12972 """Build hooks nodes.
12975 mn = self.cfg.GetMasterNode()
12976 return ([mn], [mn])
12978 def Exec(self, feedback_fn):
12979 """Add the node group to the cluster.
12982 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12983 uuid=self.group_uuid,
12984 alloc_policy=self.op.alloc_policy,
12985 ndparams=self.op.ndparams,
12986 diskparams=self.op.diskparams,
12987 ipolicy=self.op.ipolicy,
12988 hv_state_static=self.new_hv_state,
12989 disk_state_static=self.new_disk_state)
12991 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12992 del self.remove_locks[locking.LEVEL_NODEGROUP]
12995 class LUGroupAssignNodes(NoHooksLU):
12996 """Logical unit for assigning nodes to groups.
13001 def ExpandNames(self):
13002 # These raise errors.OpPrereqError on their own:
13003 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13004 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13006 # We want to lock all the affected nodes and groups. We have readily
13007 # available the list of nodes, and the *destination* group. To gather the
13008 # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }
13014 def DeclareLocks(self, level):
13015 if level == locking.LEVEL_NODEGROUP:
13016 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13018 # Try to get all affected nodes' groups without having the group or node
13019 # lock yet. Needs verification later in the code flow.
13020 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13022 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13024 def CheckPrereq(self):
13025 """Check prerequisites.
13028 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13029 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13030 frozenset(self.op.nodes))
13032 expected_locks = (set([self.group_uuid]) |
13033 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13034 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13035 if actual_locks != expected_locks:
13036 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13037 " current groups are '%s', used to be '%s'" %
13038 (utils.CommaJoin(expected_locks),
13039 utils.CommaJoin(actual_locks)))
13041 self.node_data = self.cfg.GetAllNodesInfo()
13042 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13043 instance_data = self.cfg.GetAllInstancesInfo()
13045 if self.group is None:
13046 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13047 (self.op.group_name, self.group_uuid))
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
13070 def Exec(self, feedback_fn):
13071 """Assign nodes to a new group.
13074 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13076 self.cfg.AssignGroupNodes(mods)
  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13080 """Check for split instances after a node assignment.
13082 This method considers a series of node assignments as an atomic operation,
13083 and returns information about split instances after applying the set of
13086 In particular, it returns information about newly split instances, and
13087 instances that were already split, and remain so after the change.
13089 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13092 @type changes: list of (node_name, new_group_uuid) pairs.
13093 @param changes: list of node assignments to consider.
13094 @param node_data: a dict with data for all nodes
13095 @param instance_data: a dict with all instances to consider
13096 @rtype: a two-tuple
13097 @return: a list of instances that were previously okay and result split as a
13098 consequence of this change, and a list of instances that were previously
13099 split and this change does not fix.
13102 changed_nodes = dict((node, group) for node, group in changes
13103 if node_data[node].group != group)
13105 all_split_instances = set()
13106 previously_split_instances = set()
13108 def InstanceNodes(instance):
13109 return [instance.primary_node] + list(instance.secondary_nodes)
    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)
13117 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13118 previously_split_instances.add(inst.name)
13120 if len(set(changed_nodes.get(node, node_data[node].group)
13121 for node in instance_nodes)) > 1:
13122 all_split_instances.add(inst.name)
13124 return (list(all_split_instances - previously_split_instances),
13125 list(previously_split_instances & all_split_instances))
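# Illustrative example (values are hypothetical): with node n1 in group G1,
# node n2 in group G2 and a DRBD instance on (n1, n2), the instance counts as
# previously split; changes = [("n2", "G1")] would heal it, so it appears in
# neither returned list, while a no-op change leaves it in the second list.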
13128 class _GroupQuery(_QueryBase):
13129 FIELDS = query.GROUP_FIELDS
  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)
  def DeclareLocks(self, lu, level):
    pass
13163 def _GetQueryData(self, lu):
13164 """Computes the list of node groups and their attributes.
13167 do_nodes = query.GQ_NODE in self.requested_data
13168 do_instances = query.GQ_INST in self.requested_data
13170 group_to_nodes = None
13171 group_to_instances = None
13173 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13174 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13175 # latter GetAllInstancesInfo() is not enough, for we have to go through
13176 # instance->node. Hence, we will need to process nodes even if we only need
13177 # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group
      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None
13201 return query.GroupQueryData(self._cluster,
13202 [self._all_groups[uuid]
13203 for uuid in self.wanted],
13204 group_to_nodes, group_to_instances)
13207 class LUGroupQuery(NoHooksLU):
13208 """Logical unit for querying node groups.
13213 def CheckArguments(self):
13214 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13215 self.op.output_fields, False)
13217 def ExpandNames(self):
13218 self.gq.ExpandNames(self)
13220 def DeclareLocks(self, level):
13221 self.gq.DeclareLocks(self, level)
13223 def Exec(self, feedback_fn):
13224 return self.gq.OldStyleQuery(self)
13227 class LUGroupSetParams(LogicalUnit):
13228 """Modifies the parameters of a node group.
13231 HPATH = "group-modify"
13232 HTYPE = constants.HTYPE_GROUP
  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.hv_state,
      self.op.disk_state,
      self.op.ipolicy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
13249 def ExpandNames(self):
13250 # This raises errors.OpPrereqError on its own:
13251 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
13257 def CheckPrereq(self):
13258 """Check prerequisites.
13261 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13263 if self.group is None:
13264 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13265 (self.op.group_name, self.group_uuid))
13267 if self.op.ndparams:
13268 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13269 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13270 self.new_ndparams = new_ndparams
13272 if self.op.diskparams:
13273 self.new_diskparams = dict()
13274 for templ in constants.DISK_TEMPLATES:
13275 if templ not in self.op.diskparams:
13276 self.op.diskparams[templ] = {}
13277 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13278 self.op.diskparams[templ])
13279 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13280 self.new_diskparams[templ] = new_templ_params
13282 if self.op.hv_state:
13283 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13284 self.group.hv_state_static)
13286 if self.op.disk_state:
13287 self.new_disk_state = \
13288 _MergeAndVerifyDiskState(self.op.disk_state,
13289 self.group.disk_state_static)
    if self.op.ipolicy:
      g_ipolicy = {}
      for key, value in self.op.ipolicy.iteritems():
        g_ipolicy[key] = _GetUpdatedParams(self.group.ipolicy.get(key, {}),
                                           value,
                                           use_none=True)
        utils.ForceDictType(g_ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
      self.new_ipolicy = g_ipolicy
      objects.InstancePolicy.CheckParameterSyntax(self.new_ipolicy)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }
13310 def BuildHooksNodes(self):
13311 """Build hooks nodes.
13314 mn = self.cfg.GetMasterNode()
13315 return ([mn], [mn])
  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []
    if self.op.ndparams:
13324 self.group.ndparams = self.new_ndparams
13325 result.append(("ndparams", str(self.group.ndparams)))
13327 if self.op.diskparams:
13328 self.group.diskparams = self.new_diskparams
13329 result.append(("diskparams", str(self.group.diskparams)))
13331 if self.op.alloc_policy:
13332 self.group.alloc_policy = self.op.alloc_policy
13334 if self.op.hv_state:
13335 self.group.hv_state_static = self.new_hv_state
13337 if self.op.disk_state:
13338 self.group.disk_state_static = self.new_disk_state
13340 if self.op.ipolicy:
13341 self.group.ipolicy = self.new_ipolicy
    self.cfg.Update(self.group, feedback_fn)
    return result
13347 class LUGroupRemove(LogicalUnit):
13348 HPATH = "group-remove"
13349 HTYPE = constants.HTYPE_GROUP
13352 def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
13354 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
13359 def CheckPrereq(self):
13360 """Check prerequisites.
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
13367 # Verify that the group is empty.
13368 group_nodes = [node.name
13369 for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]
    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
13376 utils.CommaJoin(utils.NiceSort(group_nodes))),
13377 errors.ECODE_STATE)
13379 # Verify the cluster would not be left group-less.
13380 if len(self.cfg.GetNodeGroupList()) == 1:
13381 raise errors.OpPrereqError("Group '%s' is the only group,"
13382 " cannot be removed" %
13383 self.op.group_name,
13384 errors.ECODE_STATE)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }
13394 def BuildHooksNodes(self):
13395 """Build hooks nodes.
13398 mn = self.cfg.GetMasterNode()
13399 return ([mn], [mn])
  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
13407 except errors.ConfigurationError:
13408 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13409 (self.op.group_name, self.group_uuid))
13411 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13414 class LUGroupRename(LogicalUnit):
13415 HPATH = "group-rename"
13416 HTYPE = constants.HTYPE_GROUP
13419 def ExpandNames(self):
13420 # This raises errors.OpPrereqError on its own:
13421 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
13427 def CheckPrereq(self):
13428 """Check prerequisites.
    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }
13452 def BuildHooksNodes(self):
13453 """Build hooks nodes.
13456 mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
13463 if node.group == self.group_uuid)
13465 return (run_nodes, run_nodes)
13467 def Exec(self, feedback_fn):
13468 """Rename the node group.
    group = self.cfg.GetNodeGroup(self.group_uuid)
    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13475 (self.op.group_name, self.group_uuid))
13477 group.name = self.op.new_name
13478 self.cfg.Update(group, feedback_fn)
13480 return self.op.new_name
13483 class LUGroupEvacuate(LogicalUnit):
13484 HPATH = "group-evacuate"
13485 HTYPE = constants.HTYPE_GROUP
13488 def ExpandNames(self):
13489 # This raises errors.OpPrereqError on its own:
13490 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []
13498 if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)
13505 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13507 self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }
13514 def DeclareLocks(self, level):
13515 if level == locking.LEVEL_INSTANCE:
13516 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13518 # Lock instances optimistically, needs verification once node and group
13519 # locks have been acquired
13520 self.needed_locks[locking.LEVEL_INSTANCE] = \
13521 self.cfg.GetNodeGroupInstances(self.group_uuid)
13523 elif level == locking.LEVEL_NODEGROUP:
13524 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13526 if self.req_target_uuids:
13527 lock_groups = set([self.group_uuid] + self.req_target_uuids)
        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET
13540 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13542 elif level == locking.LEVEL_NODE:
13543 # This will only lock the nodes in the group to be evacuated which
13544 # contain actual instances
13545 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13546 self._LockInstancesNodes()
13548 # Lock all nodes in group to be evacuated and target groups
13549 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13550 assert self.group_uuid in owned_groups
13551 member_nodes = [node_name
13552 for group in owned_groups
13553 for node_name in self.cfg.GetNodeGroup(group).members]
13554 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13556 def CheckPrereq(self):
13557 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13558 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13559 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13561 assert owned_groups.issuperset(self.req_target_uuids)
13562 assert self.group_uuid in owned_groups
13564 # Check if locked instances are still correct
13565 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13567 # Get instance information
13568 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13570 # Check if node groups for locked instances are still correct
13571 for instance_name in owned_instances:
13572 inst = self.instances[instance_name]
13573 assert owned_nodes.issuperset(inst.all_nodes), \
13574 "Instance %s's nodes changed while we kept the lock" % instance_name
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)
      assert self.group_uuid in inst_groups, \
13580 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13582 if self.req_target_uuids:
13583 # User requested specific target groups
13584 self.target_uuids = self.req_target_uuids
13586 # All groups except the one to be evacuated are potential targets
13587 self.target_uuids = [group_uuid for group_uuid in owned_groups
13588 if group_uuid != self.group_uuid]
13590 if not self.target_uuids:
13591 raise errors.OpPrereqError("There are no possible target groups",
13592 errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }
13603 def BuildHooksNodes(self):
13604 """Build hooks nodes.
13607 mn = self.cfg.GetMasterNode()
13609 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13611 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13613 return (run_nodes, run_nodes)
13615 def Exec(self, feedback_fn):
13616 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13618 assert self.group_uuid not in self.target_uuids
13620 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13621 instances=instances, target_groups=self.target_uuids)
13623 ial.Run(self.op.iallocator)
13625 if not ial.success:
13626 raise errors.OpPrereqError("Can't compute group evacuation using"
13627 " iallocator '%s': %s" %
13628 (self.op.iallocator, ial.info),
13629 errors.ECODE_NORES)
13631 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
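    # _LoadNodeEvacResult (defined earlier in this module) converts the
    # iallocator's (moved, failed, jobs) triple into per-job opcode lists and
    # is expected to raise OpExecError if any instance failed to be placed.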
13633 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13634 len(jobs), self.op.group_name)
13636 return ResultWithJobs(jobs)
13639 class TagsLU(NoHooksLU): # pylint: disable=W0223
13640 """Generic tags LU.
13642 This is an abstract class which is the parent of all the other tags LUs.
13645 def ExpandNames(self):
13646 self.group_uuid = None
13647 self.needed_locks = {}
13648 if self.op.kind == constants.TAG_NODE:
13649 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13650 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13651 elif self.op.kind == constants.TAG_INSTANCE:
13652 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13653 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      self.needed_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13657 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13658 # not possible to acquire the BGL based on opcode parameters)
13660 def CheckPrereq(self):
13661 """Check prerequisites.
13664 if self.op.kind == constants.TAG_CLUSTER:
13665 self.target = self.cfg.GetClusterInfo()
13666 elif self.op.kind == constants.TAG_NODE:
13667 self.target = self.cfg.GetNodeInfo(self.op.name)
13668 elif self.op.kind == constants.TAG_INSTANCE:
13669 self.target = self.cfg.GetInstanceInfo(self.op.name)
13670 elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13674 str(self.op.kind), errors.ECODE_INVAL)
13677 class LUTagsGet(TagsLU):
13678 """Returns the tags of a given object.
13683 def ExpandNames(self):
13684 TagsLU.ExpandNames(self)
13686 # Share locks as this is only a read operation
13687 self.share_locks = _ShareAll()
13689 def Exec(self, feedback_fn):
13690 """Returns the tag list.
13693 return list(self.target.GetTags())
13696 class LUTagsSearch(NoHooksLU):
13697 """Searches the tags for a given pattern.
13702 def ExpandNames(self):
13703 self.needed_locks = {}
13705 def CheckPrereq(self):
13706 """Check prerequisites.
    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
13713 except re.error, err:
13714 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13715 (self.op.pattern, err), errors.ECODE_INVAL)
13717 def Exec(self, feedback_fn):
13718 """Returns the tag list.
13722 tgts = [("/cluster", cfg.GetClusterInfo())]
13723 ilist = cfg.GetAllInstancesInfo().values()
13724 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13725 nlist = cfg.GetAllNodesInfo().values()
13726 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13727 tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
13731 for tag in target.GetTags():
13732 if self.re.search(tag):
          results.append((path, tag))
    return results
13737 class LUTagsSet(TagsLU):
13738 """Sets a tag on a given object.
13743 def CheckPrereq(self):
13744 """Check prerequisites.
13746 This checks the type and length of the tag name and value.
13749 TagsLU.CheckPrereq(self)
13750 for tag in self.op.tags:
13751 objects.TaggableObject.ValidateTag(tag)
  def Exec(self, feedback_fn):
    """Sets the tag."""
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
13760 except errors.TagError, err:
13761 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13762 self.cfg.Update(self.target, feedback_fn)
13765 class LUTagsDel(TagsLU):
13766 """Delete a list of tags from a given object.
13771 def CheckPrereq(self):
13772 """Check prerequisites.
13774 This checks that we have the given tag.
13777 TagsLU.CheckPrereq(self)
13778 for tag in self.op.tags:
13779 objects.TaggableObject.ValidateTag(tag)
13780 del_tags = frozenset(self.op.tags)
13781 cur_tags = self.target.GetTags()
    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
13786 raise errors.OpPrereqError("Tag(s) %s not found" %
13787 (utils.CommaJoin(diff_names), ),
13788 errors.ECODE_NOENT)
13790 def Exec(self, feedback_fn):
13791 """Remove the tag from the object.
13794 for tag in self.op.tags:
13795 self.target.RemoveTag(tag)
13796 self.cfg.Update(self.target, feedback_fn)
13799 class LUTestDelay(NoHooksLU):
13800 """Sleep for a specified amount of time.
13802 This LU sleeps on the master and/or nodes for a specified amount of
13808 def ExpandNames(self):
13809 """Expand names and set required locks.
13811 This expands the node list, if any.
13814 self.needed_locks = {}
13815 if self.op.on_nodes:
13816 # _GetWantedNodes can be used here, but is not always appropriate to use
13817 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13818 # more information.
13819 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13820 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13822 def _TestDelay(self):
13823 """Do the actual sleep.
13826 if self.op.on_master:
13827 if not utils.TestDelay(self.op.duration):
13828 raise errors.OpExecError("Error during master delay test")
13829 if self.op.on_nodes:
13830 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13831 for node, node_result in result.items():
13832 node_result.Raise("Failure during rpc call to node %s" % node)
13834 def Exec(self, feedback_fn):
13835 """Execute the test delay opcode, with the wanted repetitions.
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
13847 class LUTestJqueue(NoHooksLU):
13848 """Utility LU to test some aspects of the job queue.
13853 # Must be lower than default timeout for WaitForJobChange to see whether it
13854 # notices changed jobs
13855 _CLIENT_CONNECT_TIMEOUT = 20.0
13856 _CLIENT_CONFIRM_TIMEOUT = 60.0
  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
13860 """Opens a Unix socket and waits for another program to connect.
13863 @param cb: Callback to send socket name to client
13864 @type errcls: class
13865 @param errcls: Exception class to use for errors
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
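  # The test client is expected to connect to the socket path it receives via
  # the callback and later confirm by sending a byte or closing its end; both
  # steps are bounded by the timeouts defined above.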
13908 def _SendNotification(self, test, arg, sockname):
13909 """Sends a notification to the client.
13912 @param test: Test name
13913 @param arg: Test argument (depends on test)
13914 @type sockname: string
13915 @param sockname: Socket path
13918 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)
13939 def CheckArguments(self):
13940 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13941 self.expandnames_calls = 0
13943 def ExpandNames(self):
13944 checkargs_calls = getattr(self, "checkargs_calls", 0)
13945 if checkargs_calls < 1:
13946 raise errors.ProgrammerError("CheckArguments was not called")
13948 self.expandnames_calls += 1
13950 if self.op.notify_waitlock:
13951 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13953 self.LogInfo("Expanding names")
13955 # Get lock on master node (just to get a lock, not for a particular reason)
13956 self.needed_locks = {
13957 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13960 def Exec(self, feedback_fn):
13961 if self.expandnames_calls < 1:
13962 raise errors.ProgrammerError("ExpandNames was not called")
13964 if self.op.notify_exec:
13965 self._Notify(False, constants.JQT_EXEC, None)
13967 self.LogInfo("Executing")
13969 if self.op.log_messages:
13970 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
13983 class IAllocator(object):
13984 """IAllocator framework.
13986 An IAllocator instance has three sets of attributes:
13987 - cfg that is needed to query the cluster
13988 - input data (all members of the _KEYS class attribute are required)
13989 - four buffer attributes (in|out_data|text), that represent the
13990 input (to the external script) in text and data structure format,
13991 and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage
13996 # pylint: disable=R0902
13997 # lots of instance attributes
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    self.mode = mode
14002 # init buffer variables
14003 self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.name = None
    self.memory = self.disks = self.disk_template = None
14007 self.os = self.tags = self.nics = self.vcpus = None
14008 self.hypervisor = None
14009 self.relocate_from = None
14011 self.instances = None
14012 self.evac_mode = None
14013 self.target_groups = []
14015 self.required_nodes = None
14016 # init result fields
14017 self.success = self.info = self.result = None
    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)

    self._BuildInputData(compat.partial(fn, self), keydata)
14039 def _ComputeClusterData(self):
14040 """Compute the generic allocator input data.
    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
14056 iinfo = cfg.GetAllInstancesInfo().values()
14057 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14060 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14062 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14063 hypervisor_name = self.hypervisor
14064 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14065 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14067 hypervisor_name = cluster_info.primary_hypervisor
    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
14075 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14077 config_ndata = self._ComputeBasicNodeData(ninfo)
14078 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14079 i_list, config_ndata)
14080 assert len(data["nodes"]) == len(ninfo), \
14081 "Incomplete node data computed"
14083 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14085 self.in_data = data
  @staticmethod
  def _ComputeNodeGroupData(cfg):
14089 """Compute node groups data.
14092 ng = dict((guuid, {
14093 "name": gdata.name,
14094 "alloc_policy": gdata.alloc_policy,
14096 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
14102 """Compute global node data.
14105 @returns: a dict of name: (node dict, node config)
14108 # fill in static (config-based) values
14109 node_results = dict((ninfo.name, {
14110 "tags": list(ninfo.GetTags()),
14111 "primary_ip": ninfo.primary_ip,
14112 "secondary_ip": ninfo.secondary_ip,
14113 "offline": ninfo.offline,
14114 "drained": ninfo.drained,
14115 "master_candidate": ninfo.master_candidate,
14116 "group": ninfo.group,
14117 "master_capable": ninfo.master_capable,
14118 "vm_capable": ninfo.vm_capable,
14120 for ninfo in node_cfg.values())
14122 return node_results
  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.
14129 @param node_results: the basic node structures as filled from the config
14132 #TODO(dynmem): compute the right data on MAX and MIN memory
14133 # make a copy of the current dict
14134 node_results = dict(node_results)
14135 for nname, nresult in node_data.items():
14136 assert nname in node_results, "Missing basic data for node %s" % nname
14137 ninfo = node_cfg[nname]
14139 if not (ninfo.offline or ninfo.drained):
14140 nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)
14145 for attr in ["memory_total", "memory_free", "memory_dom0",
14146 "vg_size", "vg_free", "cpu_total"]:
14147 if attr not in remote_info:
14148 raise errors.OpExecError("Node '%s' didn't return attribute"
14149 " '%s'" % (nname, attr))
14150 if not isinstance(remote_info[attr], int):
14151 raise errors.OpExecError("Node '%s' returned invalid value"
14153 (nname, attr, remote_info[attr]))
14154 # compute memory used by primary instances
14155 i_p_mem = i_p_up_mem = 0
14156 for iinfo, beinfo in i_list:
14157 if iinfo.primary_node == nname:
14158 i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14163 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14164 remote_info["memory_free"] -= max(0, i_mem_diff)
14166 if iinfo.admin_state == constants.ADMINST_UP:
14167 i_p_up_mem += beinfo[constants.BE_MAXMEM]
      # compute memory used by instances
      pnr_dyn = {
        "total_memory": remote_info["memory_total"],
        "reserved_memory": remote_info["memory_dom0"],
        "free_memory": remote_info["memory_free"],
        "total_disk": remote_info["vg_size"],
        "free_disk": remote_info["vg_free"],
        "total_cpus": remote_info["cpu_total"],
        "i_pri_memory": i_p_mem,
        "i_pri_up_memory": i_p_up_mem,
        }
14180 pnr_dyn.update(node_results[nname])
14181 node_results[nname] = pnr_dyn
14183 return node_results
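  # Each node entry now combines the static configuration fields with the
  # dynamic values above, e.g. (illustrative): {"group": <uuid>, "offline":
  # False, "total_memory": 4096, "free_memory": 2048, "total_cpus": 4, ...}.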
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data."""
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
14205 "tags": list(iinfo.GetTags()),
14206 "admin_state": iinfo.admin_state,
14207 "vcpus": beinfo[constants.BE_VCPUS],
14208 "memory": beinfo[constants.BE_MAXMEM],
14210 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14212 "disks": [{constants.IDISK_SIZE: dsk.size,
14213 constants.IDISK_MODE: dsk.mode}
14214 for dsk in iinfo.disks],
14215 "disk_template": iinfo.disk_template,
14216 "hypervisor": iinfo.hypervisor,
14218 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14220 instance_data[iinfo.name] = pir
14222 return instance_data
14224 def _AddNewInstance(self):
14225 """Add new instance data to allocator structure.
14227 This in combination with _AllocatorGetClusterData will create the
14228 correct structure needed as input for the allocator.
14230 The checks for the completeness of the opcode must have already been
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request
14257 def _AddRelocateInstance(self):
14258 """Add relocate instance data to allocator structure.
14260 This in combination with _IAllocatorGetClusterData will create the
14261 correct structure needed as input for the allocator.
14263 The checks for the completeness of the opcode must have already been
14267 instance = self.cfg.GetInstanceInfo(self.name)
14268 if instance is None:
14269 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14270 " IAllocator" % self.name)
14272 if instance.disk_template not in constants.DTS_MIRRORED:
14273 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14274 errors.ECODE_INVAL)
14276 if instance.disk_template in constants.DTS_INT_MIRROR and \
14277 len(instance.secondary_nodes) != 1:
14278 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14279 errors.ECODE_STATE)
14281 self.required_nodes = 1
14282 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
14293 def _AddNodeEvacuate(self):
14294 """Get data for node-evacuate requests.
14298 "instances": self.instances,
14299 "evac_mode": self.evac_mode,
14302 def _AddChangeGroup(self):
14303 """Get data for node-evacuate requests.
14307 "instances": self.instances,
14308 "target_groups": self.target_groups,
14311 def _BuildInputData(self, fn, keydata):
14312 """Build input data structures.
14315 self._ComputeClusterData()
14318 request["type"] = self.mode
14319 for keyname, keytype in keydata:
14320 if keyname not in request:
14321 raise errors.ProgrammerError("Request parameter %s is missing" %
14323 val = request[keyname]
14324 if not keytype(val):
14325 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14326 " validation, value %s, expected"
14327 " type %s" % (keyname, val, keytype))
14328 self.in_data["request"] = request
14330 self.in_text = serializer.Dump(self.in_data)
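  # Sketch of the serialized input handed to the iallocator script (exact
  # fields depend on mode and cluster state):
  # {"version": ..., "cluster_name": ..., "nodes": {...}, "instances": {...},
  #  "nodegroups": {...}, "request": {"type": <mode>, ...}}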
14332 _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
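  # A node-evacuate/change-group result is a (moved, failed, jobs) triple:
  # "moved" lists (instance, target group, new nodes) entries, "failed" lists
  # (instance, reason) pairs and "jobs" holds the opcode lists to be submitted.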
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
14385 def Run(self, name, validate=True, call_fn=None):
14386 """Run an instance allocator and return the results.
14389 if call_fn is None:
14390 call_fn = self.rpc.call_iallocator_runner
14392 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14393 result.Raise("Failure while running the iallocator script")
    self.out_text = result.payload
    if validate:
      self._ValidateResult()
14399 def _ValidateResult(self):
14400 """Process the allocator results.
14402 This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
14409 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14411 if not isinstance(rdict, dict):
14412 raise errors.OpExecError("Can't parse iallocator results: not a dict")
    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]
14419 for key in "success", "info", "result":
14420 if key not in rdict:
14421 raise errors.OpExecError("Can't parse iallocator results:"
14422 " missing key '%s'" % key)
14423 setattr(self, key, rdict[key])
14425 if not self._result_check(self.result):
14426 raise errors.OpExecError("Iallocator returned invalid result,"
14427 " expected %s, got %s" %
14428 (self._result_check, self.result),
14429 errors.ECODE_INVAL)
14431 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14432 assert self.relocate_from is not None
14433 assert self.required_nodes == 1
14435 node2group = dict((name, ndata["group"])
14436 for (name, ndata) in self.in_data["nodes"].items())
14438 fn = compat.partial(self._NodesToGroups, node2group,
14439 self.in_data["nodegroups"])
14441 instance = self.cfg.GetInstanceInfo(self.name)
14442 request_groups = fn(self.relocate_from + [instance.primary_node])
14443 result_groups = fn(rdict["result"] + [instance.primary_node])
14445 if self.success and not set(result_groups).issubset(request_groups):
14446 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14447 " differ from original groups (%s)" %
14448 (utils.CommaJoin(result_groups),
14449 utils.CommaJoin(request_groups)))
14451 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14452 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14454 self.out_data = rdict
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]
        result.add(group_name)
14487 return sorted(result)
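# Example with hypothetical values: _NodesToGroups({"n1": "uuid1"},
# {"uuid1": {"name": "default"}}, ["n1", "unknown-node"]) returns ["default"];
# unknown nodes are skipped and group UUIDs without data are kept as-is.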
14490 class LUTestAllocator(NoHooksLU):
14491 """Run allocator tests.
14493 This LU runs the allocator tests
14496 def CheckPrereq(self):
14497 """Check prerequisites.
14499 This checks the opcode parameters depending on the director and mode test.
14502 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14503 for attr in ["memory", "disks", "disk_template",
14504 "os", "tags", "nics", "vcpus"]:
14505 if not hasattr(self.op, attr):
14506 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14507 attr, errors.ECODE_INVAL)
14508 iname = self.cfg.ExpandInstanceName(self.op.name)
14509 if iname is not None:
14510 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14511 iname, errors.ECODE_EXISTS)
14512 if not isinstance(self.op.nics, list):
14513 raise errors.OpPrereqError("Invalid parameter 'nics'",
14514 errors.ECODE_INVAL)
14515 if not isinstance(self.op.disks, list):
14516 raise errors.OpPrereqError("Invalid parameter 'disks'",
14517 errors.ECODE_INVAL)
14518 for row in self.op.disks:
14519 if (not isinstance(row, dict) or
14520 constants.IDISK_SIZE not in row or
14521 not isinstance(row[constants.IDISK_SIZE], int) or
14522 constants.IDISK_MODE not in row or
14523 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14524 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14525 " parameter", errors.ECODE_INVAL)
14526 if self.op.hypervisor is None:
14527 self.op.hypervisor = self.cfg.GetHypervisorType()
14528 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14529 fname = _ExpandInstanceName(self.cfg, self.op.name)
14530 self.op.name = fname
14531 self.relocate_from = \
14532 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14533 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14534 constants.IALLOCATOR_MODE_NODE_EVAC):
14535 if not self.op.instances:
14536 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14540 self.op.mode, errors.ECODE_INVAL)
14542 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14543 if self.op.allocator is None:
14544 raise errors.OpPrereqError("Missing allocator name",
14545 errors.ECODE_INVAL)
14546 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14547 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14548 self.op.direction, errors.ECODE_INVAL)
14550 def Exec(self, feedback_fn):
14551 """Run the allocator test.
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
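# The assertion above guards against a query resource being exposed via
# opcodes (constants.QR_VIA_OP) without a matching implementation here.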
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)