code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62 from ganeti import rpc
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70 # States of instance
  71 INSTANCE_UP = [constants.ADMINST_UP]
  72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
  74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  76
  77
  78 class ResultWithJobs:
  79   """Data container for LU results with jobs.
  80
  81   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  82   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  83   contained in the C{jobs} attribute and include the job IDs in the opcode
  84   result.
  85
  86   """
  87   def __init__(self, jobs, **kwargs):
  88     """Initializes this class.
  89
  90     Additional return values can be specified as keyword arguments.
  91
  92     @type jobs: list of lists of L{opcode.OpCode}
  93     @param jobs: A list of lists of opcode objects
  94
  95     """
  96     self.jobs = jobs
  97     self.other = kwargs
  98
  99
 100 class LogicalUnit(object):
 101   """Logical Unit base class.
 102
 103   Subclasses must follow these rules:
 104     - implement ExpandNames
 105     - implement CheckPrereq (except when tasklets are used)
 106     - implement Exec (except when tasklets are used)
 107     - implement BuildHooksEnv
 108     - implement BuildHooksNodes
 109     - redefine HPATH and HTYPE
 110     - optionally redefine their run requirements:
 111         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 112
 113   Note that all commands require root permissions.
 114
 115   @ivar dry_run_result: the value (if any) that will be returned to the caller
 116       in dry-run mode (signalled by opcode dry_run parameter)
 117
 118   """
 119   HPATH = None
 120   HTYPE = None
 121   REQ_BGL = True
 122
 123   def __init__(self, processor, op, context, rpc_runner):
 124     """Constructor for LogicalUnit.
 125
 126     This needs to be overridden in derived classes in order to check op
 127     validity.
 128
 129     """
 130     self.proc = processor
 131     self.op = op
 132     self.cfg = context.cfg
 133     self.glm = context.glm
 134     # readability alias
 135     self.owned_locks = context.glm.list_owned
 136     self.context = context
 137     self.rpc = rpc_runner
 138     # Dicts used to declare locking needs to mcpu
 139     self.needed_locks = None
 140     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 141     self.add_locks = {}
 142     self.remove_locks = {}
 143     # Used to force good behavior when calling helper functions
 144     self.recalculate_locks = {}
 145     # logging
 146     self.Log = processor.Log # pylint: disable=C0103
 147     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 148     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 149     self.LogStep = processor.LogStep # pylint: disable=C0103
 150     # support for dry-run
 151     self.dry_run_result = None
 152     # support for generic debug attribute
 153     if (not hasattr(self.op, "debug_level") or
 154         not isinstance(self.op.debug_level, int)):
 155       self.op.debug_level = 0
 156
 157     # Tasklets
 158     self.tasklets = None
 159
 160     # Validate opcode parameters and set defaults
 161     self.op.Validate(True)
 162
 163     self.CheckArguments()
 164
 165   def CheckArguments(self):
 166     """Check syntactic validity for the opcode arguments.
 167
 168     This method is for doing a simple syntactic check and ensure
 169     validity of opcode parameters, without any cluster-related
 170     checks. While the same can be accomplished in ExpandNames and/or
 171     CheckPrereq, doing these separate is better because:
 172
 173       - ExpandNames is left as as purely a lock-related function
 174       - CheckPrereq is run after we have acquired locks (and possible
 175         waited for them)
 176
 177     The function is allowed to change the self.op attribute so that
 178     later methods can no longer worry about missing parameters.
 179
 180     """
 181     pass
 182
 183   def ExpandNames(self):
 184     """Expand names for this LU.
 185
 186     This method is called before starting to execute the opcode, and it should
 187     update all the parameters of the opcode to their canonical form (e.g. a
 188     short node name must be fully expanded after this method has successfully
 189     completed). This way locking, hooks, logging, etc. can work correctly.
 190
 191     LUs which implement this method must also populate the self.needed_locks
 192     member, as a dict with lock levels as keys, and a list of needed lock names
 193     as values. Rules:
 194
 195       - use an empty dict if you don't need any lock
 196       - if you don't need any lock at a particular level omit that level
 197       - don't put anything for the BGL level
 198       - if you want all locks at a level use locking.ALL_SET as a value
 199
 200     If you need to share locks (rather than acquire them exclusively) at one
 201     level you can modify self.share_locks, setting a true value (usually 1) for
 202     that level. By default locks are not shared.
 203
 204     This function can also define a list of tasklets, which then will be
 205     executed in order instead of the usual LU-level CheckPrereq and Exec
 206     functions, if those are not defined by the LU.
 207
 208     Examples::
 209
 210       # Acquire all nodes and one instance
 211       self.needed_locks = {
 212         locking.LEVEL_NODE: locking.ALL_SET,
 213         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 214       }
 215       # Acquire just two nodes
 216       self.needed_locks = {
 217         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 218       }
 219       # Acquire no locks
 220       self.needed_locks = {} # No, you can't leave it to the default value None
 221
 222     """
 223     # The implementation of this method is mandatory only if the new LU is
 224     # concurrent, so that old LUs don't need to be changed all at the same
 225     # time.
 226     if self.REQ_BGL:
 227       self.needed_locks = {} # Exclusive LUs don't need locks.
 228     else:
 229       raise NotImplementedError
 230
 231   def DeclareLocks(self, level):
 232     """Declare LU locking needs for a level
 233
 234     While most LUs can just declare their locking needs at ExpandNames time,
 235     sometimes there's the need to calculate some locks after having acquired
 236     the ones before. This function is called just before acquiring locks at a
 237     particular level, but after acquiring the ones at lower levels, and permits
 238     such calculations. It can be used to modify self.needed_locks, and by
 239     default it does nothing.
 240
 241     This function is only called if you have something already set in
 242     self.needed_locks for the level.
 243
 244     @param level: Locking level which is going to be locked
 245     @type level: member of ganeti.locking.LEVELS
 246
 247     """
 248
 249   def CheckPrereq(self):
 250     """Check prerequisites for this LU.
 251
 252     This method should check that the prerequisites for the execution
 253     of this LU are fulfilled. It can do internode communication, but
 254     it should be idempotent - no cluster or system changes are
 255     allowed.
 256
 257     The method should raise errors.OpPrereqError in case something is
 258     not fulfilled. Its return value is ignored.
 259
 260     This method should also update all the parameters of the opcode to
 261     their canonical form if it hasn't been done by ExpandNames before.
 262
 263     """
 264     if self.tasklets is not None:
 265       for (idx, tl) in enumerate(self.tasklets):
 266         logging.debug("Checking prerequisites for tasklet %s/%s",
 267                       idx + 1, len(self.tasklets))
 268         tl.CheckPrereq()
 269     else:
 270       pass
 271
 272   def Exec(self, feedback_fn):
 273     """Execute the LU.
 274
 275     This method should implement the actual work. It should raise
 276     errors.OpExecError for failures that are somewhat dealt with in
 277     code, or expected.
 278
 279     """
 280     if self.tasklets is not None:
 281       for (idx, tl) in enumerate(self.tasklets):
 282         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 283         tl.Exec(feedback_fn)
 284     else:
 285       raise NotImplementedError
 286
 287   def BuildHooksEnv(self):
 288     """Build hooks environment for this LU.
 289
 290     @rtype: dict
 291     @return: Dictionary containing the environment that will be used for
 292       running the hooks for this LU. The keys of the dict must not be prefixed
 293       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 294       will extend the environment with additional variables. If no environment
 295       should be defined, an empty dictionary should be returned (not C{None}).
 296     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 297       will not be called.
 298
 299     """
 300     raise NotImplementedError
 301
 302   def BuildHooksNodes(self):
 303     """Build list of nodes to run LU's hooks.
 304
 305     @rtype: tuple; (list, list)
 306     @return: Tuple containing a list of node names on which the hook
 307       should run before the execution and a list of node names on which the
 308       hook should run after the execution. No nodes should be returned as an
 309       empty list (and not None).
 310     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 311       will not be called.
 312
 313     """
 314     raise NotImplementedError
 315
 316   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 317     """Notify the LU about the results of its hooks.
 318
 319     This method is called every time a hooks phase is executed, and notifies
 320     the Logical Unit about the hooks' result. The LU can then use it to alter
 321     its result based on the hooks.  By default the method does nothing and the
 322     previous result is passed back unchanged but any LU can define it if it
 323     wants to use the local cluster hook-scripts somehow.
 324
 325     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 326         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 327     @param hook_results: the results of the multi-node hooks rpc call
 328     @param feedback_fn: function used send feedback back to the caller
 329     @param lu_result: the previous Exec result this LU had, or None
 330         in the PRE phase
 331     @return: the new Exec result, based on the previous result
 332         and hook results
 333
 334     """
 335     # API must be kept, thus we ignore the unused argument and could
 336     # be a function warnings
 337     # pylint: disable=W0613,R0201
 338     return lu_result
 339
 340   def _ExpandAndLockInstance(self):
 341     """Helper function to expand and lock an instance.
 342
 343     Many LUs that work on an instance take its name in self.op.instance_name
 344     and need to expand it and then declare the expanded name for locking. This
 345     function does it, and then updates self.op.instance_name to the expanded
 346     name. It also initializes needed_locks as a dict, if this hasn't been done
 347     before.
 348
 349     """
 350     if self.needed_locks is None:
 351       self.needed_locks = {}
 352     else:
 353       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 354         "_ExpandAndLockInstance called with instance-level locks set"
 355     self.op.instance_name = _ExpandInstanceName(self.cfg,
 356                                                 self.op.instance_name)
 357     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 358
 359   def _LockInstancesNodes(self, primary_only=False,
 360                           level=locking.LEVEL_NODE):
 361     """Helper function to declare instances' nodes for locking.
 362
 363     This function should be called after locking one or more instances to lock
 364     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 365     with all primary or secondary nodes for instances already locked and
 366     present in self.needed_locks[locking.LEVEL_INSTANCE].
 367
 368     It should be called from DeclareLocks, and for safety only works if
 369     self.recalculate_locks[locking.LEVEL_NODE] is set.
 370
 371     In the future it may grow parameters to just lock some instance's nodes, or
 372     to just lock primaries or secondary nodes, if needed.
 373
 374     If should be called in DeclareLocks in a way similar to::
 375
 376       if level == locking.LEVEL_NODE:
 377         self._LockInstancesNodes()
 378
 379     @type primary_only: boolean
 380     @param primary_only: only lock primary nodes of locked instances
 381     @param level: Which lock level to use for locking nodes
 382
 383     """
 384     assert level in self.recalculate_locks, \
 385       "_LockInstancesNodes helper function called with no nodes to recalculate"
 386
 387     # TODO: check if we're really been called with the instance locks held
 388
 389     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 390     # future we might want to have different behaviors depending on the value
 391     # of self.recalculate_locks[locking.LEVEL_NODE]
 392     wanted_nodes = []
 393     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 394     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 395       wanted_nodes.append(instance.primary_node)
 396       if not primary_only:
 397         wanted_nodes.extend(instance.secondary_nodes)
 398
 399     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 400       self.needed_locks[level] = wanted_nodes
 401     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 402       self.needed_locks[level].extend(wanted_nodes)
 403     else:
 404       raise errors.ProgrammerError("Unknown recalculation mode")
 405
 406     del self.recalculate_locks[level]
 407
 408
 409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 410   """Simple LU which runs no hooks.
 411
 412   This LU is intended as a parent for other LogicalUnits which will
 413   run no hooks, in order to reduce duplicate code.
 414
 415   """
 416   HPATH = None
 417   HTYPE = None
 418
 419   def BuildHooksEnv(self):
 420     """Empty BuildHooksEnv for NoHooksLu.
 421
 422     This just raises an error.
 423
 424     """
 425     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 426
 427   def BuildHooksNodes(self):
 428     """Empty BuildHooksNodes for NoHooksLU.
 429
 430     """
 431     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 432
 433
 434 class Tasklet:
 435   """Tasklet base class.
 436
 437   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 438   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 439   tasklets know nothing about locks.
 440
 441   Subclasses must follow these rules:
 442     - Implement CheckPrereq
 443     - Implement Exec
 444
 445   """
 446   def __init__(self, lu):
 447     self.lu = lu
 448
 449     # Shortcuts
 450     self.cfg = lu.cfg
 451     self.rpc = lu.rpc
 452
 453   def CheckPrereq(self):
 454     """Check prerequisites for this tasklets.
 455
 456     This method should check whether the prerequisites for the execution of
 457     this tasklet are fulfilled. It can do internode communication, but it
 458     should be idempotent - no cluster or system changes are allowed.
 459
 460     The method should raise errors.OpPrereqError in case something is not
 461     fulfilled. Its return value is ignored.
 462
 463     This method should also update all parameters to their canonical form if it
 464     hasn't been done before.
 465
 466     """
 467     pass
 468
 469   def Exec(self, feedback_fn):
 470     """Execute the tasklet.
 471
 472     This method should implement the actual work. It should raise
 473     errors.OpExecError for failures that are somewhat dealt with in code, or
 474     expected.
 475
 476     """
 477     raise NotImplementedError
 478
 479
 480 class _QueryBase:
 481   """Base for query utility classes.
 482
 483   """
 484   #: Attribute holding field definitions
 485   FIELDS = None
 486
 487   def __init__(self, qfilter, fields, use_locking):
 488     """Initializes this class.
 489
 490     """
 491     self.use_locking = use_locking
 492
 493     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 494                              namefield="name")
 495     self.requested_data = self.query.RequestedData()
 496     self.names = self.query.RequestedNames()
 497
 498     # Sort only if no names were requested
 499     self.sort_by_name = not self.names
 500
 501     self.do_locking = None
 502     self.wanted = None
 503
 504   def _GetNames(self, lu, all_names, lock_level):
 505     """Helper function to determine names asked for in the query.
 506
 507     """
 508     if self.do_locking:
 509       names = lu.owned_locks(lock_level)
 510     else:
 511       names = all_names
 512
 513     if self.wanted == locking.ALL_SET:
 514       assert not self.names
 515       # caller didn't specify names, so ordering is not important
 516       return utils.NiceSort(names)
 517
 518     # caller specified names and we must keep the same order
 519     assert self.names
 520     assert not self.do_locking or lu.glm.is_owned(lock_level)
 521
 522     missing = set(self.wanted).difference(names)
 523     if missing:
 524       raise errors.OpExecError("Some items were removed before retrieving"
 525                                " their data: %s" % missing)
 526
 527     # Return expanded names
 528     return self.wanted
 529
 530   def ExpandNames(self, lu):
 531     """Expand names for this query.
 532
 533     See L{LogicalUnit.ExpandNames}.
 534
 535     """
 536     raise NotImplementedError()
 537
 538   def DeclareLocks(self, lu, level):
 539     """Declare locks for this query.
 540
 541     See L{LogicalUnit.DeclareLocks}.
 542
 543     """
 544     raise NotImplementedError()
 545
 546   def _GetQueryData(self, lu):
 547     """Collects all data for this query.
 548
 549     @return: Query data object
 550
 551     """
 552     raise NotImplementedError()
 553
 554   def NewStyleQuery(self, lu):
 555     """Collect data and execute query.
 556
 557     """
 558     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 559                                   sort_by_name=self.sort_by_name)
 560
 561   def OldStyleQuery(self, lu):
 562     """Collect data and execute query.
 563
 564     """
 565     return self.query.OldStyleQuery(self._GetQueryData(lu),
 566                                     sort_by_name=self.sort_by_name)
 567
 568
 569 def _ShareAll():
 570   """Returns a dict declaring all lock levels shared.
 571
 572   """
 573   return dict.fromkeys(locking.LEVELS, 1)
 574
 575
 576 def _MakeLegacyNodeInfo(data):
 577   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 578
 579   Converts the data into a single dictionary. This is fine for most use cases,
 580   but some require information from more than one volume group or hypervisor.
 581
 582   """
 583   (bootid, (vg_info, ), (hv_info, )) = data
 584
 585   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 586     "bootid": bootid,
 587     })
 588
 589
 590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 591   """Checks if the owned node groups are still correct for an instance.
 592
 593   @type cfg: L{config.ConfigWriter}
 594   @param cfg: The cluster configuration
 595   @type instance_name: string
 596   @param instance_name: Instance name
 597   @type owned_groups: set or frozenset
 598   @param owned_groups: List of currently owned node groups
 599
 600   """
 601   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 602
 603   if not owned_groups.issuperset(inst_groups):
 604     raise errors.OpPrereqError("Instance %s's node groups changed since"
 605                                " locks were acquired, current groups are"
 606                                " are '%s', owning groups '%s'; retry the"
 607                                " operation" %
 608                                (instance_name,
 609                                 utils.CommaJoin(inst_groups),
 610                                 utils.CommaJoin(owned_groups)),
 611                                errors.ECODE_STATE)
 612
 613   return inst_groups
 614
 615
 616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 617   """Checks if the instances in a node group are still correct.
 618
 619   @type cfg: L{config.ConfigWriter}
 620   @param cfg: The cluster configuration
 621   @type group_uuid: string
 622   @param group_uuid: Node group UUID
 623   @type owned_instances: set or frozenset
 624   @param owned_instances: List of currently owned instances
 625
 626   """
 627   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 628   if owned_instances != wanted_instances:
 629     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 630                                " locks were acquired, wanted '%s', have '%s';"
 631                                " retry the operation" %
 632                                (group_uuid,
 633                                 utils.CommaJoin(wanted_instances),
 634                                 utils.CommaJoin(owned_instances)),
 635                                errors.ECODE_STATE)
 636
 637   return wanted_instances
 638
 639
 640 def _SupportsOob(cfg, node):
 641   """Tells if node supports OOB.
 642
 643   @type cfg: L{config.ConfigWriter}
 644   @param cfg: The cluster configuration
 645   @type node: L{objects.Node}
 646   @param node: The node
 647   @return: The OOB script if supported or an empty string otherwise
 648
 649   """
 650   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 651
 652
 653 def _GetWantedNodes(lu, nodes):
 654   """Returns list of checked and expanded node names.
 655
 656   @type lu: L{LogicalUnit}
 657   @param lu: the logical unit on whose behalf we execute
 658   @type nodes: list
 659   @param nodes: list of node names or None for all nodes
 660   @rtype: list
 661   @return: the list of nodes, sorted
 662   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 663
 664   """
 665   if nodes:
 666     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 667
 668   return utils.NiceSort(lu.cfg.GetNodeList())
 669
 670
 671 def _GetWantedInstances(lu, instances):
 672   """Returns list of checked and expanded instance names.
 673
 674   @type lu: L{LogicalUnit}
 675   @param lu: the logical unit on whose behalf we execute
 676   @type instances: list
 677   @param instances: list of instance names or None for all instances
 678   @rtype: list
 679   @return: the list of instances, sorted
 680   @raise errors.OpPrereqError: if the instances parameter is wrong type
 681   @raise errors.OpPrereqError: if any of the passed instances is not found
 682
 683   """
 684   if instances:
 685     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 686   else:
 687     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 688   return wanted
 689
 690
 691 def _GetUpdatedParams(old_params, update_dict,
 692                       use_default=True, use_none=False):
 693   """Return the new version of a parameter dictionary.
 694
 695   @type old_params: dict
 696   @param old_params: old parameters
 697   @type update_dict: dict
 698   @param update_dict: dict containing new parameter values, or
 699       constants.VALUE_DEFAULT to reset the parameter to its default
 700       value
 701   @param use_default: boolean
 702   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 703       values as 'to be deleted' values
 704   @param use_none: boolean
 705   @type use_none: whether to recognise C{None} values as 'to be
 706       deleted' values
 707   @rtype: dict
 708   @return: the new parameter dictionary
 709
 710   """
 711   params_copy = copy.deepcopy(old_params)
 712   for key, val in update_dict.iteritems():
 713     if ((use_default and val == constants.VALUE_DEFAULT) or
 714         (use_none and val is None)):
 715       try:
 716         del params_copy[key]
 717       except KeyError:
 718         pass
 719     else:
 720       params_copy[key] = val
 721   return params_copy
 722
 723
 724 def _UpdateAndVerifySubDict(base, updates, type_check):
 725   """Updates and verifies a dict with sub dicts of the same type.
 726
 727   @param base: The dict with the old data
 728   @param updates: The dict with the new data
 729   @param type_check: Dict suitable to ForceDictType to verify correct types
 730   @returns: A new dict with updated and verified values
 731
 732   """
 733   def fn(old, value):
 734     new = _GetUpdatedParams(old, value)
 735     utils.ForceDictType(new, type_check)
 736     return new
 737
 738   ret = copy.deepcopy(base)
 739   ret.update(dict((key, fn(base.get(key, {}), value))
 740                   for key, value in updates.items()))
 741   return ret
 742
 743
 744 def _MergeAndVerifyHvState(op_input, obj_input):
 745   """Combines the hv state from an opcode with the one of the object
 746
 747   @param op_input: The input dict from the opcode
 748   @param obj_input: The input dict from the objects
 749   @return: The verified and updated dict
 750
 751   """
 752   if op_input:
 753     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 754     if invalid_hvs:
 755       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 756                                  " %s" % utils.CommaJoin(invalid_hvs),
 757                                  errors.ECODE_INVAL)
 758     if obj_input is None:
 759       obj_input = {}
 760     type_check = constants.HVSTS_PARAMETER_TYPES
 761     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 762
 763   return None
 764
 765
 766 def _MergeAndVerifyDiskState(op_input, obj_input):
 767   """Combines the disk state from an opcode with the one of the object
 768
 769   @param op_input: The input dict from the opcode
 770   @param obj_input: The input dict from the objects
 771   @return: The verified and updated dict
 772   """
 773   if op_input:
 774     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 775     if invalid_dst:
 776       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 777                                  utils.CommaJoin(invalid_dst),
 778                                  errors.ECODE_INVAL)
 779     type_check = constants.DSS_PARAMETER_TYPES
 780     if obj_input is None:
 781       obj_input = {}
 782     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 783                                               type_check))
 784                 for key, value in op_input.items())
 785
 786   return None
 787
 788
 789 def _ReleaseLocks(lu, level, names=None, keep=None):
 790   """Releases locks owned by an LU.
 791
 792   @type lu: L{LogicalUnit}
 793   @param level: Lock level
 794   @type names: list or None
 795   @param names: Names of locks to release
 796   @type keep: list or None
 797   @param keep: Names of locks to retain
 798
 799   """
 800   assert not (keep is not None and names is not None), \
 801          "Only one of the 'names' and the 'keep' parameters can be given"
 802
 803   if names is not None:
 804     should_release = names.__contains__
 805   elif keep:
 806     should_release = lambda name: name not in keep
 807   else:
 808     should_release = None
 809
 810   owned = lu.owned_locks(level)
 811   if not owned:
 812     # Not owning any lock at this level, do nothing
 813     pass
 814
 815   elif should_release:
 816     retain = []
 817     release = []
 818
 819     # Determine which locks to release
 820     for name in owned:
 821       if should_release(name):
 822         release.append(name)
 823       else:
 824         retain.append(name)
 825
 826     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 827
 828     # Release just some locks
 829     lu.glm.release(level, names=release)
 830
 831     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 832   else:
 833     # Release everything
 834     lu.glm.release(level)
 835
 836     assert not lu.glm.is_owned(level), "No locks should be owned"
 837
 838
 839 def _MapInstanceDisksToNodes(instances):
 840   """Creates a map from (node, volume) to instance name.
 841
 842   @type instances: list of L{objects.Instance}
 843   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 844
 845   """
 846   return dict(((node, vol), inst.name)
 847               for inst in instances
 848               for (node, vols) in inst.MapLVsByNode().items()
 849               for vol in vols)
 850
 851
 852 def _RunPostHook(lu, node_name):
 853   """Runs the post-hook for an opcode on a single node.
 854
 855   """
 856   hm = lu.proc.BuildHooksManager(lu)
 857   try:
 858     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 859   except:
 860     # pylint: disable=W0702
 861     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 862
 863
 864 def _CheckOutputFields(static, dynamic, selected):
 865   """Checks whether all selected fields are valid.
 866
 867   @type static: L{utils.FieldSet}
 868   @param static: static fields set
 869   @type dynamic: L{utils.FieldSet}
 870   @param dynamic: dynamic fields set
 871
 872   """
 873   f = utils.FieldSet()
 874   f.Extend(static)
 875   f.Extend(dynamic)
 876
 877   delta = f.NonMatching(selected)
 878   if delta:
 879     raise errors.OpPrereqError("Unknown output fields selected: %s"
 880                                % ",".join(delta), errors.ECODE_INVAL)
 881
 882
 883 def _CheckGlobalHvParams(params):
 884   """Validates that given hypervisor params are not global ones.
 885
 886   This will ensure that instances don't get customised versions of
 887   global params.
 888
 889   """
 890   used_globals = constants.HVC_GLOBALS.intersection(params)
 891   if used_globals:
 892     msg = ("The following hypervisor parameters are global and cannot"
 893            " be customized at instance level, please modify them at"
 894            " cluster level: %s" % utils.CommaJoin(used_globals))
 895     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 896
 897
 898 def _CheckNodeOnline(lu, node, msg=None):
 899   """Ensure that a given node is online.
 900
 901   @param lu: the LU on behalf of which we make the check
 902   @param node: the node to check
 903   @param msg: if passed, should be a message to replace the default one
 904   @raise errors.OpPrereqError: if the node is offline
 905
 906   """
 907   if msg is None:
 908     msg = "Can't use offline node"
 909   if lu.cfg.GetNodeInfo(node).offline:
 910     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 911
 912
 913 def _CheckNodeNotDrained(lu, node):
 914   """Ensure that a given node is not drained.
 915
 916   @param lu: the LU on behalf of which we make the check
 917   @param node: the node to check
 918   @raise errors.OpPrereqError: if the node is drained
 919
 920   """
 921   if lu.cfg.GetNodeInfo(node).drained:
 922     raise errors.OpPrereqError("Can't use drained node %s" % node,
 923                                errors.ECODE_STATE)
 924
 925
 926 def _CheckNodeVmCapable(lu, node):
 927   """Ensure that a given node is vm capable.
 928
 929   @param lu: the LU on behalf of which we make the check
 930   @param node: the node to check
 931   @raise errors.OpPrereqError: if the node is not vm capable
 932
 933   """
 934   if not lu.cfg.GetNodeInfo(node).vm_capable:
 935     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 936                                errors.ECODE_STATE)
 937
 938
 939 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 940   """Ensure that a node supports a given OS.
 941
 942   @param lu: the LU on behalf of which we make the check
 943   @param node: the node to check
 944   @param os_name: the OS to query about
 945   @param force_variant: whether to ignore variant errors
 946   @raise errors.OpPrereqError: if the node is not supporting the OS
 947
 948   """
 949   result = lu.rpc.call_os_get(node, os_name)
 950   result.Raise("OS '%s' not in supported OS list for node %s" %
 951                (os_name, node),
 952                prereq=True, ecode=errors.ECODE_INVAL)
 953   if not force_variant:
 954     _CheckOSVariant(result.payload, os_name)
 955
 956
 957 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 958   """Ensure that a node has the given secondary ip.
 959
 960   @type lu: L{LogicalUnit}
 961   @param lu: the LU on behalf of which we make the check
 962   @type node: string
 963   @param node: the node to check
 964   @type secondary_ip: string
 965   @param secondary_ip: the ip to check
 966   @type prereq: boolean
 967   @param prereq: whether to throw a prerequisite or an execute error
 968   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 969   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 970
 971   """
 972   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 973   result.Raise("Failure checking secondary ip on node %s" % node,
 974                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 975   if not result.payload:
 976     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 977            " please fix and re-run this command" % secondary_ip)
 978     if prereq:
 979       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 980     else:
 981       raise errors.OpExecError(msg)
 982
 983
 984 def _GetClusterDomainSecret():
 985   """Reads the cluster domain secret.
 986
 987   """
 988   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 989                                strict=True)
 990
 991
 992 def _CheckInstanceState(lu, instance, req_states, msg=None):
 993   """Ensure that an instance is in one of the required states.
 994
 995   @param lu: the LU on behalf of which we make the check
 996   @param instance: the instance to check
 997   @param msg: if passed, should be a message to replace the default one
 998   @raise errors.OpPrereqError: if the instance is not in the required state
 999
1000   """
1001   if msg is None:
1002     msg = "can't use instance from outside %s states" % ", ".join(req_states)
1003   if instance.admin_state not in req_states:
1004     raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
1005                                (instance, instance.admin_state, msg),
1006                                errors.ECODE_STATE)
1007
1008   if constants.ADMINST_UP not in req_states:
1009     pnode = instance.primary_node
1010     ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1011     ins_l.Raise("Can't contact node %s for instance information" % pnode,
1012                 prereq=True, ecode=errors.ECODE_ENVIRON)
1013
1014     if instance.name in ins_l.payload:
1015       raise errors.OpPrereqError("Instance %s is running, %s" %
1016                                  (instance.name, msg), errors.ECODE_STATE)
1017
1018
1019 def _CheckMinMaxSpecs(name, ipolicy, value):
1020   """Checks if value is in the desired range.
1021
1022   @param name: name of the parameter for which we perform the check
1023   @param ipolicy: dictionary containing min, max and std values
1024   @param value: actual value that we want to use
1025   @return: None or element not meeting the criteria
1026
1027
1028   """
1029   if value in [None, constants.VALUE_AUTO]:
1030     return None
1031   max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1032   min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1033   if value > max_v or min_v > value:
1034     return ("%s value %s is not in range [%s, %s]" %
1035             (name, value, min_v, max_v))
1036   return None
1037
1038
1039 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1040                                  nic_count, disk_sizes,
1041                                  _check_spec_fn=_CheckMinMaxSpecs):
1042   """Verifies ipolicy against provided specs.
1043
1044   @type ipolicy: dict
1045   @param ipolicy: The ipolicy
1046   @type mem_size: int
1047   @param mem_size: The memory size
1048   @type cpu_count: int
1049   @param cpu_count: Used cpu cores
1050   @type disk_count: int
1051   @param disk_count: Number of disks used
1052   @type nic_count: int
1053   @param nic_count: Number of nics used
1054   @type disk_sizes: list of ints
1055   @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1056   @param _check_spec_fn: The checking function (unittest only)
1057   @return: A list of violations, or an empty list of no violations are found
1058
1059   """
1060   assert disk_count == len(disk_sizes)
1061
1062   test_settings = [
1063     (constants.ISPEC_MEM_SIZE, mem_size),
1064     (constants.ISPEC_CPU_COUNT, cpu_count),
1065     (constants.ISPEC_DISK_COUNT, disk_count),
1066     (constants.ISPEC_NIC_COUNT, nic_count),
1067     ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1068
1069   return filter(None,
1070                 (_check_spec_fn(name, ipolicy, value)
1071                  for (name, value) in test_settings))
1072
1073
1074 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1075                                      _compute_fn=_ComputeIPolicySpecViolation):
1076   """Compute if instance meets the specs of ipolicy.
1077
1078   @type ipolicy: dict
1079   @param ipolicy: The ipolicy to verify against
1080   @type instance: L{objects.Instance}
1081   @param instance: The instance to verify
1082   @param _compute_fn: The function to verify ipolicy (unittest only)
1083   @see: L{_ComputeIPolicySpecViolation}
1084
1085   """
1086   mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1087   cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1088   disk_count = len(instance.disks)
1089   disk_sizes = [disk.size for disk in instance.disks]
1090   nic_count = len(instance.nics)
1091
1092   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1093                      disk_sizes)
1094
1095
1096 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1097     _compute_fn=_ComputeIPolicySpecViolation):
1098   """Compute if instance specs meets the specs of ipolicy.
1099
1100   @type ipolicy: dict
1101   @param ipolicy: The ipolicy to verify against
1102   @param instance_spec: dict
1103   @param instance_spec: The instance spec to verify
1104   @param _compute_fn: The function to verify ipolicy (unittest only)
1105   @see: L{_ComputeIPolicySpecViolation}
1106
1107   """
1108   mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1109   cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1110   disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1111   disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1112   nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1113
1114   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1115                      disk_sizes)
1116
1117
1118 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1119                                  target_group,
1120                                  _compute_fn=_ComputeIPolicyInstanceViolation):
1121   """Compute if instance meets the specs of the new target group.
1122
1123   @param ipolicy: The ipolicy to verify
1124   @param instance: The instance object to verify
1125   @param current_group: The current group of the instance
1126   @param target_group: The new group of the instance
1127   @param _compute_fn: The function to verify ipolicy (unittest only)
1128   @see: L{_ComputeIPolicySpecViolation}
1129
1130   """
1131   if current_group == target_group:
1132     return []
1133   else:
1134     return _compute_fn(ipolicy, instance)
1135
1136
1137 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1138                             _compute_fn=_ComputeIPolicyNodeViolation):
1139   """Checks that the target node is correct in terms of instance policy.
1140
1141   @param ipolicy: The ipolicy to verify
1142   @param instance: The instance object to verify
1143   @param node: The new node to relocate
1144   @param ignore: Ignore violations of the ipolicy
1145   @param _compute_fn: The function to verify ipolicy (unittest only)
1146   @see: L{_ComputeIPolicySpecViolation}
1147
1148   """
1149   res = _compute_fn(ipolicy, instance, instance.primary_node.group, node.group)
1150
1151   if res:
1152     msg = ("Instance does not meet target node group's (%s) instance"
1153            " policy: %s") % (node.group, utils.CommaJoin(res))
1154     if ignore:
1155       lu.LogWarning(msg)
1156     else:
1157       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1158
1159
1160 def _ExpandItemName(fn, name, kind):
1161   """Expand an item name.
1162
1163   @param fn: the function to use for expansion
1164   @param name: requested item name
1165   @param kind: text description ('Node' or 'Instance')
1166   @return: the resolved (full) name
1167   @raise errors.OpPrereqError: if the item is not found
1168
1169   """
1170   full_name = fn(name)
1171   if full_name is None:
1172     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1173                                errors.ECODE_NOENT)
1174   return full_name
1175
1176
1177 def _ExpandNodeName(cfg, name):
1178   """Wrapper over L{_ExpandItemName} for nodes."""
1179   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1180
1181
1182 def _ExpandInstanceName(cfg, name):
1183   """Wrapper over L{_ExpandItemName} for instance."""
1184   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1185
1186
1187 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1188                           minmem, maxmem, vcpus, nics, disk_template, disks,
1189                           bep, hvp, hypervisor_name, tags):
1190   """Builds instance related env variables for hooks
1191
1192   This builds the hook environment from individual variables.
1193
1194   @type name: string
1195   @param name: the name of the instance
1196   @type primary_node: string
1197   @param primary_node: the name of the instance's primary node
1198   @type secondary_nodes: list
1199   @param secondary_nodes: list of secondary nodes as strings
1200   @type os_type: string
1201   @param os_type: the name of the instance's OS
1202   @type status: string
1203   @param status: the desired status of the instance
1204   @type minmem: string
1205   @param minmem: the minimum memory size of the instance
1206   @type maxmem: string
1207   @param maxmem: the maximum memory size of the instance
1208   @type vcpus: string
1209   @param vcpus: the count of VCPUs the instance has
1210   @type nics: list
1211   @param nics: list of tuples (ip, mac, mode, link) representing
1212       the NICs the instance has
1213   @type disk_template: string
1214   @param disk_template: the disk template of the instance
1215   @type disks: list
1216   @param disks: the list of (size, mode) pairs
1217   @type bep: dict
1218   @param bep: the backend parameters for the instance
1219   @type hvp: dict
1220   @param hvp: the hypervisor parameters for the instance
1221   @type hypervisor_name: string
1222   @param hypervisor_name: the hypervisor for the instance
1223   @type tags: list
1224   @param tags: list of instance tags as strings
1225   @rtype: dict
1226   @return: the hook environment for this instance
1227
1228   """
1229   env = {
1230     "OP_TARGET": name,
1231     "INSTANCE_NAME": name,
1232     "INSTANCE_PRIMARY": primary_node,
1233     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1234     "INSTANCE_OS_TYPE": os_type,
1235     "INSTANCE_STATUS": status,
1236     "INSTANCE_MINMEM": minmem,
1237     "INSTANCE_MAXMEM": maxmem,
1238     # TODO(2.7) remove deprecated "memory" value
1239     "INSTANCE_MEMORY": maxmem,
1240     "INSTANCE_VCPUS": vcpus,
1241     "INSTANCE_DISK_TEMPLATE": disk_template,
1242     "INSTANCE_HYPERVISOR": hypervisor_name,
1243   }
1244   if nics:
1245     nic_count = len(nics)
1246     for idx, (ip, mac, mode, link) in enumerate(nics):
1247       if ip is None:
1248         ip = ""
1249       env["INSTANCE_NIC%d_IP" % idx] = ip
1250       env["INSTANCE_NIC%d_MAC" % idx] = mac
1251       env["INSTANCE_NIC%d_MODE" % idx] = mode
1252       env["INSTANCE_NIC%d_LINK" % idx] = link
1253       if mode == constants.NIC_MODE_BRIDGED:
1254         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1255   else:
1256     nic_count = 0
1257
1258   env["INSTANCE_NIC_COUNT"] = nic_count
1259
1260   if disks:
1261     disk_count = len(disks)
1262     for idx, (size, mode) in enumerate(disks):
1263       env["INSTANCE_DISK%d_SIZE" % idx] = size
1264       env["INSTANCE_DISK%d_MODE" % idx] = mode
1265   else:
1266     disk_count = 0
1267
1268   env["INSTANCE_DISK_COUNT"] = disk_count
1269
1270   if not tags:
1271     tags = []
1272
1273   env["INSTANCE_TAGS"] = " ".join(tags)
1274
1275   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1276     for key, value in source.items():
1277       env["INSTANCE_%s_%s" % (kind, key)] = value
1278
1279   return env
1280
1281
1282 def _NICListToTuple(lu, nics):
1283   """Build a list of nic information tuples.
1284
1285   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1286   value in LUInstanceQueryData.
1287
1288   @type lu:  L{LogicalUnit}
1289   @param lu: the logical unit on whose behalf we execute
1290   @type nics: list of L{objects.NIC}
1291   @param nics: list of nics to convert to hooks tuples
1292
1293   """
1294   hooks_nics = []
1295   cluster = lu.cfg.GetClusterInfo()
1296   for nic in nics:
1297     ip = nic.ip
1298     mac = nic.mac
1299     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1300     mode = filled_params[constants.NIC_MODE]
1301     link = filled_params[constants.NIC_LINK]
1302     hooks_nics.append((ip, mac, mode, link))
1303   return hooks_nics
1304
1305
1306 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1307   """Builds instance related env variables for hooks from an object.
1308
1309   @type lu: L{LogicalUnit}
1310   @param lu: the logical unit on whose behalf we execute
1311   @type instance: L{objects.Instance}
1312   @param instance: the instance for which we should build the
1313       environment
1314   @type override: dict
1315   @param override: dictionary with key/values that will override
1316       our values
1317   @rtype: dict
1318   @return: the hook environment dictionary
1319
1320   """
1321   cluster = lu.cfg.GetClusterInfo()
1322   bep = cluster.FillBE(instance)
1323   hvp = cluster.FillHV(instance)
1324   args = {
1325     "name": instance.name,
1326     "primary_node": instance.primary_node,
1327     "secondary_nodes": instance.secondary_nodes,
1328     "os_type": instance.os,
1329     "status": instance.admin_state,
1330     "maxmem": bep[constants.BE_MAXMEM],
1331     "minmem": bep[constants.BE_MINMEM],
1332     "vcpus": bep[constants.BE_VCPUS],
1333     "nics": _NICListToTuple(lu, instance.nics),
1334     "disk_template": instance.disk_template,
1335     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1336     "bep": bep,
1337     "hvp": hvp,
1338     "hypervisor_name": instance.hypervisor,
1339     "tags": instance.tags,
1340   }
1341   if override:
1342     args.update(override)
1343   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1344
1345
1346 def _AdjustCandidatePool(lu, exceptions):
1347   """Adjust the candidate pool after node operations.
1348
1349   """
1350   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1351   if mod_list:
1352     lu.LogInfo("Promoted nodes to master candidate role: %s",
1353                utils.CommaJoin(node.name for node in mod_list))
1354     for name in mod_list:
1355       lu.context.ReaddNode(name)
1356   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1357   if mc_now > mc_max:
1358     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1359                (mc_now, mc_max))
1360
1361
1362 def _DecideSelfPromotion(lu, exceptions=None):
1363   """Decide whether I should promote myself as a master candidate.
1364
1365   """
1366   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1367   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1368   # the new node will increase mc_max with one, so:
1369   mc_should = min(mc_should + 1, cp_size)
1370   return mc_now < mc_should
1371
1372
1373 def _CalculateGroupIPolicy(cluster, group):
1374   """Calculate instance policy for group.
1375
1376   """
1377   return cluster.SimpleFillIPolicy(group.ipolicy)
1378
1379
1380 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1381   """Check that the brigdes needed by a list of nics exist.
1382
1383   """
1384   cluster = lu.cfg.GetClusterInfo()
1385   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1386   brlist = [params[constants.NIC_LINK] for params in paramslist
1387             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1388   if brlist:
1389     result = lu.rpc.call_bridges_exist(target_node, brlist)
1390     result.Raise("Error checking bridges on destination node '%s'" %
1391                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1392
1393
1394 def _CheckInstanceBridgesExist(lu, instance, node=None):
1395   """Check that the brigdes needed by an instance exist.
1396
1397   """
1398   if node is None:
1399     node = instance.primary_node
1400   _CheckNicsBridgesExist(lu, instance.nics, node)
1401
1402
1403 def _CheckOSVariant(os_obj, name):
1404   """Check whether an OS name conforms to the os variants specification.
1405
1406   @type os_obj: L{objects.OS}
1407   @param os_obj: OS object to check
1408   @type name: string
1409   @param name: OS name passed by the user, to check for validity
1410
1411   """
1412   variant = objects.OS.GetVariant(name)
1413   if not os_obj.supported_variants:
1414     if variant:
1415       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1416                                  " passed)" % (os_obj.name, variant),
1417                                  errors.ECODE_INVAL)
1418     return
1419   if not variant:
1420     raise errors.OpPrereqError("OS name must include a variant",
1421                                errors.ECODE_INVAL)
1422
1423   if variant not in os_obj.supported_variants:
1424     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1425
1426
1427 def _GetNodeInstancesInner(cfg, fn):
1428   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1429
1430
1431 def _GetNodeInstances(cfg, node_name):
1432   """Returns a list of all primary and secondary instances on a node.
1433
1434   """
1435
1436   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1437
1438
1439 def _GetNodePrimaryInstances(cfg, node_name):
1440   """Returns primary instances on a node.
1441
1442   """
1443   return _GetNodeInstancesInner(cfg,
1444                                 lambda inst: node_name == inst.primary_node)
1445
1446
1447 def _GetNodeSecondaryInstances(cfg, node_name):
1448   """Returns secondary instances on a node.
1449
1450   """
1451   return _GetNodeInstancesInner(cfg,
1452                                 lambda inst: node_name in inst.secondary_nodes)
1453
1454
1455 def _GetStorageTypeArgs(cfg, storage_type):
1456   """Returns the arguments for a storage type.
1457
1458   """
1459   # Special case for file storage
1460   if storage_type == constants.ST_FILE:
1461     # storage.FileStorage wants a list of storage directories
1462     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1463
1464   return []
1465
1466
1467 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1468   faulty = []
1469
1470   for dev in instance.disks:
1471     cfg.SetDiskID(dev, node_name)
1472
1473   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1474   result.Raise("Failed to get disk status from node %s" % node_name,
1475                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1476
1477   for idx, bdev_status in enumerate(result.payload):
1478     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1479       faulty.append(idx)
1480
1481   return faulty
1482
1483
1484 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1485   """Check the sanity of iallocator and node arguments and use the
1486   cluster-wide iallocator if appropriate.
1487
1488   Check that at most one of (iallocator, node) is specified. If none is
1489   specified, then the LU's opcode's iallocator slot is filled with the
1490   cluster-wide default iallocator.
1491
1492   @type iallocator_slot: string
1493   @param iallocator_slot: the name of the opcode iallocator slot
1494   @type node_slot: string
1495   @param node_slot: the name of the opcode target node slot
1496
1497   """
1498   node = getattr(lu.op, node_slot, None)
1499   iallocator = getattr(lu.op, iallocator_slot, None)
1500
1501   if node is not None and iallocator is not None:
1502     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1503                                errors.ECODE_INVAL)
1504   elif node is None and iallocator is None:
1505     default_iallocator = lu.cfg.GetDefaultIAllocator()
1506     if default_iallocator:
1507       setattr(lu.op, iallocator_slot, default_iallocator)
1508     else:
1509       raise errors.OpPrereqError("No iallocator or node given and no"
1510                                  " cluster-wide default iallocator found;"
1511                                  " please specify either an iallocator or a"
1512                                  " node, or set a cluster-wide default"
1513                                  " iallocator")
1514
1515
1516 def _GetDefaultIAllocator(cfg, iallocator):
1517   """Decides on which iallocator to use.
1518
1519   @type cfg: L{config.ConfigWriter}
1520   @param cfg: Cluster configuration object
1521   @type iallocator: string or None
1522   @param iallocator: Iallocator specified in opcode
1523   @rtype: string
1524   @return: Iallocator name
1525
1526   """
1527   if not iallocator:
1528     # Use default iallocator
1529     iallocator = cfg.GetDefaultIAllocator()
1530
1531   if not iallocator:
1532     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1533                                " opcode nor as a cluster-wide default",
1534                                errors.ECODE_INVAL)
1535
1536   return iallocator
1537
1538
1539 class LUClusterPostInit(LogicalUnit):
1540   """Logical unit for running hooks after cluster initialization.
1541
1542   """
1543   HPATH = "cluster-init"
1544   HTYPE = constants.HTYPE_CLUSTER
1545
1546   def BuildHooksEnv(self):
1547     """Build hooks env.
1548
1549     """
1550     return {
1551       "OP_TARGET": self.cfg.GetClusterName(),
1552       }
1553
1554   def BuildHooksNodes(self):
1555     """Build hooks nodes.
1556
1557     """
1558     return ([], [self.cfg.GetMasterNode()])
1559
1560   def Exec(self, feedback_fn):
1561     """Nothing to do.
1562
1563     """
1564     return True
1565
1566
1567 class LUClusterDestroy(LogicalUnit):
1568   """Logical unit for destroying the cluster.
1569
1570   """
1571   HPATH = "cluster-destroy"
1572   HTYPE = constants.HTYPE_CLUSTER
1573
1574   def BuildHooksEnv(self):
1575     """Build hooks env.
1576
1577     """
1578     return {
1579       "OP_TARGET": self.cfg.GetClusterName(),
1580       }
1581
1582   def BuildHooksNodes(self):
1583     """Build hooks nodes.
1584
1585     """
1586     return ([], [])
1587
1588   def CheckPrereq(self):
1589     """Check prerequisites.
1590
1591     This checks whether the cluster is empty.
1592
1593     Any errors are signaled by raising errors.OpPrereqError.
1594
1595     """
1596     master = self.cfg.GetMasterNode()
1597
1598     nodelist = self.cfg.GetNodeList()
1599     if len(nodelist) != 1 or nodelist[0] != master:
1600       raise errors.OpPrereqError("There are still %d node(s) in"
1601                                  " this cluster." % (len(nodelist) - 1),
1602                                  errors.ECODE_INVAL)
1603     instancelist = self.cfg.GetInstanceList()
1604     if instancelist:
1605       raise errors.OpPrereqError("There are still %d instance(s) in"
1606                                  " this cluster." % len(instancelist),
1607                                  errors.ECODE_INVAL)
1608
1609   def Exec(self, feedback_fn):
1610     """Destroys the cluster.
1611
1612     """
1613     master_params = self.cfg.GetMasterNetworkParameters()
1614
1615     # Run post hooks on master node before it's removed
1616     _RunPostHook(self, master_params.name)
1617
1618     ems = self.cfg.GetUseExternalMipScript()
1619     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1620                                                      master_params, ems)
1621     if result.fail_msg:
1622       self.LogWarning("Error disabling the master IP address: %s",
1623                       result.fail_msg)
1624
1625     return master_params.name
1626
1627
1628 def _VerifyCertificate(filename):
1629   """Verifies a certificate for L{LUClusterVerifyConfig}.
1630
1631   @type filename: string
1632   @param filename: Path to PEM file
1633
1634   """
1635   try:
1636     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1637                                            utils.ReadFile(filename))
1638   except Exception, err: # pylint: disable=W0703
1639     return (LUClusterVerifyConfig.ETYPE_ERROR,
1640             "Failed to load X509 certificate %s: %s" % (filename, err))
1641
1642   (errcode, msg) = \
1643     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1644                                 constants.SSL_CERT_EXPIRATION_ERROR)
1645
1646   if msg:
1647     fnamemsg = "While verifying %s: %s" % (filename, msg)
1648   else:
1649     fnamemsg = None
1650
1651   if errcode is None:
1652     return (None, fnamemsg)
1653   elif errcode == utils.CERT_WARNING:
1654     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1655   elif errcode == utils.CERT_ERROR:
1656     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1657
1658   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1659
1660
1661 def _GetAllHypervisorParameters(cluster, instances):
1662   """Compute the set of all hypervisor parameters.
1663
1664   @type cluster: L{objects.Cluster}
1665   @param cluster: the cluster object
1666   @param instances: list of L{objects.Instance}
1667   @param instances: additional instances from which to obtain parameters
1668   @rtype: list of (origin, hypervisor, parameters)
1669   @return: a list with all parameters found, indicating the hypervisor they
1670        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1671
1672   """
1673   hvp_data = []
1674
1675   for hv_name in cluster.enabled_hypervisors:
1676     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1677
1678   for os_name, os_hvp in cluster.os_hvp.items():
1679     for hv_name, hv_params in os_hvp.items():
1680       if hv_params:
1681         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1682         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1683
1684   # TODO: collapse identical parameter values in a single one
1685   for instance in instances:
1686     if instance.hvparams:
1687       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1688                        cluster.FillHV(instance)))
1689
1690   return hvp_data
1691
1692
1693 class _VerifyErrors(object):
1694   """Mix-in for cluster/group verify LUs.
1695
1696   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1697   self.op and self._feedback_fn to be available.)
1698
1699   """
1700
1701   ETYPE_FIELD = "code"
1702   ETYPE_ERROR = "ERROR"
1703   ETYPE_WARNING = "WARNING"
1704
1705   def _Error(self, ecode, item, msg, *args, **kwargs):
1706     """Format an error message.
1707
1708     Based on the opcode's error_codes parameter, either format a
1709     parseable error code, or a simpler error string.
1710
1711     This must be called only from Exec and functions called from Exec.
1712
1713     """
1714     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1715     itype, etxt, _ = ecode
1716     # first complete the msg
1717     if args:
1718       msg = msg % args
1719     # then format the whole message
1720     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1721       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1722     else:
1723       if item:
1724         item = " " + item
1725       else:
1726         item = ""
1727       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1728     # and finally report it via the feedback_fn
1729     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1730
1731   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1732     """Log an error message if the passed condition is True.
1733
1734     """
1735     cond = (bool(cond)
1736             or self.op.debug_simulate_errors) # pylint: disable=E1101
1737
1738     # If the error code is in the list of ignored errors, demote the error to a
1739     # warning
1740     (_, etxt, _) = ecode
1741     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1742       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1743
1744     if cond:
1745       self._Error(ecode, *args, **kwargs)
1746
1747     # do not mark the operation as failed for WARN cases only
1748     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1749       self.bad = self.bad or cond
1750
1751
1752 class LUClusterVerify(NoHooksLU):
1753   """Submits all jobs necessary to verify the cluster.
1754
1755   """
1756   REQ_BGL = False
1757
1758   def ExpandNames(self):
1759     self.needed_locks = {}
1760
1761   def Exec(self, feedback_fn):
1762     jobs = []
1763
1764     if self.op.group_name:
1765       groups = [self.op.group_name]
1766       depends_fn = lambda: None
1767     else:
1768       groups = self.cfg.GetNodeGroupList()
1769
1770       # Verify global configuration
1771       jobs.append([
1772         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1773         ])
1774
1775       # Always depend on global verification
1776       depends_fn = lambda: [(-len(jobs), [])]
1777
1778     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1779                                             ignore_errors=self.op.ignore_errors,
1780                                             depends=depends_fn())]
1781                 for group in groups)
1782
1783     # Fix up all parameters
1784     for op in itertools.chain(*jobs): # pylint: disable=W0142
1785       op.debug_simulate_errors = self.op.debug_simulate_errors
1786       op.verbose = self.op.verbose
1787       op.error_codes = self.op.error_codes
1788       try:
1789         op.skip_checks = self.op.skip_checks
1790       except AttributeError:
1791         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1792
1793     return ResultWithJobs(jobs)
1794
1795
1796 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1797   """Verifies the cluster config.
1798
1799   """
1800   REQ_BGL = True
1801
1802   def _VerifyHVP(self, hvp_data):
1803     """Verifies locally the syntax of the hypervisor parameters.
1804
1805     """
1806     for item, hv_name, hv_params in hvp_data:
1807       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1808              (item, hv_name))
1809       try:
1810         hv_class = hypervisor.GetHypervisor(hv_name)
1811         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1812         hv_class.CheckParameterSyntax(hv_params)
1813       except errors.GenericError, err:
1814         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1815
1816   def ExpandNames(self):
1817     # Information can be safely retrieved as the BGL is acquired in exclusive
1818     # mode
1819     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1820     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1821     self.all_node_info = self.cfg.GetAllNodesInfo()
1822     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1823     self.needed_locks = {}
1824
1825   def Exec(self, feedback_fn):
1826     """Verify integrity of cluster, performing various test on nodes.
1827
1828     """
1829     self.bad = False
1830     self._feedback_fn = feedback_fn
1831
1832     feedback_fn("* Verifying cluster config")
1833
1834     for msg in self.cfg.VerifyConfig():
1835       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1836
1837     feedback_fn("* Verifying cluster certificate files")
1838
1839     for cert_filename in constants.ALL_CERT_FILES:
1840       (errcode, msg) = _VerifyCertificate(cert_filename)
1841       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1842
1843     feedback_fn("* Verifying hypervisor parameters")
1844
1845     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1846                                                 self.all_inst_info.values()))
1847
1848     feedback_fn("* Verifying all nodes belong to an existing group")
1849
1850     # We do this verification here because, should this bogus circumstance
1851     # occur, it would never be caught by VerifyGroup, which only acts on
1852     # nodes/instances reachable from existing node groups.
1853
1854     dangling_nodes = set(node.name for node in self.all_node_info.values()
1855                          if node.group not in self.all_group_info)
1856
1857     dangling_instances = {}
1858     no_node_instances = []
1859
1860     for inst in self.all_inst_info.values():
1861       if inst.primary_node in dangling_nodes:
1862         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1863       elif inst.primary_node not in self.all_node_info:
1864         no_node_instances.append(inst.name)
1865
1866     pretty_dangling = [
1867         "%s (%s)" %
1868         (node.name,
1869          utils.CommaJoin(dangling_instances.get(node.name,
1870                                                 ["no instances"])))
1871         for node in dangling_nodes]
1872
1873     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1874                   None,
1875                   "the following nodes (and their instances) belong to a non"
1876                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1877
1878     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1879                   None,
1880                   "the following instances have a non-existing primary-node:"
1881                   " %s", utils.CommaJoin(no_node_instances))
1882
1883     return not self.bad
1884
1885
1886 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1887   """Verifies the status of a node group.
1888
1889   """
1890   HPATH = "cluster-verify"
1891   HTYPE = constants.HTYPE_CLUSTER
1892   REQ_BGL = False
1893
1894   _HOOKS_INDENT_RE = re.compile("^", re.M)
1895
1896   class NodeImage(object):
1897     """A class representing the logical and physical status of a node.
1898
1899     @type name: string
1900     @ivar name: the node name to which this object refers
1901     @ivar volumes: a structure as returned from
1902         L{ganeti.backend.GetVolumeList} (runtime)
1903     @ivar instances: a list of running instances (runtime)
1904     @ivar pinst: list of configured primary instances (config)
1905     @ivar sinst: list of configured secondary instances (config)
1906     @ivar sbp: dictionary of {primary-node: list of instances} for all
1907         instances for which this node is secondary (config)
1908     @ivar mfree: free memory, as reported by hypervisor (runtime)
1909     @ivar dfree: free disk, as reported by the node (runtime)
1910     @ivar offline: the offline status (config)
1911     @type rpc_fail: boolean
1912     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1913         not whether the individual keys were correct) (runtime)
1914     @type lvm_fail: boolean
1915     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1916     @type hyp_fail: boolean
1917     @ivar hyp_fail: whether the RPC call didn't return the instance list
1918     @type ghost: boolean
1919     @ivar ghost: whether this is a known node or not (config)
1920     @type os_fail: boolean
1921     @ivar os_fail: whether the RPC call didn't return valid OS data
1922     @type oslist: list
1923     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1924     @type vm_capable: boolean
1925     @ivar vm_capable: whether the node can host instances
1926
1927     """
1928     def __init__(self, offline=False, name=None, vm_capable=True):
1929       self.name = name
1930       self.volumes = {}
1931       self.instances = []
1932       self.pinst = []
1933       self.sinst = []
1934       self.sbp = {}
1935       self.mfree = 0
1936       self.dfree = 0
1937       self.offline = offline
1938       self.vm_capable = vm_capable
1939       self.rpc_fail = False
1940       self.lvm_fail = False
1941       self.hyp_fail = False
1942       self.ghost = False
1943       self.os_fail = False
1944       self.oslist = {}
1945
1946   def ExpandNames(self):
1947     # This raises errors.OpPrereqError on its own:
1948     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1949
1950     # Get instances in node group; this is unsafe and needs verification later
1951     inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1952
1953     self.needed_locks = {
1954       locking.LEVEL_INSTANCE: inst_names,
1955       locking.LEVEL_NODEGROUP: [self.group_uuid],
1956       locking.LEVEL_NODE: [],
1957       }
1958
1959     self.share_locks = _ShareAll()
1960
1961   def DeclareLocks(self, level):
1962     if level == locking.LEVEL_NODE:
1963       # Get members of node group; this is unsafe and needs verification later
1964       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1965
1966       all_inst_info = self.cfg.GetAllInstancesInfo()
1967
1968       # In Exec(), we warn about mirrored instances that have primary and
1969       # secondary living in separate node groups. To fully verify that
1970       # volumes for these instances are healthy, we will need to do an
1971       # extra call to their secondaries. We ensure here those nodes will
1972       # be locked.
1973       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1974         # Important: access only the instances whose lock is owned
1975         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1976           nodes.update(all_inst_info[inst].secondary_nodes)
1977
1978       self.needed_locks[locking.LEVEL_NODE] = nodes
1979
1980   def CheckPrereq(self):
1981     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1982     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1983
1984     group_nodes = set(self.group_info.members)
1985     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1986
1987     unlocked_nodes = \
1988         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1989
1990     unlocked_instances = \
1991         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1992
1993     if unlocked_nodes:
1994       raise errors.OpPrereqError("Missing lock for nodes: %s" %
1995                                  utils.CommaJoin(unlocked_nodes))
1996
1997     if unlocked_instances:
1998       raise errors.OpPrereqError("Missing lock for instances: %s" %
1999                                  utils.CommaJoin(unlocked_instances))
2000
2001     self.all_node_info = self.cfg.GetAllNodesInfo()
2002     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2003
2004     self.my_node_names = utils.NiceSort(group_nodes)
2005     self.my_inst_names = utils.NiceSort(group_instances)
2006
2007     self.my_node_info = dict((name, self.all_node_info[name])
2008                              for name in self.my_node_names)
2009
2010     self.my_inst_info = dict((name, self.all_inst_info[name])
2011                              for name in self.my_inst_names)
2012
2013     # We detect here the nodes that will need the extra RPC calls for verifying
2014     # split LV volumes; they should be locked.
2015     extra_lv_nodes = set()
2016
2017     for inst in self.my_inst_info.values():
2018       if inst.disk_template in constants.DTS_INT_MIRROR:
2019         group = self.my_node_info[inst.primary_node].group
2020         for nname in inst.secondary_nodes:
2021           if self.all_node_info[nname].group != group:
2022             extra_lv_nodes.add(nname)
2023
2024     unlocked_lv_nodes = \
2025         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2026
2027     if unlocked_lv_nodes:
2028       raise errors.OpPrereqError("these nodes could be locked: %s" %
2029                                  utils.CommaJoin(unlocked_lv_nodes))
2030     self.extra_lv_nodes = list(extra_lv_nodes)
2031
2032   def _VerifyNode(self, ninfo, nresult):
2033     """Perform some basic validation on data returned from a node.
2034
2035       - check the result data structure is well formed and has all the
2036         mandatory fields
2037       - check ganeti version
2038
2039     @type ninfo: L{objects.Node}
2040     @param ninfo: the node to check
2041     @param nresult: the results from the node
2042     @rtype: boolean
2043     @return: whether overall this call was successful (and we can expect
2044          reasonable values in the respose)
2045
2046     """
2047     node = ninfo.name
2048     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2049
2050     # main result, nresult should be a non-empty dict
2051     test = not nresult or not isinstance(nresult, dict)
2052     _ErrorIf(test, constants.CV_ENODERPC, node,
2053                   "unable to verify node: no data returned")
2054     if test:
2055       return False
2056
2057     # compares ganeti version
2058     local_version = constants.PROTOCOL_VERSION
2059     remote_version = nresult.get("version", None)
2060     test = not (remote_version and
2061                 isinstance(remote_version, (list, tuple)) and
2062                 len(remote_version) == 2)
2063     _ErrorIf(test, constants.CV_ENODERPC, node,
2064              "connection to node returned invalid data")
2065     if test:
2066       return False
2067
2068     test = local_version != remote_version[0]
2069     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2070              "incompatible protocol versions: master %s,"
2071              " node %s", local_version, remote_version[0])
2072     if test:
2073       return False
2074
2075     # node seems compatible, we can actually try to look into its results
2076
2077     # full package version
2078     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2079                   constants.CV_ENODEVERSION, node,
2080                   "software version mismatch: master %s, node %s",
2081                   constants.RELEASE_VERSION, remote_version[1],
2082                   code=self.ETYPE_WARNING)
2083
2084     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2085     if ninfo.vm_capable and isinstance(hyp_result, dict):
2086       for hv_name, hv_result in hyp_result.iteritems():
2087         test = hv_result is not None
2088         _ErrorIf(test, constants.CV_ENODEHV, node,
2089                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2090
2091     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2092     if ninfo.vm_capable and isinstance(hvp_result, list):
2093       for item, hv_name, hv_result in hvp_result:
2094         _ErrorIf(True, constants.CV_ENODEHV, node,
2095                  "hypervisor %s parameter verify failure (source %s): %s",
2096                  hv_name, item, hv_result)
2097
2098     test = nresult.get(constants.NV_NODESETUP,
2099                        ["Missing NODESETUP results"])
2100     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2101              "; ".join(test))
2102
2103     return True
2104
2105   def _VerifyNodeTime(self, ninfo, nresult,
2106                       nvinfo_starttime, nvinfo_endtime):
2107     """Check the node time.
2108
2109     @type ninfo: L{objects.Node}
2110     @param ninfo: the node to check
2111     @param nresult: the remote results for the node
2112     @param nvinfo_starttime: the start time of the RPC call
2113     @param nvinfo_endtime: the end time of the RPC call
2114
2115     """
2116     node = ninfo.name
2117     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2118
2119     ntime = nresult.get(constants.NV_TIME, None)
2120     try:
2121       ntime_merged = utils.MergeTime(ntime)
2122     except (ValueError, TypeError):
2123       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2124       return
2125
2126     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2127       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2128     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2129       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2130     else:
2131       ntime_diff = None
2132
2133     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2134              "Node time diverges by at least %s from master node time",
2135              ntime_diff)
2136
2137   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2138     """Check the node LVM results.
2139
2140     @type ninfo: L{objects.Node}
2141     @param ninfo: the node to check
2142     @param nresult: the remote results for the node
2143     @param vg_name: the configured VG name
2144
2145     """
2146     if vg_name is None:
2147       return
2148
2149     node = ninfo.name
2150     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2151
2152     # checks vg existence and size > 20G
2153     vglist = nresult.get(constants.NV_VGLIST, None)
2154     test = not vglist
2155     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2156     if not test:
2157       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2158                                             constants.MIN_VG_SIZE)
2159       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2160
2161     # check pv names
2162     pvlist = nresult.get(constants.NV_PVLIST, None)
2163     test = pvlist is None
2164     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2165     if not test:
2166       # check that ':' is not present in PV names, since it's a
2167       # special character for lvcreate (denotes the range of PEs to
2168       # use on the PV)
2169       for _, pvname, owner_vg in pvlist:
2170         test = ":" in pvname
2171         _ErrorIf(test, constants.CV_ENODELVM, node,
2172                  "Invalid character ':' in PV '%s' of VG '%s'",
2173                  pvname, owner_vg)
2174
2175   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2176     """Check the node bridges.
2177
2178     @type ninfo: L{objects.Node}
2179     @param ninfo: the node to check
2180     @param nresult: the remote results for the node
2181     @param bridges: the expected list of bridges
2182
2183     """
2184     if not bridges:
2185       return
2186
2187     node = ninfo.name
2188     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2189
2190     missing = nresult.get(constants.NV_BRIDGES, None)
2191     test = not isinstance(missing, list)
2192     _ErrorIf(test, constants.CV_ENODENET, node,
2193              "did not return valid bridge information")
2194     if not test:
2195       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2196                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2197
2198   def _VerifyNodeUserScripts(self, ninfo, nresult):
2199     """Check the results of user scripts presence and executability on the node
2200
2201     @type ninfo: L{objects.Node}
2202     @param ninfo: the node to check
2203     @param nresult: the remote results for the node
2204
2205     """
2206     node = ninfo.name
2207
2208     test = not constants.NV_USERSCRIPTS in nresult
2209     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2210                   "did not return user scripts information")
2211
2212     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2213     if not test:
2214       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2215                     "user scripts not present or not executable: %s" %
2216                     utils.CommaJoin(sorted(broken_scripts)))
2217
2218   def _VerifyNodeNetwork(self, ninfo, nresult):
2219     """Check the node network connectivity results.
2220
2221     @type ninfo: L{objects.Node}
2222     @param ninfo: the node to check
2223     @param nresult: the remote results for the node
2224
2225     """
2226     node = ninfo.name
2227     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2228
2229     test = constants.NV_NODELIST not in nresult
2230     _ErrorIf(test, constants.CV_ENODESSH, node,
2231              "node hasn't returned node ssh connectivity data")
2232     if not test:
2233       if nresult[constants.NV_NODELIST]:
2234         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2235           _ErrorIf(True, constants.CV_ENODESSH, node,
2236                    "ssh communication with node '%s': %s", a_node, a_msg)
2237
2238     test = constants.NV_NODENETTEST not in nresult
2239     _ErrorIf(test, constants.CV_ENODENET, node,
2240              "node hasn't returned node tcp connectivity data")
2241     if not test:
2242       if nresult[constants.NV_NODENETTEST]:
2243         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2244         for anode in nlist:
2245           _ErrorIf(True, constants.CV_ENODENET, node,
2246                    "tcp communication with node '%s': %s",
2247                    anode, nresult[constants.NV_NODENETTEST][anode])
2248
2249     test = constants.NV_MASTERIP not in nresult
2250     _ErrorIf(test, constants.CV_ENODENET, node,
2251              "node hasn't returned node master IP reachability data")
2252     if not test:
2253       if not nresult[constants.NV_MASTERIP]:
2254         if node == self.master_node:
2255           msg = "the master node cannot reach the master IP (not configured?)"
2256         else:
2257           msg = "cannot reach the master IP"
2258         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2259
2260   def _VerifyInstancePolicy(self, instance):
2261     """Verify instance specs against instance policy set on node group level.
2262
2263
2264     """
2265     cluster = self.cfg.GetClusterInfo()
2266     full_beparams = cluster.FillBE(instance)
2267     ipolicy = cluster.SimpleFillIPolicy(self.group_info.ipolicy)
2268
2269     mem_size = full_beparams.get(constants.BE_MAXMEM, None)
2270     cpu_count = full_beparams.get(constants.BE_VCPUS, None)
2271     disk_count = len(instance.disks)
2272     disk_sizes = [disk.size for disk in instance.disks]
2273     nic_count = len(instance.nics)
2274
2275     test_settings = [
2276       (constants.ISPEC_MEM_SIZE, mem_size),
2277       (constants.ISPEC_CPU_COUNT, cpu_count),
2278       (constants.ISPEC_DISK_COUNT, disk_count),
2279       (constants.ISPEC_NIC_COUNT, nic_count),
2280       ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
2281
2282     for (name, value) in test_settings:
2283       test_result = _CheckMinMaxSpecs(name, ipolicy, value)
2284       self._ErrorIf(test_result is not None,
2285                     constants.CV_EINSTANCEPOLICY, instance.name,
2286                     test_result)
2287
2288   def _VerifyInstance(self, instance, instanceconfig, node_image,
2289                       diskstatus):
2290     """Verify an instance.
2291
2292     This function checks to see if the required block devices are
2293     available on the instance's node.
2294
2295     """
2296     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2297     node_current = instanceconfig.primary_node
2298
2299     node_vol_should = {}
2300     instanceconfig.MapLVsByNode(node_vol_should)
2301
2302     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2303     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2304     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2305
2306     for node in node_vol_should:
2307       n_img = node_image[node]
2308       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2309         # ignore missing volumes on offline or broken nodes
2310         continue
2311       for volume in node_vol_should[node]:
2312         test = volume not in n_img.volumes
2313         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2314                  "volume %s missing on node %s", volume, node)
2315
2316     if instanceconfig.admin_state == constants.ADMINST_UP:
2317       pri_img = node_image[node_current]
2318       test = instance not in pri_img.instances and not pri_img.offline
2319       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2320                "instance not running on its primary node %s",
2321                node_current)
2322
2323     diskdata = [(nname, success, status, idx)
2324                 for (nname, disks) in diskstatus.items()
2325                 for idx, (success, status) in enumerate(disks)]
2326
2327     for nname, success, bdev_status, idx in diskdata:
2328       # the 'ghost node' construction in Exec() ensures that we have a
2329       # node here
2330       snode = node_image[nname]
2331       bad_snode = snode.ghost or snode.offline
2332       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2333                not success and not bad_snode,
2334                constants.CV_EINSTANCEFAULTYDISK, instance,
2335                "couldn't retrieve status for disk/%s on %s: %s",
2336                idx, nname, bdev_status)
2337       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2338                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2339                constants.CV_EINSTANCEFAULTYDISK, instance,
2340                "disk/%s on %s is faulty", idx, nname)
2341
2342   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2343     """Verify if there are any unknown volumes in the cluster.
2344
2345     The .os, .swap and backup volumes are ignored. All other volumes are
2346     reported as unknown.
2347
2348     @type reserved: L{ganeti.utils.FieldSet}
2349     @param reserved: a FieldSet of reserved volume names
2350
2351     """
2352     for node, n_img in node_image.items():
2353       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2354         # skip non-healthy nodes
2355         continue
2356       for volume in n_img.volumes:
2357         test = ((node not in node_vol_should or
2358                 volume not in node_vol_should[node]) and
2359                 not reserved.Matches(volume))
2360         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2361                       "volume %s is unknown", volume)
2362
2363   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2364     """Verify N+1 Memory Resilience.
2365
2366     Check that if one single node dies we can still start all the
2367     instances it was primary for.
2368
2369     """
2370     cluster_info = self.cfg.GetClusterInfo()
2371     for node, n_img in node_image.items():
2372       # This code checks that every node which is now listed as
2373       # secondary has enough memory to host all instances it is
2374       # supposed to should a single other node in the cluster fail.
2375       # FIXME: not ready for failover to an arbitrary node
2376       # FIXME: does not support file-backed instances
2377       # WARNING: we currently take into account down instances as well
2378       # as up ones, considering that even if they're down someone
2379       # might want to start them even in the event of a node failure.
2380       if n_img.offline:
2381         # we're skipping offline nodes from the N+1 warning, since
2382         # most likely we don't have good memory infromation from them;
2383         # we already list instances living on such nodes, and that's
2384         # enough warning
2385         continue
2386       #TODO(dynmem): use MINMEM for checking
2387       #TODO(dynmem): also consider ballooning out other instances
2388       for prinode, instances in n_img.sbp.items():
2389         needed_mem = 0
2390         for instance in instances:
2391           bep = cluster_info.FillBE(instance_cfg[instance])
2392           if bep[constants.BE_AUTO_BALANCE]:
2393             needed_mem += bep[constants.BE_MAXMEM]
2394         test = n_img.mfree < needed_mem
2395         self._ErrorIf(test, constants.CV_ENODEN1, node,
2396                       "not enough memory to accomodate instance failovers"
2397                       " should node %s fail (%dMiB needed, %dMiB available)",
2398                       prinode, needed_mem, n_img.mfree)
2399
2400   @classmethod
2401   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2402                    (files_all, files_opt, files_mc, files_vm)):
2403     """Verifies file checksums collected from all nodes.
2404
2405     @param errorif: Callback for reporting errors
2406     @param nodeinfo: List of L{objects.Node} objects
2407     @param master_node: Name of master node
2408     @param all_nvinfo: RPC results
2409
2410     """
2411     # Define functions determining which nodes to consider for a file
2412     files2nodefn = [
2413       (files_all, None),
2414       (files_mc, lambda node: (node.master_candidate or
2415                                node.name == master_node)),
2416       (files_vm, lambda node: node.vm_capable),
2417       ]
2418
2419     # Build mapping from filename to list of nodes which should have the file
2420     nodefiles = {}
2421     for (files, fn) in files2nodefn:
2422       if fn is None:
2423         filenodes = nodeinfo
2424       else:
2425         filenodes = filter(fn, nodeinfo)
2426       nodefiles.update((filename,
2427                         frozenset(map(operator.attrgetter("name"), filenodes)))
2428                        for filename in files)
2429
2430     assert set(nodefiles) == (files_all | files_mc | files_vm)
2431
2432     fileinfo = dict((filename, {}) for filename in nodefiles)
2433     ignore_nodes = set()
2434
2435     for node in nodeinfo:
2436       if node.offline:
2437         ignore_nodes.add(node.name)
2438         continue
2439
2440       nresult = all_nvinfo[node.name]
2441
2442       if nresult.fail_msg or not nresult.payload:
2443         node_files = None
2444       else:
2445         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2446
2447       test = not (node_files and isinstance(node_files, dict))
2448       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2449               "Node did not return file checksum data")
2450       if test:
2451         ignore_nodes.add(node.name)
2452         continue
2453
2454       # Build per-checksum mapping from filename to nodes having it
2455       for (filename, checksum) in node_files.items():
2456         assert filename in nodefiles
2457         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2458
2459     for (filename, checksums) in fileinfo.items():
2460       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2461
2462       # Nodes having the file
2463       with_file = frozenset(node_name
2464                             for nodes in fileinfo[filename].values()
2465                             for node_name in nodes) - ignore_nodes
2466
2467       expected_nodes = nodefiles[filename] - ignore_nodes
2468
2469       # Nodes missing file
2470       missing_file = expected_nodes - with_file
2471
2472       if filename in files_opt:
2473         # All or no nodes
2474         errorif(missing_file and missing_file != expected_nodes,
2475                 constants.CV_ECLUSTERFILECHECK, None,
2476                 "File %s is optional, but it must exist on all or no"
2477                 " nodes (not found on %s)",
2478                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2479       else:
2480         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2481                 "File %s is missing from node(s) %s", filename,
2482                 utils.CommaJoin(utils.NiceSort(missing_file)))
2483
2484         # Warn if a node has a file it shouldn't
2485         unexpected = with_file - expected_nodes
2486         errorif(unexpected,
2487                 constants.CV_ECLUSTERFILECHECK, None,
2488                 "File %s should not exist on node(s) %s",
2489                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2490
2491       # See if there are multiple versions of the file
2492       test = len(checksums) > 1
2493       if test:
2494         variants = ["variant %s on %s" %
2495                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2496                     for (idx, (checksum, nodes)) in
2497                       enumerate(sorted(checksums.items()))]
2498       else:
2499         variants = []
2500
2501       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2502               "File %s found with %s different checksums (%s)",
2503               filename, len(checksums), "; ".join(variants))
2504
2505   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2506                       drbd_map):
2507     """Verifies and the node DRBD status.
2508
2509     @type ninfo: L{objects.Node}
2510     @param ninfo: the node to check
2511     @param nresult: the remote results for the node
2512     @param instanceinfo: the dict of instances
2513     @param drbd_helper: the configured DRBD usermode helper
2514     @param drbd_map: the DRBD map as returned by
2515         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2516
2517     """
2518     node = ninfo.name
2519     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2520
2521     if drbd_helper:
2522       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2523       test = (helper_result == None)
2524       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2525                "no drbd usermode helper returned")
2526       if helper_result:
2527         status, payload = helper_result
2528         test = not status
2529         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2530                  "drbd usermode helper check unsuccessful: %s", payload)
2531         test = status and (payload != drbd_helper)
2532         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2533                  "wrong drbd usermode helper: %s", payload)
2534
2535     # compute the DRBD minors
2536     node_drbd = {}
2537     for minor, instance in drbd_map[node].items():
2538       test = instance not in instanceinfo
2539       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2540                "ghost instance '%s' in temporary DRBD map", instance)
2541         # ghost instance should not be running, but otherwise we
2542         # don't give double warnings (both ghost instance and
2543         # unallocated minor in use)
2544       if test:
2545         node_drbd[minor] = (instance, False)
2546       else:
2547         instance = instanceinfo[instance]
2548         node_drbd[minor] = (instance.name,
2549                             instance.admin_state == constants.ADMINST_UP)
2550
2551     # and now check them
2552     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2553     test = not isinstance(used_minors, (tuple, list))
2554     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2555              "cannot parse drbd status file: %s", str(used_minors))
2556     if test:
2557       # we cannot check drbd status
2558       return
2559
2560     for minor, (iname, must_exist) in node_drbd.items():
2561       test = minor not in used_minors and must_exist
2562       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2563                "drbd minor %d of instance %s is not active", minor, iname)
2564     for minor in used_minors:
2565       test = minor not in node_drbd
2566       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2567                "unallocated drbd minor %d is in use", minor)
2568
2569   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2570     """Builds the node OS structures.
2571
2572     @type ninfo: L{objects.Node}
2573     @param ninfo: the node to check
2574     @param nresult: the remote results for the node
2575     @param nimg: the node image object
2576
2577     """
2578     node = ninfo.name
2579     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2580
2581     remote_os = nresult.get(constants.NV_OSLIST, None)
2582     test = (not isinstance(remote_os, list) or
2583             not compat.all(isinstance(v, list) and len(v) == 7
2584                            for v in remote_os))
2585
2586     _ErrorIf(test, constants.CV_ENODEOS, node,
2587              "node hasn't returned valid OS data")
2588
2589     nimg.os_fail = test
2590
2591     if test:
2592       return
2593
2594     os_dict = {}
2595
2596     for (name, os_path, status, diagnose,
2597          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2598
2599       if name not in os_dict:
2600         os_dict[name] = []
2601
2602       # parameters is a list of lists instead of list of tuples due to
2603       # JSON lacking a real tuple type, fix it:
2604       parameters = [tuple(v) for v in parameters]
2605       os_dict[name].append((os_path, status, diagnose,
2606                             set(variants), set(parameters), set(api_ver)))
2607
2608     nimg.oslist = os_dict
2609
2610   def _VerifyNodeOS(self, ninfo, nimg, base):
2611     """Verifies the node OS list.
2612
2613     @type ninfo: L{objects.Node}
2614     @param ninfo: the node to check
2615     @param nimg: the node image object
2616     @param base: the 'template' node we match against (e.g. from the master)
2617
2618     """
2619     node = ninfo.name
2620     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2621
2622     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2623
2624     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2625     for os_name, os_data in nimg.oslist.items():
2626       assert os_data, "Empty OS status for OS %s?!" % os_name
2627       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2628       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2629                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2630       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2631                "OS '%s' has multiple entries (first one shadows the rest): %s",
2632                os_name, utils.CommaJoin([v[0] for v in os_data]))
2633       # comparisons with the 'base' image
2634       test = os_name not in base.oslist
2635       _ErrorIf(test, constants.CV_ENODEOS, node,
2636                "Extra OS %s not present on reference node (%s)",
2637                os_name, base.name)
2638       if test:
2639         continue
2640       assert base.oslist[os_name], "Base node has empty OS status?"
2641       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2642       if not b_status:
2643         # base OS is invalid, skipping
2644         continue
2645       for kind, a, b in [("API version", f_api, b_api),
2646                          ("variants list", f_var, b_var),
2647                          ("parameters", beautify_params(f_param),
2648                           beautify_params(b_param))]:
2649         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2650                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2651                  kind, os_name, base.name,
2652                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2653
2654     # check any missing OSes
2655     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2656     _ErrorIf(missing, constants.CV_ENODEOS, node,
2657              "OSes present on reference node %s but missing on this node: %s",
2658              base.name, utils.CommaJoin(missing))
2659
2660   def _VerifyOob(self, ninfo, nresult):
2661     """Verifies out of band functionality of a node.
2662
2663     @type ninfo: L{objects.Node}
2664     @param ninfo: the node to check
2665     @param nresult: the remote results for the node
2666
2667     """
2668     node = ninfo.name
2669     # We just have to verify the paths on master and/or master candidates
2670     # as the oob helper is invoked on the master
2671     if ((ninfo.master_candidate or ninfo.master_capable) and
2672         constants.NV_OOB_PATHS in nresult):
2673       for path_result in nresult[constants.NV_OOB_PATHS]:
2674         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2675
2676   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2677     """Verifies and updates the node volume data.
2678
2679     This function will update a L{NodeImage}'s internal structures
2680     with data from the remote call.
2681
2682     @type ninfo: L{objects.Node}
2683     @param ninfo: the node to check
2684     @param nresult: the remote results for the node
2685     @param nimg: the node image object
2686     @param vg_name: the configured VG name
2687
2688     """
2689     node = ninfo.name
2690     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2691
2692     nimg.lvm_fail = True
2693     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2694     if vg_name is None:
2695       pass
2696     elif isinstance(lvdata, basestring):
2697       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2698                utils.SafeEncode(lvdata))
2699     elif not isinstance(lvdata, dict):
2700       _ErrorIf(True, constants.CV_ENODELVM, node,
2701                "rpc call to node failed (lvlist)")
2702     else:
2703       nimg.volumes = lvdata
2704       nimg.lvm_fail = False
2705
2706   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2707     """Verifies and updates the node instance list.
2708
2709     If the listing was successful, then updates this node's instance
2710     list. Otherwise, it marks the RPC call as failed for the instance
2711     list key.
2712
2713     @type ninfo: L{objects.Node}
2714     @param ninfo: the node to check
2715     @param nresult: the remote results for the node
2716     @param nimg: the node image object
2717
2718     """
2719     idata = nresult.get(constants.NV_INSTANCELIST, None)
2720     test = not isinstance(idata, list)
2721     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2722                   "rpc call to node failed (instancelist): %s",
2723                   utils.SafeEncode(str(idata)))
2724     if test:
2725       nimg.hyp_fail = True
2726     else:
2727       nimg.instances = idata
2728
2729   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2730     """Verifies and computes a node information map
2731
2732     @type ninfo: L{objects.Node}
2733     @param ninfo: the node to check
2734     @param nresult: the remote results for the node
2735     @param nimg: the node image object
2736     @param vg_name: the configured VG name
2737
2738     """
2739     node = ninfo.name
2740     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2741
2742     # try to read free memory (from the hypervisor)
2743     hv_info = nresult.get(constants.NV_HVINFO, None)
2744     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2745     _ErrorIf(test, constants.CV_ENODEHV, node,
2746              "rpc call to node failed (hvinfo)")
2747     if not test:
2748       try:
2749         nimg.mfree = int(hv_info["memory_free"])
2750       except (ValueError, TypeError):
2751         _ErrorIf(True, constants.CV_ENODERPC, node,
2752                  "node returned invalid nodeinfo, check hypervisor")
2753
2754     # FIXME: devise a free space model for file based instances as well
2755     if vg_name is not None:
2756       test = (constants.NV_VGLIST not in nresult or
2757               vg_name not in nresult[constants.NV_VGLIST])
2758       _ErrorIf(test, constants.CV_ENODELVM, node,
2759                "node didn't return data for the volume group '%s'"
2760                " - it is either missing or broken", vg_name)
2761       if not test:
2762         try:
2763           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2764         except (ValueError, TypeError):
2765           _ErrorIf(True, constants.CV_ENODERPC, node,
2766                    "node returned invalid LVM info, check LVM status")
2767
2768   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2769     """Gets per-disk status information for all instances.
2770
2771     @type nodelist: list of strings
2772     @param nodelist: Node names
2773     @type node_image: dict of (name, L{objects.Node})
2774     @param node_image: Node objects
2775     @type instanceinfo: dict of (name, L{objects.Instance})
2776     @param instanceinfo: Instance objects
2777     @rtype: {instance: {node: [(succes, payload)]}}
2778     @return: a dictionary of per-instance dictionaries with nodes as
2779         keys and disk information as values; the disk information is a
2780         list of tuples (success, payload)
2781
2782     """
2783     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2784
2785     node_disks = {}
2786     node_disks_devonly = {}
2787     diskless_instances = set()
2788     diskless = constants.DT_DISKLESS
2789
2790     for nname in nodelist:
2791       node_instances = list(itertools.chain(node_image[nname].pinst,
2792                                             node_image[nname].sinst))
2793       diskless_instances.update(inst for inst in node_instances
2794                                 if instanceinfo[inst].disk_template == diskless)
2795       disks = [(inst, disk)
2796                for inst in node_instances
2797                for disk in instanceinfo[inst].disks]
2798
2799       if not disks:
2800         # No need to collect data
2801         continue
2802
2803       node_disks[nname] = disks
2804
2805       # Creating copies as SetDiskID below will modify the objects and that can
2806       # lead to incorrect data returned from nodes
2807       devonly = [dev.Copy() for (_, dev) in disks]
2808
2809       for dev in devonly:
2810         self.cfg.SetDiskID(dev, nname)
2811
2812       node_disks_devonly[nname] = devonly
2813
2814     assert len(node_disks) == len(node_disks_devonly)
2815
2816     # Collect data from all nodes with disks
2817     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2818                                                           node_disks_devonly)
2819
2820     assert len(result) == len(node_disks)
2821
2822     instdisk = {}
2823
2824     for (nname, nres) in result.items():
2825       disks = node_disks[nname]
2826
2827       if nres.offline:
2828         # No data from this node
2829         data = len(disks) * [(False, "node offline")]
2830       else:
2831         msg = nres.fail_msg
2832         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2833                  "while getting disk information: %s", msg)
2834         if msg:
2835           # No data from this node
2836           data = len(disks) * [(False, msg)]
2837         else:
2838           data = []
2839           for idx, i in enumerate(nres.payload):
2840             if isinstance(i, (tuple, list)) and len(i) == 2:
2841               data.append(i)
2842             else:
2843               logging.warning("Invalid result from node %s, entry %d: %s",
2844                               nname, idx, i)
2845               data.append((False, "Invalid result from the remote node"))
2846
2847       for ((inst, _), status) in zip(disks, data):
2848         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2849
2850     # Add empty entries for diskless instances.
2851     for inst in diskless_instances:
2852       assert inst not in instdisk
2853       instdisk[inst] = {}
2854
2855     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2856                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2857                       compat.all(isinstance(s, (tuple, list)) and
2858                                  len(s) == 2 for s in statuses)
2859                       for inst, nnames in instdisk.items()
2860                       for nname, statuses in nnames.items())
2861     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2862
2863     return instdisk
2864
2865   @staticmethod
2866   def _SshNodeSelector(group_uuid, all_nodes):
2867     """Create endless iterators for all potential SSH check hosts.
2868
2869     """
2870     nodes = [node for node in all_nodes
2871              if (node.group != group_uuid and
2872                  not node.offline)]
2873     keyfunc = operator.attrgetter("group")
2874
2875     return map(itertools.cycle,
2876                [sorted(map(operator.attrgetter("name"), names))
2877                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2878                                                   keyfunc)])
2879
2880   @classmethod
2881   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2882     """Choose which nodes should talk to which other nodes.
2883
2884     We will make nodes contact all nodes in their group, and one node from
2885     every other group.
2886
2887     @warning: This algorithm has a known issue if one node group is much
2888       smaller than others (e.g. just one node). In such a case all other
2889       nodes will talk to the single node.
2890
2891     """
2892     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2893     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2894
2895     return (online_nodes,
2896             dict((name, sorted([i.next() for i in sel]))
2897                  for name in online_nodes))
2898
2899   def BuildHooksEnv(self):
2900     """Build hooks env.
2901
2902     Cluster-Verify hooks just ran in the post phase and their failure makes
2903     the output be logged in the verify output and the verification to fail.
2904
2905     """
2906     env = {
2907       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2908       }
2909
2910     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2911                for node in self.my_node_info.values())
2912
2913     return env
2914
2915   def BuildHooksNodes(self):
2916     """Build hooks nodes.
2917
2918     """
2919     return ([], self.my_node_names)
2920
2921   def Exec(self, feedback_fn):
2922     """Verify integrity of the node group, performing various test on nodes.
2923
2924     """
2925     # This method has too many local variables. pylint: disable=R0914
2926     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2927
2928     if not self.my_node_names:
2929       # empty node group
2930       feedback_fn("* Empty node group, skipping verification")
2931       return True
2932
2933     self.bad = False
2934     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2935     verbose = self.op.verbose
2936     self._feedback_fn = feedback_fn
2937
2938     vg_name = self.cfg.GetVGName()
2939     drbd_helper = self.cfg.GetDRBDHelper()
2940     cluster = self.cfg.GetClusterInfo()
2941     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2942     hypervisors = cluster.enabled_hypervisors
2943     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2944
2945     i_non_redundant = [] # Non redundant instances
2946     i_non_a_balanced = [] # Non auto-balanced instances
2947     i_offline = 0 # Count of offline instances
2948     n_offline = 0 # Count of offline nodes
2949     n_drained = 0 # Count of nodes being drained
2950     node_vol_should = {}
2951
2952     # FIXME: verify OS list
2953
2954     # File verification
2955     filemap = _ComputeAncillaryFiles(cluster, False)
2956
2957     # do local checksums
2958     master_node = self.master_node = self.cfg.GetMasterNode()
2959     master_ip = self.cfg.GetMasterIP()
2960
2961     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2962
2963     user_scripts = []
2964     if self.cfg.GetUseExternalMipScript():
2965       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2966
2967     node_verify_param = {
2968       constants.NV_FILELIST:
2969         utils.UniqueSequence(filename
2970                              for files in filemap
2971                              for filename in files),
2972       constants.NV_NODELIST:
2973         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2974                                   self.all_node_info.values()),
2975       constants.NV_HYPERVISOR: hypervisors,
2976       constants.NV_HVPARAMS:
2977         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2978       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2979                                  for node in node_data_list
2980                                  if not node.offline],
2981       constants.NV_INSTANCELIST: hypervisors,
2982       constants.NV_VERSION: None,
2983       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2984       constants.NV_NODESETUP: None,
2985       constants.NV_TIME: None,
2986       constants.NV_MASTERIP: (master_node, master_ip),
2987       constants.NV_OSLIST: None,
2988       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2989       constants.NV_USERSCRIPTS: user_scripts,
2990       }
2991
2992     if vg_name is not None:
2993       node_verify_param[constants.NV_VGLIST] = None
2994       node_verify_param[constants.NV_LVLIST] = vg_name
2995       node_verify_param[constants.NV_PVLIST] = [vg_name]
2996       node_verify_param[constants.NV_DRBDLIST] = None
2997
2998     if drbd_helper:
2999       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3000
3001     # bridge checks
3002     # FIXME: this needs to be changed per node-group, not cluster-wide
3003     bridges = set()
3004     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3005     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3006       bridges.add(default_nicpp[constants.NIC_LINK])
3007     for instance in self.my_inst_info.values():
3008       for nic in instance.nics:
3009         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3010         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3011           bridges.add(full_nic[constants.NIC_LINK])
3012
3013     if bridges:
3014       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3015
3016     # Build our expected cluster state
3017     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3018                                                  name=node.name,
3019                                                  vm_capable=node.vm_capable))
3020                       for node in node_data_list)
3021
3022     # Gather OOB paths
3023     oob_paths = []
3024     for node in self.all_node_info.values():
3025       path = _SupportsOob(self.cfg, node)
3026       if path and path not in oob_paths:
3027         oob_paths.append(path)
3028
3029     if oob_paths:
3030       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3031
3032     for instance in self.my_inst_names:
3033       inst_config = self.my_inst_info[instance]
3034
3035       for nname in inst_config.all_nodes:
3036         if nname not in node_image:
3037           gnode = self.NodeImage(name=nname)
3038           gnode.ghost = (nname not in self.all_node_info)
3039           node_image[nname] = gnode
3040
3041       inst_config.MapLVsByNode(node_vol_should)
3042
3043       pnode = inst_config.primary_node
3044       node_image[pnode].pinst.append(instance)
3045
3046       for snode in inst_config.secondary_nodes:
3047         nimg = node_image[snode]
3048         nimg.sinst.append(instance)
3049         if pnode not in nimg.sbp:
3050           nimg.sbp[pnode] = []
3051         nimg.sbp[pnode].append(instance)
3052
3053     # At this point, we have the in-memory data structures complete,
3054     # except for the runtime information, which we'll gather next
3055
3056     # Due to the way our RPC system works, exact response times cannot be
3057     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3058     # time before and after executing the request, we can at least have a time
3059     # window.
3060     nvinfo_starttime = time.time()
3061     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3062                                            node_verify_param,
3063                                            self.cfg.GetClusterName())
3064     nvinfo_endtime = time.time()
3065
3066     if self.extra_lv_nodes and vg_name is not None:
3067       extra_lv_nvinfo = \
3068           self.rpc.call_node_verify(self.extra_lv_nodes,
3069                                     {constants.NV_LVLIST: vg_name},
3070                                     self.cfg.GetClusterName())
3071     else:
3072       extra_lv_nvinfo = {}
3073
3074     all_drbd_map = self.cfg.ComputeDRBDMap()
3075
3076     feedback_fn("* Gathering disk information (%s nodes)" %
3077                 len(self.my_node_names))
3078     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3079                                      self.my_inst_info)
3080
3081     feedback_fn("* Verifying configuration file consistency")
3082
3083     # If not all nodes are being checked, we need to make sure the master node
3084     # and a non-checked vm_capable node are in the list.
3085     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3086     if absent_nodes:
3087       vf_nvinfo = all_nvinfo.copy()
3088       vf_node_info = list(self.my_node_info.values())
3089       additional_nodes = []
3090       if master_node not in self.my_node_info:
3091         additional_nodes.append(master_node)
3092         vf_node_info.append(self.all_node_info[master_node])
3093       # Add the first vm_capable node we find which is not included
3094       for node in absent_nodes:
3095         nodeinfo = self.all_node_info[node]
3096         if nodeinfo.vm_capable and not nodeinfo.offline:
3097           additional_nodes.append(node)
3098           vf_node_info.append(self.all_node_info[node])
3099           break
3100       key = constants.NV_FILELIST
3101       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3102                                                  {key: node_verify_param[key]},
3103                                                  self.cfg.GetClusterName()))
3104     else:
3105       vf_nvinfo = all_nvinfo
3106       vf_node_info = self.my_node_info.values()
3107
3108     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3109
3110     feedback_fn("* Verifying node status")
3111
3112     refos_img = None
3113
3114     for node_i in node_data_list:
3115       node = node_i.name
3116       nimg = node_image[node]
3117
3118       if node_i.offline:
3119         if verbose:
3120           feedback_fn("* Skipping offline node %s" % (node,))
3121         n_offline += 1
3122         continue
3123
3124       if node == master_node:
3125         ntype = "master"
3126       elif node_i.master_candidate:
3127         ntype = "master candidate"
3128       elif node_i.drained:
3129         ntype = "drained"
3130         n_drained += 1
3131       else:
3132         ntype = "regular"
3133       if verbose:
3134         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3135
3136       msg = all_nvinfo[node].fail_msg
3137       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3138                msg)
3139       if msg:
3140         nimg.rpc_fail = True
3141         continue
3142
3143       nresult = all_nvinfo[node].payload
3144
3145       nimg.call_ok = self._VerifyNode(node_i, nresult)
3146       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3147       self._VerifyNodeNetwork(node_i, nresult)
3148       self._VerifyNodeUserScripts(node_i, nresult)
3149       self._VerifyOob(node_i, nresult)
3150
3151       if nimg.vm_capable:
3152         self._VerifyNodeLVM(node_i, nresult, vg_name)
3153         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3154                              all_drbd_map)
3155
3156         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3157         self._UpdateNodeInstances(node_i, nresult, nimg)
3158         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3159         self._UpdateNodeOS(node_i, nresult, nimg)
3160
3161         if not nimg.os_fail:
3162           if refos_img is None:
3163             refos_img = nimg
3164           self._VerifyNodeOS(node_i, nimg, refos_img)
3165         self._VerifyNodeBridges(node_i, nresult, bridges)
3166
3167         # Check whether all running instancies are primary for the node. (This
3168         # can no longer be done from _VerifyInstance below, since some of the
3169         # wrong instances could be from other node groups.)
3170         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3171
3172         for inst in non_primary_inst:
3173           # FIXME: investigate best way to handle offline insts
3174           if inst.admin_state == constants.ADMINST_OFFLINE:
3175             if verbose:
3176               feedback_fn("* Skipping offline instance %s" % inst.name)
3177             i_offline += 1
3178             continue
3179           test = inst in self.all_inst_info
3180           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3181                    "instance should not run on node %s", node_i.name)
3182           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3183                    "node is running unknown instance %s", inst)
3184
3185     for node, result in extra_lv_nvinfo.items():
3186       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3187                               node_image[node], vg_name)
3188
3189     feedback_fn("* Verifying instance status")
3190     for instance in self.my_inst_names:
3191       if verbose:
3192         feedback_fn("* Verifying instance %s" % instance)
3193       inst_config = self.my_inst_info[instance]
3194       self._VerifyInstance(instance, inst_config, node_image,
3195                            instdisk[instance])
3196       inst_nodes_offline = []
3197
3198       pnode = inst_config.primary_node
3199       pnode_img = node_image[pnode]
3200       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3201                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3202                " primary node failed", instance)
3203
3204       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3205                pnode_img.offline,
3206                constants.CV_EINSTANCEBADNODE, instance,
3207                "instance is marked as running and lives on offline node %s",
3208                inst_config.primary_node)
3209
3210       # If the instance is non-redundant we cannot survive losing its primary
3211       # node, so we are not N+1 compliant. On the other hand we have no disk
3212       # templates with more than one secondary so that situation is not well
3213       # supported either.
3214       # FIXME: does not support file-backed instances
3215       if not inst_config.secondary_nodes:
3216         i_non_redundant.append(instance)
3217
3218       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3219                constants.CV_EINSTANCELAYOUT,
3220                instance, "instance has multiple secondary nodes: %s",
3221                utils.CommaJoin(inst_config.secondary_nodes),
3222                code=self.ETYPE_WARNING)
3223
3224       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3225         pnode = inst_config.primary_node
3226         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3227         instance_groups = {}
3228
3229         for node in instance_nodes:
3230           instance_groups.setdefault(self.all_node_info[node].group,
3231                                      []).append(node)
3232
3233         pretty_list = [
3234           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3235           # Sort so that we always list the primary node first.
3236           for group, nodes in sorted(instance_groups.items(),
3237                                      key=lambda (_, nodes): pnode in nodes,
3238                                      reverse=True)]
3239
3240         self._ErrorIf(len(instance_groups) > 1,
3241                       constants.CV_EINSTANCESPLITGROUPS,
3242                       instance, "instance has primary and secondary nodes in"
3243                       " different groups: %s", utils.CommaJoin(pretty_list),
3244                       code=self.ETYPE_WARNING)
3245
3246       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3247         i_non_a_balanced.append(instance)
3248
3249       for snode in inst_config.secondary_nodes:
3250         s_img = node_image[snode]
3251         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3252                  snode, "instance %s, connection to secondary node failed",
3253                  instance)
3254
3255         if s_img.offline:
3256           inst_nodes_offline.append(snode)
3257
3258       # warn that the instance lives on offline nodes
3259       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3260                "instance has offline secondary node(s) %s",
3261                utils.CommaJoin(inst_nodes_offline))
3262       # ... or ghost/non-vm_capable nodes
3263       for node in inst_config.all_nodes:
3264         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3265                  instance, "instance lives on ghost node %s", node)
3266         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3267                  instance, "instance lives on non-vm_capable node %s", node)
3268
3269     feedback_fn("* Verifying orphan volumes")
3270     reserved = utils.FieldSet(*cluster.reserved_lvs)
3271
3272     # We will get spurious "unknown volume" warnings if any node of this group
3273     # is secondary for an instance whose primary is in another group. To avoid
3274     # them, we find these instances and add their volumes to node_vol_should.
3275     for inst in self.all_inst_info.values():
3276       for secondary in inst.secondary_nodes:
3277         if (secondary in self.my_node_info
3278             and inst.name not in self.my_inst_info):
3279           inst.MapLVsByNode(node_vol_should)
3280           break
3281
3282     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3283
3284     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3285       feedback_fn("* Verifying N+1 Memory redundancy")
3286       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3287
3288     feedback_fn("* Other Notes")
3289     if i_non_redundant:
3290       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3291                   % len(i_non_redundant))
3292
3293     if i_non_a_balanced:
3294       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3295                   % len(i_non_a_balanced))
3296
3297     if i_offline:
3298       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3299
3300     if n_offline:
3301       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3302
3303     if n_drained:
3304       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3305
3306     return not self.bad
3307
3308   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3309     """Analyze the post-hooks' result
3310
3311     This method analyses the hook result, handles it, and sends some
3312     nicely-formatted feedback back to the user.
3313
3314     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3315         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3316     @param hooks_results: the results of the multi-node hooks rpc call
3317     @param feedback_fn: function used send feedback back to the caller
3318     @param lu_result: previous Exec result
3319     @return: the new Exec result, based on the previous result
3320         and hook results
3321
3322     """
3323     # We only really run POST phase hooks, only for non-empty groups,
3324     # and are only interested in their results
3325     if not self.my_node_names:
3326       # empty node group
3327       pass
3328     elif phase == constants.HOOKS_PHASE_POST:
3329       # Used to change hooks' output to proper indentation
3330       feedback_fn("* Hooks Results")
3331       assert hooks_results, "invalid result from hooks"
3332
3333       for node_name in hooks_results:
3334         res = hooks_results[node_name]
3335         msg = res.fail_msg
3336         test = msg and not res.offline
3337         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3338                       "Communication failure in hooks execution: %s", msg)
3339         if res.offline or msg:
3340           # No need to investigate payload if node is offline or gave
3341           # an error.
3342           continue
3343         for script, hkr, output in res.payload:
3344           test = hkr == constants.HKR_FAIL
3345           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3346                         "Script %s failed, output:", script)
3347           if test:
3348             output = self._HOOKS_INDENT_RE.sub("      ", output)
3349             feedback_fn("%s" % output)
3350             lu_result = False
3351
3352     return lu_result
3353
3354
3355 class LUClusterVerifyDisks(NoHooksLU):
3356   """Verifies the cluster disks status.
3357
3358   """
3359   REQ_BGL = False
3360
3361   def ExpandNames(self):
3362     self.share_locks = _ShareAll()
3363     self.needed_locks = {
3364       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3365       }
3366
3367   def Exec(self, feedback_fn):
3368     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3369
3370     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3371     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3372                            for group in group_names])
3373
3374
3375 class LUGroupVerifyDisks(NoHooksLU):
3376   """Verifies the status of all disks in a node group.
3377
3378   """
3379   REQ_BGL = False
3380
3381   def ExpandNames(self):
3382     # Raises errors.OpPrereqError on its own if group can't be found
3383     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3384
3385     self.share_locks = _ShareAll()
3386     self.needed_locks = {
3387       locking.LEVEL_INSTANCE: [],
3388       locking.LEVEL_NODEGROUP: [],
3389       locking.LEVEL_NODE: [],
3390       }
3391
3392   def DeclareLocks(self, level):
3393     if level == locking.LEVEL_INSTANCE:
3394       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3395
3396       # Lock instances optimistically, needs verification once node and group
3397       # locks have been acquired
3398       self.needed_locks[locking.LEVEL_INSTANCE] = \
3399         self.cfg.GetNodeGroupInstances(self.group_uuid)
3400
3401     elif level == locking.LEVEL_NODEGROUP:
3402       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3403
3404       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3405         set([self.group_uuid] +
3406             # Lock all groups used by instances optimistically; this requires
3407             # going via the node before it's locked, requiring verification
3408             # later on
3409             [group_uuid
3410              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3411              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3412
3413     elif level == locking.LEVEL_NODE:
3414       # This will only lock the nodes in the group to be verified which contain
3415       # actual instances
3416       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3417       self._LockInstancesNodes()
3418
3419       # Lock all nodes in group to be verified
3420       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3421       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3422       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3423
3424   def CheckPrereq(self):
3425     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3426     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3427     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3428
3429     assert self.group_uuid in owned_groups
3430
3431     # Check if locked instances are still correct
3432     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3433
3434     # Get instance information
3435     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3436
3437     # Check if node groups for locked instances are still correct
3438     for (instance_name, inst) in self.instances.items():
3439       assert owned_nodes.issuperset(inst.all_nodes), \
3440         "Instance %s's nodes changed while we kept the lock" % instance_name
3441
3442       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3443                                              owned_groups)
3444
3445       assert self.group_uuid in inst_groups, \
3446         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3447
3448   def Exec(self, feedback_fn):
3449     """Verify integrity of cluster disks.
3450
3451     @rtype: tuple of three items
3452     @return: a tuple of (dict of node-to-node_error, list of instances
3453         which need activate-disks, dict of instance: (node, volume) for
3454         missing volumes
3455
3456     """
3457     res_nodes = {}
3458     res_instances = set()
3459     res_missing = {}
3460
3461     nv_dict = _MapInstanceDisksToNodes([inst
3462             for inst in self.instances.values()
3463             if inst.admin_state == constants.ADMINST_UP])
3464
3465     if nv_dict:
3466       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3467                              set(self.cfg.GetVmCapableNodeList()))
3468
3469       node_lvs = self.rpc.call_lv_list(nodes, [])
3470
3471       for (node, node_res) in node_lvs.items():
3472         if node_res.offline:
3473           continue
3474
3475         msg = node_res.fail_msg
3476         if msg:
3477           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3478           res_nodes[node] = msg
3479           continue
3480
3481         for lv_name, (_, _, lv_online) in node_res.payload.items():
3482           inst = nv_dict.pop((node, lv_name), None)
3483           if not (lv_online or inst is None):
3484             res_instances.add(inst)
3485
3486       # any leftover items in nv_dict are missing LVs, let's arrange the data
3487       # better
3488       for key, inst in nv_dict.iteritems():
3489         res_missing.setdefault(inst, []).append(list(key))
3490
3491     return (res_nodes, list(res_instances), res_missing)
3492
3493
3494 class LUClusterRepairDiskSizes(NoHooksLU):
3495   """Verifies the cluster disks sizes.
3496
3497   """
3498   REQ_BGL = False
3499
3500   def ExpandNames(self):
3501     if self.op.instances:
3502       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3503       self.needed_locks = {
3504         locking.LEVEL_NODE_RES: [],
3505         locking.LEVEL_INSTANCE: self.wanted_names,
3506         }
3507       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3508     else:
3509       self.wanted_names = None
3510       self.needed_locks = {
3511         locking.LEVEL_NODE_RES: locking.ALL_SET,
3512         locking.LEVEL_INSTANCE: locking.ALL_SET,
3513         }
3514     self.share_locks = {
3515       locking.LEVEL_NODE_RES: 1,
3516       locking.LEVEL_INSTANCE: 0,
3517       }
3518
3519   def DeclareLocks(self, level):
3520     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3521       self._LockInstancesNodes(primary_only=True, level=level)
3522
3523   def CheckPrereq(self):
3524     """Check prerequisites.
3525
3526     This only checks the optional instance list against the existing names.
3527
3528     """
3529     if self.wanted_names is None:
3530       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3531
3532     self.wanted_instances = \
3533         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3534
3535   def _EnsureChildSizes(self, disk):
3536     """Ensure children of the disk have the needed disk size.
3537
3538     This is valid mainly for DRBD8 and fixes an issue where the
3539     children have smaller disk size.
3540
3541     @param disk: an L{ganeti.objects.Disk} object
3542
3543     """
3544     if disk.dev_type == constants.LD_DRBD8:
3545       assert disk.children, "Empty children for DRBD8?"
3546       fchild = disk.children[0]
3547       mismatch = fchild.size < disk.size
3548       if mismatch:
3549         self.LogInfo("Child disk has size %d, parent %d, fixing",
3550                      fchild.size, disk.size)
3551         fchild.size = disk.size
3552
3553       # and we recurse on this child only, not on the metadev
3554       return self._EnsureChildSizes(fchild) or mismatch
3555     else:
3556       return False
3557
3558   def Exec(self, feedback_fn):
3559     """Verify the size of cluster disks.
3560
3561     """
3562     # TODO: check child disks too
3563     # TODO: check differences in size between primary/secondary nodes
3564     per_node_disks = {}
3565     for instance in self.wanted_instances:
3566       pnode = instance.primary_node
3567       if pnode not in per_node_disks:
3568         per_node_disks[pnode] = []
3569       for idx, disk in enumerate(instance.disks):
3570         per_node_disks[pnode].append((instance, idx, disk))
3571
3572     assert not (frozenset(per_node_disks.keys()) -
3573                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3574       "Not owning correct locks"
3575     assert not self.owned_locks(locking.LEVEL_NODE)
3576
3577     changed = []
3578     for node, dskl in per_node_disks.items():
3579       newl = [v[2].Copy() for v in dskl]
3580       for dsk in newl:
3581         self.cfg.SetDiskID(dsk, node)
3582       result = self.rpc.call_blockdev_getsize(node, newl)
3583       if result.fail_msg:
3584         self.LogWarning("Failure in blockdev_getsize call to node"
3585                         " %s, ignoring", node)
3586         continue
3587       if len(result.payload) != len(dskl):
3588         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3589                         " result.payload=%s", node, len(dskl), result.payload)
3590         self.LogWarning("Invalid result from node %s, ignoring node results",
3591                         node)
3592         continue
3593       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3594         if size is None:
3595           self.LogWarning("Disk %d of instance %s did not return size"
3596                           " information, ignoring", idx, instance.name)
3597           continue
3598         if not isinstance(size, (int, long)):
3599           self.LogWarning("Disk %d of instance %s did not return valid"
3600                           " size information, ignoring", idx, instance.name)
3601           continue
3602         size = size >> 20
3603         if size != disk.size:
3604           self.LogInfo("Disk %d of instance %s has mismatched size,"
3605                        " correcting: recorded %d, actual %d", idx,
3606                        instance.name, disk.size, size)
3607           disk.size = size
3608           self.cfg.Update(instance, feedback_fn)
3609           changed.append((instance.name, idx, size))
3610         if self._EnsureChildSizes(disk):
3611           self.cfg.Update(instance, feedback_fn)
3612           changed.append((instance.name, idx, disk.size))
3613     return changed
3614
3615
3616 class LUClusterRename(LogicalUnit):
3617   """Rename the cluster.
3618
3619   """
3620   HPATH = "cluster-rename"
3621   HTYPE = constants.HTYPE_CLUSTER
3622
3623   def BuildHooksEnv(self):
3624     """Build hooks env.
3625
3626     """
3627     return {
3628       "OP_TARGET": self.cfg.GetClusterName(),
3629       "NEW_NAME": self.op.name,
3630       }
3631
3632   def BuildHooksNodes(self):
3633     """Build hooks nodes.
3634
3635     """
3636     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3637
3638   def CheckPrereq(self):
3639     """Verify that the passed name is a valid one.
3640
3641     """
3642     hostname = netutils.GetHostname(name=self.op.name,
3643                                     family=self.cfg.GetPrimaryIPFamily())
3644
3645     new_name = hostname.name
3646     self.ip = new_ip = hostname.ip
3647     old_name = self.cfg.GetClusterName()
3648     old_ip = self.cfg.GetMasterIP()
3649     if new_name == old_name and new_ip == old_ip:
3650       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3651                                  " cluster has changed",
3652                                  errors.ECODE_INVAL)
3653     if new_ip != old_ip:
3654       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3655         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3656                                    " reachable on the network" %
3657                                    new_ip, errors.ECODE_NOTUNIQUE)
3658
3659     self.op.name = new_name
3660
3661   def Exec(self, feedback_fn):
3662     """Rename the cluster.
3663
3664     """
3665     clustername = self.op.name
3666     new_ip = self.ip
3667
3668     # shutdown the master IP
3669     master_params = self.cfg.GetMasterNetworkParameters()
3670     ems = self.cfg.GetUseExternalMipScript()
3671     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3672                                                      master_params, ems)
3673     result.Raise("Could not disable the master role")
3674
3675     try:
3676       cluster = self.cfg.GetClusterInfo()
3677       cluster.cluster_name = clustername
3678       cluster.master_ip = new_ip
3679       self.cfg.Update(cluster, feedback_fn)
3680
3681       # update the known hosts file
3682       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3683       node_list = self.cfg.GetOnlineNodeList()
3684       try:
3685         node_list.remove(master_params.name)
3686       except ValueError:
3687         pass
3688       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3689     finally:
3690       master_params.ip = new_ip
3691       result = self.rpc.call_node_activate_master_ip(master_params.name,
3692                                                      master_params, ems)
3693       msg = result.fail_msg
3694       if msg:
3695         self.LogWarning("Could not re-enable the master role on"
3696                         " the master, please restart manually: %s", msg)
3697
3698     return clustername
3699
3700
3701 def _ValidateNetmask(cfg, netmask):
3702   """Checks if a netmask is valid.
3703
3704   @type cfg: L{config.ConfigWriter}
3705   @param cfg: The cluster configuration
3706   @type netmask: int
3707   @param netmask: the netmask to be verified
3708   @raise errors.OpPrereqError: if the validation fails
3709
3710   """
3711   ip_family = cfg.GetPrimaryIPFamily()
3712   try:
3713     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3714   except errors.ProgrammerError:
3715     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3716                                ip_family)
3717   if not ipcls.ValidateNetmask(netmask):
3718     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3719                                 (netmask))
3720
3721
3722 class LUClusterSetParams(LogicalUnit):
3723   """Change the parameters of the cluster.
3724
3725   """
3726   HPATH = "cluster-modify"
3727   HTYPE = constants.HTYPE_CLUSTER
3728   REQ_BGL = False
3729
3730   def CheckArguments(self):
3731     """Check parameters
3732
3733     """
3734     if self.op.uid_pool:
3735       uidpool.CheckUidPool(self.op.uid_pool)
3736
3737     if self.op.add_uids:
3738       uidpool.CheckUidPool(self.op.add_uids)
3739
3740     if self.op.remove_uids:
3741       uidpool.CheckUidPool(self.op.remove_uids)
3742
3743     if self.op.master_netmask is not None:
3744       _ValidateNetmask(self.cfg, self.op.master_netmask)
3745
3746     if self.op.diskparams:
3747       for dt_params in self.op.diskparams.values():
3748         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3749
3750   def ExpandNames(self):
3751     # FIXME: in the future maybe other cluster params won't require checking on
3752     # all nodes to be modified.
3753     self.needed_locks = {
3754       locking.LEVEL_NODE: locking.ALL_SET,
3755     }
3756     self.share_locks[locking.LEVEL_NODE] = 1
3757
3758   def BuildHooksEnv(self):
3759     """Build hooks env.
3760
3761     """
3762     return {
3763       "OP_TARGET": self.cfg.GetClusterName(),
3764       "NEW_VG_NAME": self.op.vg_name,
3765       }
3766
3767   def BuildHooksNodes(self):
3768     """Build hooks nodes.
3769
3770     """
3771     mn = self.cfg.GetMasterNode()
3772     return ([mn], [mn])
3773
3774   def CheckPrereq(self):
3775     """Check prerequisites.
3776
3777     This checks whether the given params don't conflict and
3778     if the given volume group is valid.
3779
3780     """
3781     if self.op.vg_name is not None and not self.op.vg_name:
3782       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3783         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3784                                    " instances exist", errors.ECODE_INVAL)
3785
3786     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3787       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3788         raise errors.OpPrereqError("Cannot disable drbd helper while"
3789                                    " drbd-based instances exist",
3790                                    errors.ECODE_INVAL)
3791
3792     node_list = self.owned_locks(locking.LEVEL_NODE)
3793
3794     # if vg_name not None, checks given volume group on all nodes
3795     if self.op.vg_name:
3796       vglist = self.rpc.call_vg_list(node_list)
3797       for node in node_list:
3798         msg = vglist[node].fail_msg
3799         if msg:
3800           # ignoring down node
3801           self.LogWarning("Error while gathering data on node %s"
3802                           " (ignoring node): %s", node, msg)
3803           continue
3804         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3805                                               self.op.vg_name,
3806                                               constants.MIN_VG_SIZE)
3807         if vgstatus:
3808           raise errors.OpPrereqError("Error on node '%s': %s" %
3809                                      (node, vgstatus), errors.ECODE_ENVIRON)
3810
3811     if self.op.drbd_helper:
3812       # checks given drbd helper on all nodes
3813       helpers = self.rpc.call_drbd_helper(node_list)
3814       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3815         if ninfo.offline:
3816           self.LogInfo("Not checking drbd helper on offline node %s", node)
3817           continue
3818         msg = helpers[node].fail_msg
3819         if msg:
3820           raise errors.OpPrereqError("Error checking drbd helper on node"
3821                                      " '%s': %s" % (node, msg),
3822                                      errors.ECODE_ENVIRON)
3823         node_helper = helpers[node].payload
3824         if node_helper != self.op.drbd_helper:
3825           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3826                                      (node, node_helper), errors.ECODE_ENVIRON)
3827
3828     self.cluster = cluster = self.cfg.GetClusterInfo()
3829     # validate params changes
3830     if self.op.beparams:
3831       objects.UpgradeBeParams(self.op.beparams)
3832       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3833       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3834
3835     if self.op.ndparams:
3836       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3837       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3838
3839       # TODO: we need a more general way to handle resetting
3840       # cluster-level parameters to default values
3841       if self.new_ndparams["oob_program"] == "":
3842         self.new_ndparams["oob_program"] = \
3843             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3844
3845     if self.op.hv_state:
3846       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3847                                             self.cluster.hv_state_static)
3848       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3849                                for hv, values in new_hv_state.items())
3850
3851     if self.op.disk_state:
3852       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3853                                                 self.cluster.disk_state_static)
3854       self.new_disk_state = \
3855         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3856                             for name, values in svalues.items()))
3857              for storage, svalues in new_disk_state.items())
3858
3859     if self.op.ipolicy:
3860       ipolicy = {}
3861       for key, value in self.op.ipolicy.items():
3862         utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
3863         ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
3864                                           value)
3865       objects.InstancePolicy.CheckParameterSyntax(ipolicy)
3866       self.new_ipolicy = ipolicy
3867
3868     if self.op.nicparams:
3869       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3870       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3871       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3872       nic_errors = []
3873
3874       # check all instances for consistency
3875       for instance in self.cfg.GetAllInstancesInfo().values():
3876         for nic_idx, nic in enumerate(instance.nics):
3877           params_copy = copy.deepcopy(nic.nicparams)
3878           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3879
3880           # check parameter syntax
3881           try:
3882             objects.NIC.CheckParameterSyntax(params_filled)
3883           except errors.ConfigurationError, err:
3884             nic_errors.append("Instance %s, nic/%d: %s" %
3885                               (instance.name, nic_idx, err))
3886
3887           # if we're moving instances to routed, check that they have an ip
3888           target_mode = params_filled[constants.NIC_MODE]
3889           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3890             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3891                               " address" % (instance.name, nic_idx))
3892       if nic_errors:
3893         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3894                                    "\n".join(nic_errors))
3895
3896     # hypervisor list/parameters
3897     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3898     if self.op.hvparams:
3899       for hv_name, hv_dict in self.op.hvparams.items():
3900         if hv_name not in self.new_hvparams:
3901           self.new_hvparams[hv_name] = hv_dict
3902         else:
3903           self.new_hvparams[hv_name].update(hv_dict)
3904
3905     # disk template parameters
3906     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3907     if self.op.diskparams:
3908       for dt_name, dt_params in self.op.diskparams.items():
3909         if dt_name not in self.op.diskparams:
3910           self.new_diskparams[dt_name] = dt_params
3911         else:
3912           self.new_diskparams[dt_name].update(dt_params)
3913
3914     # os hypervisor parameters
3915     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3916     if self.op.os_hvp:
3917       for os_name, hvs in self.op.os_hvp.items():
3918         if os_name not in self.new_os_hvp:
3919           self.new_os_hvp[os_name] = hvs
3920         else:
3921           for hv_name, hv_dict in hvs.items():
3922             if hv_name not in self.new_os_hvp[os_name]:
3923               self.new_os_hvp[os_name][hv_name] = hv_dict
3924             else:
3925               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3926
3927     # os parameters
3928     self.new_osp = objects.FillDict(cluster.osparams, {})
3929     if self.op.osparams:
3930       for os_name, osp in self.op.osparams.items():
3931         if os_name not in self.new_osp:
3932           self.new_osp[os_name] = {}
3933
3934         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3935                                                   use_none=True)
3936
3937         if not self.new_osp[os_name]:
3938           # we removed all parameters
3939           del self.new_osp[os_name]
3940         else:
3941           # check the parameter validity (remote check)
3942           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3943                          os_name, self.new_osp[os_name])
3944
3945     # changes to the hypervisor list
3946     if self.op.enabled_hypervisors is not None:
3947       self.hv_list = self.op.enabled_hypervisors
3948       for hv in self.hv_list:
3949         # if the hypervisor doesn't already exist in the cluster
3950         # hvparams, we initialize it to empty, and then (in both
3951         # cases) we make sure to fill the defaults, as we might not
3952         # have a complete defaults list if the hypervisor wasn't
3953         # enabled before
3954         if hv not in new_hvp:
3955           new_hvp[hv] = {}
3956         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3957         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3958     else:
3959       self.hv_list = cluster.enabled_hypervisors
3960
3961     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3962       # either the enabled list has changed, or the parameters have, validate
3963       for hv_name, hv_params in self.new_hvparams.items():
3964         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3965             (self.op.enabled_hypervisors and
3966              hv_name in self.op.enabled_hypervisors)):
3967           # either this is a new hypervisor, or its parameters have changed
3968           hv_class = hypervisor.GetHypervisor(hv_name)
3969           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3970           hv_class.CheckParameterSyntax(hv_params)
3971           _CheckHVParams(self, node_list, hv_name, hv_params)
3972
3973     if self.op.os_hvp:
3974       # no need to check any newly-enabled hypervisors, since the
3975       # defaults have already been checked in the above code-block
3976       for os_name, os_hvp in self.new_os_hvp.items():
3977         for hv_name, hv_params in os_hvp.items():
3978           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3979           # we need to fill in the new os_hvp on top of the actual hv_p
3980           cluster_defaults = self.new_hvparams.get(hv_name, {})
3981           new_osp = objects.FillDict(cluster_defaults, hv_params)
3982           hv_class = hypervisor.GetHypervisor(hv_name)
3983           hv_class.CheckParameterSyntax(new_osp)
3984           _CheckHVParams(self, node_list, hv_name, new_osp)
3985
3986     if self.op.default_iallocator:
3987       alloc_script = utils.FindFile(self.op.default_iallocator,
3988                                     constants.IALLOCATOR_SEARCH_PATH,
3989                                     os.path.isfile)
3990       if alloc_script is None:
3991         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3992                                    " specified" % self.op.default_iallocator,
3993                                    errors.ECODE_INVAL)
3994
3995   def Exec(self, feedback_fn):
3996     """Change the parameters of the cluster.
3997
3998     """
3999     if self.op.vg_name is not None:
4000       new_volume = self.op.vg_name
4001       if not new_volume:
4002         new_volume = None
4003       if new_volume != self.cfg.GetVGName():
4004         self.cfg.SetVGName(new_volume)
4005       else:
4006         feedback_fn("Cluster LVM configuration already in desired"
4007                     " state, not changing")
4008     if self.op.drbd_helper is not None:
4009       new_helper = self.op.drbd_helper
4010       if not new_helper:
4011         new_helper = None
4012       if new_helper != self.cfg.GetDRBDHelper():
4013         self.cfg.SetDRBDHelper(new_helper)
4014       else:
4015         feedback_fn("Cluster DRBD helper already in desired state,"
4016                     " not changing")
4017     if self.op.hvparams:
4018       self.cluster.hvparams = self.new_hvparams
4019     if self.op.os_hvp:
4020       self.cluster.os_hvp = self.new_os_hvp
4021     if self.op.enabled_hypervisors is not None:
4022       self.cluster.hvparams = self.new_hvparams
4023       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4024     if self.op.beparams:
4025       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4026     if self.op.nicparams:
4027       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4028     if self.op.ipolicy:
4029       self.cluster.ipolicy = self.new_ipolicy
4030     if self.op.osparams:
4031       self.cluster.osparams = self.new_osp
4032     if self.op.ndparams:
4033       self.cluster.ndparams = self.new_ndparams
4034     if self.op.diskparams:
4035       self.cluster.diskparams = self.new_diskparams
4036     if self.op.hv_state:
4037       self.cluster.hv_state_static = self.new_hv_state
4038     if self.op.disk_state:
4039       self.cluster.disk_state_static = self.new_disk_state
4040
4041     if self.op.candidate_pool_size is not None:
4042       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4043       # we need to update the pool size here, otherwise the save will fail
4044       _AdjustCandidatePool(self, [])
4045
4046     if self.op.maintain_node_health is not None:
4047       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4048         feedback_fn("Note: CONFD was disabled at build time, node health"
4049                     " maintenance is not useful (still enabling it)")
4050       self.cluster.maintain_node_health = self.op.maintain_node_health
4051
4052     if self.op.prealloc_wipe_disks is not None:
4053       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4054
4055     if self.op.add_uids is not None:
4056       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4057
4058     if self.op.remove_uids is not None:
4059       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4060
4061     if self.op.uid_pool is not None:
4062       self.cluster.uid_pool = self.op.uid_pool
4063
4064     if self.op.default_iallocator is not None:
4065       self.cluster.default_iallocator = self.op.default_iallocator
4066
4067     if self.op.reserved_lvs is not None:
4068       self.cluster.reserved_lvs = self.op.reserved_lvs
4069
4070     if self.op.use_external_mip_script is not None:
4071       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4072
4073     def helper_os(aname, mods, desc):
4074       desc += " OS list"
4075       lst = getattr(self.cluster, aname)
4076       for key, val in mods:
4077         if key == constants.DDM_ADD:
4078           if val in lst:
4079             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4080           else:
4081             lst.append(val)
4082         elif key == constants.DDM_REMOVE:
4083           if val in lst:
4084             lst.remove(val)
4085           else:
4086             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4087         else:
4088           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4089
4090     if self.op.hidden_os:
4091       helper_os("hidden_os", self.op.hidden_os, "hidden")
4092
4093     if self.op.blacklisted_os:
4094       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4095
4096     if self.op.master_netdev:
4097       master_params = self.cfg.GetMasterNetworkParameters()
4098       ems = self.cfg.GetUseExternalMipScript()
4099       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4100                   self.cluster.master_netdev)
4101       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4102                                                        master_params, ems)
4103       result.Raise("Could not disable the master ip")
4104       feedback_fn("Changing master_netdev from %s to %s" %
4105                   (master_params.netdev, self.op.master_netdev))
4106       self.cluster.master_netdev = self.op.master_netdev
4107
4108     if self.op.master_netmask:
4109       master_params = self.cfg.GetMasterNetworkParameters()
4110       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4111       result = self.rpc.call_node_change_master_netmask(master_params.name,
4112                                                         master_params.netmask,
4113                                                         self.op.master_netmask,
4114                                                         master_params.ip,
4115                                                         master_params.netdev)
4116       if result.fail_msg:
4117         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4118         feedback_fn(msg)
4119
4120       self.cluster.master_netmask = self.op.master_netmask
4121
4122     self.cfg.Update(self.cluster, feedback_fn)
4123
4124     if self.op.master_netdev:
4125       master_params = self.cfg.GetMasterNetworkParameters()
4126       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4127                   self.op.master_netdev)
4128       ems = self.cfg.GetUseExternalMipScript()
4129       result = self.rpc.call_node_activate_master_ip(master_params.name,
4130                                                      master_params, ems)
4131       if result.fail_msg:
4132         self.LogWarning("Could not re-enable the master ip on"
4133                         " the master, please restart manually: %s",
4134                         result.fail_msg)
4135
4136
4137 def _UploadHelper(lu, nodes, fname):
4138   """Helper for uploading a file and showing warnings.
4139
4140   """
4141   if os.path.exists(fname):
4142     result = lu.rpc.call_upload_file(nodes, fname)
4143     for to_node, to_result in result.items():
4144       msg = to_result.fail_msg
4145       if msg:
4146         msg = ("Copy of file %s to node %s failed: %s" %
4147                (fname, to_node, msg))
4148         lu.proc.LogWarning(msg)
4149
4150
4151 def _ComputeAncillaryFiles(cluster, redist):
4152   """Compute files external to Ganeti which need to be consistent.
4153
4154   @type redist: boolean
4155   @param redist: Whether to include files which need to be redistributed
4156
4157   """
4158   # Compute files for all nodes
4159   files_all = set([
4160     constants.SSH_KNOWN_HOSTS_FILE,
4161     constants.CONFD_HMAC_KEY,
4162     constants.CLUSTER_DOMAIN_SECRET_FILE,
4163     constants.SPICE_CERT_FILE,
4164     constants.SPICE_CACERT_FILE,
4165     constants.RAPI_USERS_FILE,
4166     ])
4167
4168   if not redist:
4169     files_all.update(constants.ALL_CERT_FILES)
4170     files_all.update(ssconf.SimpleStore().GetFileList())
4171   else:
4172     # we need to ship at least the RAPI certificate
4173     files_all.add(constants.RAPI_CERT_FILE)
4174
4175   if cluster.modify_etc_hosts:
4176     files_all.add(constants.ETC_HOSTS)
4177
4178   # Files which are optional, these must:
4179   # - be present in one other category as well
4180   # - either exist or not exist on all nodes of that category (mc, vm all)
4181   files_opt = set([
4182     constants.RAPI_USERS_FILE,
4183     ])
4184
4185   # Files which should only be on master candidates
4186   files_mc = set()
4187
4188   if not redist:
4189     files_mc.add(constants.CLUSTER_CONF_FILE)
4190
4191     # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4192     # replication
4193     files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4194
4195   # Files which should only be on VM-capable nodes
4196   files_vm = set(filename
4197     for hv_name in cluster.enabled_hypervisors
4198     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4199
4200   files_opt |= set(filename
4201     for hv_name in cluster.enabled_hypervisors
4202     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4203
4204   # Filenames in each category must be unique
4205   all_files_set = files_all | files_mc | files_vm
4206   assert (len(all_files_set) ==
4207           sum(map(len, [files_all, files_mc, files_vm]))), \
4208          "Found file listed in more than one file list"
4209
4210   # Optional files must be present in one other category
4211   assert all_files_set.issuperset(files_opt), \
4212          "Optional file not in a different required list"
4213
4214   return (files_all, files_opt, files_mc, files_vm)
4215
4216
4217 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4218   """Distribute additional files which are part of the cluster configuration.
4219
4220   ConfigWriter takes care of distributing the config and ssconf files, but
4221   there are more files which should be distributed to all nodes. This function
4222   makes sure those are copied.
4223
4224   @param lu: calling logical unit
4225   @param additional_nodes: list of nodes not in the config to distribute to
4226   @type additional_vm: boolean
4227   @param additional_vm: whether the additional nodes are vm-capable or not
4228
4229   """
4230   # Gather target nodes
4231   cluster = lu.cfg.GetClusterInfo()
4232   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4233
4234   online_nodes = lu.cfg.GetOnlineNodeList()
4235   vm_nodes = lu.cfg.GetVmCapableNodeList()
4236
4237   if additional_nodes is not None:
4238     online_nodes.extend(additional_nodes)
4239     if additional_vm:
4240       vm_nodes.extend(additional_nodes)
4241
4242   # Never distribute to master node
4243   for nodelist in [online_nodes, vm_nodes]:
4244     if master_info.name in nodelist:
4245       nodelist.remove(master_info.name)
4246
4247   # Gather file lists
4248   (files_all, _, files_mc, files_vm) = \
4249     _ComputeAncillaryFiles(cluster, True)
4250
4251   # Never re-distribute configuration file from here
4252   assert not (constants.CLUSTER_CONF_FILE in files_all or
4253               constants.CLUSTER_CONF_FILE in files_vm)
4254   assert not files_mc, "Master candidates not handled in this function"
4255
4256   filemap = [
4257     (online_nodes, files_all),
4258     (vm_nodes, files_vm),
4259     ]
4260
4261   # Upload the files
4262   for (node_list, files) in filemap:
4263     for fname in files:
4264       _UploadHelper(lu, node_list, fname)
4265
4266
4267 class LUClusterRedistConf(NoHooksLU):
4268   """Force the redistribution of cluster configuration.
4269
4270   This is a very simple LU.
4271
4272   """
4273   REQ_BGL = False
4274
4275   def ExpandNames(self):
4276     self.needed_locks = {
4277       locking.LEVEL_NODE: locking.ALL_SET,
4278     }
4279     self.share_locks[locking.LEVEL_NODE] = 1
4280
4281   def Exec(self, feedback_fn):
4282     """Redistribute the configuration.
4283
4284     """
4285     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4286     _RedistributeAncillaryFiles(self)
4287
4288
4289 class LUClusterActivateMasterIp(NoHooksLU):
4290   """Activate the master IP on the master node.
4291
4292   """
4293   def Exec(self, feedback_fn):
4294     """Activate the master IP.
4295
4296     """
4297     master_params = self.cfg.GetMasterNetworkParameters()
4298     ems = self.cfg.GetUseExternalMipScript()
4299     result = self.rpc.call_node_activate_master_ip(master_params.name,
4300                                                    master_params, ems)
4301     result.Raise("Could not activate the master IP")
4302
4303
4304 class LUClusterDeactivateMasterIp(NoHooksLU):
4305   """Deactivate the master IP on the master node.
4306
4307   """
4308   def Exec(self, feedback_fn):
4309     """Deactivate the master IP.
4310
4311     """
4312     master_params = self.cfg.GetMasterNetworkParameters()
4313     ems = self.cfg.GetUseExternalMipScript()
4314     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4315                                                      master_params, ems)
4316     result.Raise("Could not deactivate the master IP")
4317
4318
4319 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4320   """Sleep and poll for an instance's disk to sync.
4321
4322   """
4323   if not instance.disks or disks is not None and not disks:
4324     return True
4325
4326   disks = _ExpandCheckDisks(instance, disks)
4327
4328   if not oneshot:
4329     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4330
4331   node = instance.primary_node
4332
4333   for dev in disks:
4334     lu.cfg.SetDiskID(dev, node)
4335
4336   # TODO: Convert to utils.Retry
4337
4338   retries = 0
4339   degr_retries = 10 # in seconds, as we sleep 1 second each time
4340   while True:
4341     max_time = 0
4342     done = True
4343     cumul_degraded = False
4344     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4345     msg = rstats.fail_msg
4346     if msg:
4347       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4348       retries += 1
4349       if retries >= 10:
4350         raise errors.RemoteError("Can't contact node %s for mirror data,"
4351                                  " aborting." % node)
4352       time.sleep(6)
4353       continue
4354     rstats = rstats.payload
4355     retries = 0
4356     for i, mstat in enumerate(rstats):
4357       if mstat is None:
4358         lu.LogWarning("Can't compute data for node %s/%s",
4359                            node, disks[i].iv_name)
4360         continue
4361
4362       cumul_degraded = (cumul_degraded or
4363                         (mstat.is_degraded and mstat.sync_percent is None))
4364       if mstat.sync_percent is not None:
4365         done = False
4366         if mstat.estimated_time is not None:
4367           rem_time = ("%s remaining (estimated)" %
4368                       utils.FormatSeconds(mstat.estimated_time))
4369           max_time = mstat.estimated_time
4370         else:
4371           rem_time = "no time estimate"
4372         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4373                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4374
4375     # if we're done but degraded, let's do a few small retries, to
4376     # make sure we see a stable and not transient situation; therefore
4377     # we force restart of the loop
4378     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4379       logging.info("Degraded disks found, %d retries left", degr_retries)
4380       degr_retries -= 1
4381       time.sleep(1)
4382       continue
4383
4384     if done or oneshot:
4385       break
4386
4387     time.sleep(min(60, max_time))
4388
4389   if done:
4390     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4391   return not cumul_degraded
4392
4393
4394 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4395   """Check that mirrors are not degraded.
4396
4397   The ldisk parameter, if True, will change the test from the
4398   is_degraded attribute (which represents overall non-ok status for
4399   the device(s)) to the ldisk (representing the local storage status).
4400
4401   """
4402   lu.cfg.SetDiskID(dev, node)
4403
4404   result = True
4405
4406   if on_primary or dev.AssembleOnSecondary():
4407     rstats = lu.rpc.call_blockdev_find(node, dev)
4408     msg = rstats.fail_msg
4409     if msg:
4410       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4411       result = False
4412     elif not rstats.payload:
4413       lu.LogWarning("Can't find disk on node %s", node)
4414       result = False
4415     else:
4416       if ldisk:
4417         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4418       else:
4419         result = result and not rstats.payload.is_degraded
4420
4421   if dev.children:
4422     for child in dev.children:
4423       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4424
4425   return result
4426
4427
4428 class LUOobCommand(NoHooksLU):
4429   """Logical unit for OOB handling.
4430
4431   """
4432   REG_BGL = False
4433   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4434
4435   def ExpandNames(self):
4436     """Gather locks we need.
4437
4438     """
4439     if self.op.node_names:
4440       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4441       lock_names = self.op.node_names
4442     else:
4443       lock_names = locking.ALL_SET
4444
4445     self.needed_locks = {
4446       locking.LEVEL_NODE: lock_names,
4447       }
4448
4449   def CheckPrereq(self):
4450     """Check prerequisites.
4451
4452     This checks:
4453      - the node exists in the configuration
4454      - OOB is supported
4455
4456     Any errors are signaled by raising errors.OpPrereqError.
4457
4458     """
4459     self.nodes = []
4460     self.master_node = self.cfg.GetMasterNode()
4461
4462     assert self.op.power_delay >= 0.0
4463
4464     if self.op.node_names:
4465       if (self.op.command in self._SKIP_MASTER and
4466           self.master_node in self.op.node_names):
4467         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4468         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4469
4470         if master_oob_handler:
4471           additional_text = ("run '%s %s %s' if you want to operate on the"
4472                              " master regardless") % (master_oob_handler,
4473                                                       self.op.command,
4474                                                       self.master_node)
4475         else:
4476           additional_text = "it does not support out-of-band operations"
4477
4478         raise errors.OpPrereqError(("Operating on the master node %s is not"
4479                                     " allowed for %s; %s") %
4480                                    (self.master_node, self.op.command,
4481                                     additional_text), errors.ECODE_INVAL)
4482     else:
4483       self.op.node_names = self.cfg.GetNodeList()
4484       if self.op.command in self._SKIP_MASTER:
4485         self.op.node_names.remove(self.master_node)
4486
4487     if self.op.command in self._SKIP_MASTER:
4488       assert self.master_node not in self.op.node_names
4489
4490     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4491       if node is None:
4492         raise errors.OpPrereqError("Node %s not found" % node_name,
4493                                    errors.ECODE_NOENT)
4494       else:
4495         self.nodes.append(node)
4496
4497       if (not self.op.ignore_status and
4498           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4499         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4500                                     " not marked offline") % node_name,
4501                                    errors.ECODE_STATE)
4502
4503   def Exec(self, feedback_fn):
4504     """Execute OOB and return result if we expect any.
4505
4506     """
4507     master_node = self.master_node
4508     ret = []
4509
4510     for idx, node in enumerate(utils.NiceSort(self.nodes,
4511                                               key=lambda node: node.name)):
4512       node_entry = [(constants.RS_NORMAL, node.name)]
4513       ret.append(node_entry)
4514
4515       oob_program = _SupportsOob(self.cfg, node)
4516
4517       if not oob_program:
4518         node_entry.append((constants.RS_UNAVAIL, None))
4519         continue
4520
4521       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4522                    self.op.command, oob_program, node.name)
4523       result = self.rpc.call_run_oob(master_node, oob_program,
4524                                      self.op.command, node.name,
4525                                      self.op.timeout)
4526
4527       if result.fail_msg:
4528         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4529                         node.name, result.fail_msg)
4530         node_entry.append((constants.RS_NODATA, None))
4531       else:
4532         try:
4533           self._CheckPayload(result)
4534         except errors.OpExecError, err:
4535           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4536                           node.name, err)
4537           node_entry.append((constants.RS_NODATA, None))
4538         else:
4539           if self.op.command == constants.OOB_HEALTH:
4540             # For health we should log important events
4541             for item, status in result.payload:
4542               if status in [constants.OOB_STATUS_WARNING,
4543                             constants.OOB_STATUS_CRITICAL]:
4544                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4545                                 item, node.name, status)
4546
4547           if self.op.command == constants.OOB_POWER_ON:
4548             node.powered = True
4549           elif self.op.command == constants.OOB_POWER_OFF:
4550             node.powered = False
4551           elif self.op.command == constants.OOB_POWER_STATUS:
4552             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4553             if powered != node.powered:
4554               logging.warning(("Recorded power state (%s) of node '%s' does not"
4555                                " match actual power state (%s)"), node.powered,
4556                               node.name, powered)
4557
4558           # For configuration changing commands we should update the node
4559           if self.op.command in (constants.OOB_POWER_ON,
4560                                  constants.OOB_POWER_OFF):
4561             self.cfg.Update(node, feedback_fn)
4562
4563           node_entry.append((constants.RS_NORMAL, result.payload))
4564
4565           if (self.op.command == constants.OOB_POWER_ON and
4566               idx < len(self.nodes) - 1):
4567             time.sleep(self.op.power_delay)
4568
4569     return ret
4570
4571   def _CheckPayload(self, result):
4572     """Checks if the payload is valid.
4573
4574     @param result: RPC result
4575     @raises errors.OpExecError: If payload is not valid
4576
4577     """
4578     errs = []
4579     if self.op.command == constants.OOB_HEALTH:
4580       if not isinstance(result.payload, list):
4581         errs.append("command 'health' is expected to return a list but got %s" %
4582                     type(result.payload))
4583       else:
4584         for item, status in result.payload:
4585           if status not in constants.OOB_STATUSES:
4586             errs.append("health item '%s' has invalid status '%s'" %
4587                         (item, status))
4588
4589     if self.op.command == constants.OOB_POWER_STATUS:
4590       if not isinstance(result.payload, dict):
4591         errs.append("power-status is expected to return a dict but got %s" %
4592                     type(result.payload))
4593
4594     if self.op.command in [
4595         constants.OOB_POWER_ON,
4596         constants.OOB_POWER_OFF,
4597         constants.OOB_POWER_CYCLE,
4598         ]:
4599       if result.payload is not None:
4600         errs.append("%s is expected to not return payload but got '%s'" %
4601                     (self.op.command, result.payload))
4602
4603     if errs:
4604       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4605                                utils.CommaJoin(errs))
4606
4607
4608 class _OsQuery(_QueryBase):
4609   FIELDS = query.OS_FIELDS
4610
4611   def ExpandNames(self, lu):
4612     # Lock all nodes in shared mode
4613     # Temporary removal of locks, should be reverted later
4614     # TODO: reintroduce locks when they are lighter-weight
4615     lu.needed_locks = {}
4616     #self.share_locks[locking.LEVEL_NODE] = 1
4617     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4618
4619     # The following variables interact with _QueryBase._GetNames
4620     if self.names:
4621       self.wanted = self.names
4622     else:
4623       self.wanted = locking.ALL_SET
4624
4625     self.do_locking = self.use_locking
4626
4627   def DeclareLocks(self, lu, level):
4628     pass
4629
4630   @staticmethod
4631   def _DiagnoseByOS(rlist):
4632     """Remaps a per-node return list into an a per-os per-node dictionary
4633
4634     @param rlist: a map with node names as keys and OS objects as values
4635
4636     @rtype: dict
4637     @return: a dictionary with osnames as keys and as value another
4638         map, with nodes as keys and tuples of (path, status, diagnose,
4639         variants, parameters, api_versions) as values, eg::
4640
4641           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4642                                      (/srv/..., False, "invalid api")],
4643                            "node2": [(/srv/..., True, "", [], [])]}
4644           }
4645
4646     """
4647     all_os = {}
4648     # we build here the list of nodes that didn't fail the RPC (at RPC
4649     # level), so that nodes with a non-responding node daemon don't
4650     # make all OSes invalid
4651     good_nodes = [node_name for node_name in rlist
4652                   if not rlist[node_name].fail_msg]
4653     for node_name, nr in rlist.items():
4654       if nr.fail_msg or not nr.payload:
4655         continue
4656       for (name, path, status, diagnose, variants,
4657            params, api_versions) in nr.payload:
4658         if name not in all_os:
4659           # build a list of nodes for this os containing empty lists
4660           # for each node in node_list
4661           all_os[name] = {}
4662           for nname in good_nodes:
4663             all_os[name][nname] = []
4664         # convert params from [name, help] to (name, help)
4665         params = [tuple(v) for v in params]
4666         all_os[name][node_name].append((path, status, diagnose,
4667                                         variants, params, api_versions))
4668     return all_os
4669
4670   def _GetQueryData(self, lu):
4671     """Computes the list of nodes and their attributes.
4672
4673     """
4674     # Locking is not used
4675     assert not (compat.any(lu.glm.is_owned(level)
4676                            for level in locking.LEVELS
4677                            if level != locking.LEVEL_CLUSTER) or
4678                 self.do_locking or self.use_locking)
4679
4680     valid_nodes = [node.name
4681                    for node in lu.cfg.GetAllNodesInfo().values()
4682                    if not node.offline and node.vm_capable]
4683     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4684     cluster = lu.cfg.GetClusterInfo()
4685
4686     data = {}
4687
4688     for (os_name, os_data) in pol.items():
4689       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4690                           hidden=(os_name in cluster.hidden_os),
4691                           blacklisted=(os_name in cluster.blacklisted_os))
4692
4693       variants = set()
4694       parameters = set()
4695       api_versions = set()
4696
4697       for idx, osl in enumerate(os_data.values()):
4698         info.valid = bool(info.valid and osl and osl[0][1])
4699         if not info.valid:
4700           break
4701
4702         (node_variants, node_params, node_api) = osl[0][3:6]
4703         if idx == 0:
4704           # First entry
4705           variants.update(node_variants)
4706           parameters.update(node_params)
4707           api_versions.update(node_api)
4708         else:
4709           # Filter out inconsistent values
4710           variants.intersection_update(node_variants)
4711           parameters.intersection_update(node_params)
4712           api_versions.intersection_update(node_api)
4713
4714       info.variants = list(variants)
4715       info.parameters = list(parameters)
4716       info.api_versions = list(api_versions)
4717
4718       data[os_name] = info
4719
4720     # Prepare data in requested order
4721     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4722             if name in data]
4723
4724
4725 class LUOsDiagnose(NoHooksLU):
4726   """Logical unit for OS diagnose/query.
4727
4728   """
4729   REQ_BGL = False
4730
4731   @staticmethod
4732   def _BuildFilter(fields, names):
4733     """Builds a filter for querying OSes.
4734
4735     """
4736     name_filter = qlang.MakeSimpleFilter("name", names)
4737
4738     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4739     # respective field is not requested
4740     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4741                      for fname in ["hidden", "blacklisted"]
4742                      if fname not in fields]
4743     if "valid" not in fields:
4744       status_filter.append([qlang.OP_TRUE, "valid"])
4745
4746     if status_filter:
4747       status_filter.insert(0, qlang.OP_AND)
4748     else:
4749       status_filter = None
4750
4751     if name_filter and status_filter:
4752       return [qlang.OP_AND, name_filter, status_filter]
4753     elif name_filter:
4754       return name_filter
4755     else:
4756       return status_filter
4757
4758   def CheckArguments(self):
4759     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4760                        self.op.output_fields, False)
4761
4762   def ExpandNames(self):
4763     self.oq.ExpandNames(self)
4764
4765   def Exec(self, feedback_fn):
4766     return self.oq.OldStyleQuery(self)
4767
4768
4769 class LUNodeRemove(LogicalUnit):
4770   """Logical unit for removing a node.
4771
4772   """
4773   HPATH = "node-remove"
4774   HTYPE = constants.HTYPE_NODE
4775
4776   def BuildHooksEnv(self):
4777     """Build hooks env.
4778
4779     This doesn't run on the target node in the pre phase as a failed
4780     node would then be impossible to remove.
4781
4782     """
4783     return {
4784       "OP_TARGET": self.op.node_name,
4785       "NODE_NAME": self.op.node_name,
4786       }
4787
4788   def BuildHooksNodes(self):
4789     """Build hooks nodes.
4790
4791     """
4792     all_nodes = self.cfg.GetNodeList()
4793     try:
4794       all_nodes.remove(self.op.node_name)
4795     except ValueError:
4796       logging.warning("Node '%s', which is about to be removed, was not found"
4797                       " in the list of all nodes", self.op.node_name)
4798     return (all_nodes, all_nodes)
4799
4800   def CheckPrereq(self):
4801     """Check prerequisites.
4802
4803     This checks:
4804      - the node exists in the configuration
4805      - it does not have primary or secondary instances
4806      - it's not the master
4807
4808     Any errors are signaled by raising errors.OpPrereqError.
4809
4810     """
4811     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4812     node = self.cfg.GetNodeInfo(self.op.node_name)
4813     assert node is not None
4814
4815     masternode = self.cfg.GetMasterNode()
4816     if node.name == masternode:
4817       raise errors.OpPrereqError("Node is the master node, failover to another"
4818                                  " node is required", errors.ECODE_INVAL)
4819
4820     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4821       if node.name in instance.all_nodes:
4822         raise errors.OpPrereqError("Instance %s is still running on the node,"
4823                                    " please remove first" % instance_name,
4824                                    errors.ECODE_INVAL)
4825     self.op.node_name = node.name
4826     self.node = node
4827
4828   def Exec(self, feedback_fn):
4829     """Removes the node from the cluster.
4830
4831     """
4832     node = self.node
4833     logging.info("Stopping the node daemon and removing configs from node %s",
4834                  node.name)
4835
4836     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4837
4838     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4839       "Not owning BGL"
4840
4841     # Promote nodes to master candidate as needed
4842     _AdjustCandidatePool(self, exceptions=[node.name])
4843     self.context.RemoveNode(node.name)
4844
4845     # Run post hooks on the node before it's removed
4846     _RunPostHook(self, node.name)
4847
4848     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4849     msg = result.fail_msg
4850     if msg:
4851       self.LogWarning("Errors encountered on the remote node while leaving"
4852                       " the cluster: %s", msg)
4853
4854     # Remove node from our /etc/hosts
4855     if self.cfg.GetClusterInfo().modify_etc_hosts:
4856       master_node = self.cfg.GetMasterNode()
4857       result = self.rpc.call_etc_hosts_modify(master_node,
4858                                               constants.ETC_HOSTS_REMOVE,
4859                                               node.name, None)
4860       result.Raise("Can't update hosts file with new host data")
4861       _RedistributeAncillaryFiles(self)
4862
4863
4864 class _NodeQuery(_QueryBase):
4865   FIELDS = query.NODE_FIELDS
4866
4867   def ExpandNames(self, lu):
4868     lu.needed_locks = {}
4869     lu.share_locks = _ShareAll()
4870
4871     if self.names:
4872       self.wanted = _GetWantedNodes(lu, self.names)
4873     else:
4874       self.wanted = locking.ALL_SET
4875
4876     self.do_locking = (self.use_locking and
4877                        query.NQ_LIVE in self.requested_data)
4878
4879     if self.do_locking:
4880       # If any non-static field is requested we need to lock the nodes
4881       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4882
4883   def DeclareLocks(self, lu, level):
4884     pass
4885
4886   def _GetQueryData(self, lu):
4887     """Computes the list of nodes and their attributes.
4888
4889     """
4890     all_info = lu.cfg.GetAllNodesInfo()
4891
4892     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4893
4894     # Gather data as requested
4895     if query.NQ_LIVE in self.requested_data:
4896       # filter out non-vm_capable nodes
4897       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4898
4899       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4900                                         [lu.cfg.GetHypervisorType()])
4901       live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4902                        for (name, nresult) in node_data.items()
4903                        if not nresult.fail_msg and nresult.payload)
4904     else:
4905       live_data = None
4906
4907     if query.NQ_INST in self.requested_data:
4908       node_to_primary = dict([(name, set()) for name in nodenames])
4909       node_to_secondary = dict([(name, set()) for name in nodenames])
4910
4911       inst_data = lu.cfg.GetAllInstancesInfo()
4912
4913       for inst in inst_data.values():
4914         if inst.primary_node in node_to_primary:
4915           node_to_primary[inst.primary_node].add(inst.name)
4916         for secnode in inst.secondary_nodes:
4917           if secnode in node_to_secondary:
4918             node_to_secondary[secnode].add(inst.name)
4919     else:
4920       node_to_primary = None
4921       node_to_secondary = None
4922
4923     if query.NQ_OOB in self.requested_data:
4924       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4925                          for name, node in all_info.iteritems())
4926     else:
4927       oob_support = None
4928
4929     if query.NQ_GROUP in self.requested_data:
4930       groups = lu.cfg.GetAllNodeGroupsInfo()
4931     else:
4932       groups = {}
4933
4934     return query.NodeQueryData([all_info[name] for name in nodenames],
4935                                live_data, lu.cfg.GetMasterNode(),
4936                                node_to_primary, node_to_secondary, groups,
4937                                oob_support, lu.cfg.GetClusterInfo())
4938
4939
4940 class LUNodeQuery(NoHooksLU):
4941   """Logical unit for querying nodes.
4942
4943   """
4944   # pylint: disable=W0142
4945   REQ_BGL = False
4946
4947   def CheckArguments(self):
4948     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4949                          self.op.output_fields, self.op.use_locking)
4950
4951   def ExpandNames(self):
4952     self.nq.ExpandNames(self)
4953
4954   def DeclareLocks(self, level):
4955     self.nq.DeclareLocks(self, level)
4956
4957   def Exec(self, feedback_fn):
4958     return self.nq.OldStyleQuery(self)
4959
4960
4961 class LUNodeQueryvols(NoHooksLU):
4962   """Logical unit for getting volumes on node(s).
4963
4964   """
4965   REQ_BGL = False
4966   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4967   _FIELDS_STATIC = utils.FieldSet("node")
4968
4969   def CheckArguments(self):
4970     _CheckOutputFields(static=self._FIELDS_STATIC,
4971                        dynamic=self._FIELDS_DYNAMIC,
4972                        selected=self.op.output_fields)
4973
4974   def ExpandNames(self):
4975     self.share_locks = _ShareAll()
4976     self.needed_locks = {}
4977
4978     if not self.op.nodes:
4979       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4980     else:
4981       self.needed_locks[locking.LEVEL_NODE] = \
4982         _GetWantedNodes(self, self.op.nodes)
4983
4984   def Exec(self, feedback_fn):
4985     """Computes the list of nodes and their attributes.
4986
4987     """
4988     nodenames = self.owned_locks(locking.LEVEL_NODE)
4989     volumes = self.rpc.call_node_volumes(nodenames)
4990
4991     ilist = self.cfg.GetAllInstancesInfo()
4992     vol2inst = _MapInstanceDisksToNodes(ilist.values())
4993
4994     output = []
4995     for node in nodenames:
4996       nresult = volumes[node]
4997       if nresult.offline:
4998         continue
4999       msg = nresult.fail_msg
5000       if msg:
5001         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5002         continue
5003
5004       node_vols = sorted(nresult.payload,
5005                          key=operator.itemgetter("dev"))
5006
5007       for vol in node_vols:
5008         node_output = []
5009         for field in self.op.output_fields:
5010           if field == "node":
5011             val = node
5012           elif field == "phys":
5013             val = vol["dev"]
5014           elif field == "vg":
5015             val = vol["vg"]
5016           elif field == "name":
5017             val = vol["name"]
5018           elif field == "size":
5019             val = int(float(vol["size"]))
5020           elif field == "instance":
5021             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5022           else:
5023             raise errors.ParameterError(field)
5024           node_output.append(str(val))
5025
5026         output.append(node_output)
5027
5028     return output
5029
5030
5031 class LUNodeQueryStorage(NoHooksLU):
5032   """Logical unit for getting information on storage units on node(s).
5033
5034   """
5035   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5036   REQ_BGL = False
5037
5038   def CheckArguments(self):
5039     _CheckOutputFields(static=self._FIELDS_STATIC,
5040                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5041                        selected=self.op.output_fields)
5042
5043   def ExpandNames(self):
5044     self.share_locks = _ShareAll()
5045     self.needed_locks = {}
5046
5047     if self.op.nodes:
5048       self.needed_locks[locking.LEVEL_NODE] = \
5049         _GetWantedNodes(self, self.op.nodes)
5050     else:
5051       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5052
5053   def Exec(self, feedback_fn):
5054     """Computes the list of nodes and their attributes.
5055
5056     """
5057     self.nodes = self.owned_locks(locking.LEVEL_NODE)
5058
5059     # Always get name to sort by
5060     if constants.SF_NAME in self.op.output_fields:
5061       fields = self.op.output_fields[:]
5062     else:
5063       fields = [constants.SF_NAME] + self.op.output_fields
5064
5065     # Never ask for node or type as it's only known to the LU
5066     for extra in [constants.SF_NODE, constants.SF_TYPE]:
5067       while extra in fields:
5068         fields.remove(extra)
5069
5070     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5071     name_idx = field_idx[constants.SF_NAME]
5072
5073     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5074     data = self.rpc.call_storage_list(self.nodes,
5075                                       self.op.storage_type, st_args,
5076                                       self.op.name, fields)
5077
5078     result = []
5079
5080     for node in utils.NiceSort(self.nodes):
5081       nresult = data[node]
5082       if nresult.offline:
5083         continue
5084
5085       msg = nresult.fail_msg
5086       if msg:
5087         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5088         continue
5089
5090       rows = dict([(row[name_idx], row) for row in nresult.payload])
5091
5092       for name in utils.NiceSort(rows.keys()):
5093         row = rows[name]
5094
5095         out = []
5096
5097         for field in self.op.output_fields:
5098           if field == constants.SF_NODE:
5099             val = node
5100           elif field == constants.SF_TYPE:
5101             val = self.op.storage_type
5102           elif field in field_idx:
5103             val = row[field_idx[field]]
5104           else:
5105             raise errors.ParameterError(field)
5106
5107           out.append(val)
5108
5109         result.append(out)
5110
5111     return result
5112
5113
5114 class _InstanceQuery(_QueryBase):
5115   FIELDS = query.INSTANCE_FIELDS
5116
5117   def ExpandNames(self, lu):
5118     lu.needed_locks = {}
5119     lu.share_locks = _ShareAll()
5120
5121     if self.names:
5122       self.wanted = _GetWantedInstances(lu, self.names)
5123     else:
5124       self.wanted = locking.ALL_SET
5125
5126     self.do_locking = (self.use_locking and
5127                        query.IQ_LIVE in self.requested_data)
5128     if self.do_locking:
5129       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5130       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5131       lu.needed_locks[locking.LEVEL_NODE] = []
5132       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5133
5134     self.do_grouplocks = (self.do_locking and
5135                           query.IQ_NODES in self.requested_data)
5136
5137   def DeclareLocks(self, lu, level):
5138     if self.do_locking:
5139       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5140         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5141
5142         # Lock all groups used by instances optimistically; this requires going
5143         # via the node before it's locked, requiring verification later on
5144         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5145           set(group_uuid
5146               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5147               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5148       elif level == locking.LEVEL_NODE:
5149         lu._LockInstancesNodes() # pylint: disable=W0212
5150
5151   @staticmethod
5152   def _CheckGroupLocks(lu):
5153     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5154     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5155
5156     # Check if node groups for locked instances are still correct
5157     for instance_name in owned_instances:
5158       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5159
5160   def _GetQueryData(self, lu):
5161     """Computes the list of instances and their attributes.
5162
5163     """
5164     if self.do_grouplocks:
5165       self._CheckGroupLocks(lu)
5166
5167     cluster = lu.cfg.GetClusterInfo()
5168     all_info = lu.cfg.GetAllInstancesInfo()
5169
5170     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5171
5172     instance_list = [all_info[name] for name in instance_names]
5173     nodes = frozenset(itertools.chain(*(inst.all_nodes
5174                                         for inst in instance_list)))
5175     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5176     bad_nodes = []
5177     offline_nodes = []
5178     wrongnode_inst = set()
5179
5180     # Gather data as requested
5181     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5182       live_data = {}
5183       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5184       for name in nodes:
5185         result = node_data[name]
5186         if result.offline:
5187           # offline nodes will be in both lists
5188           assert result.fail_msg
5189           offline_nodes.append(name)
5190         if result.fail_msg:
5191           bad_nodes.append(name)
5192         elif result.payload:
5193           for inst in result.payload:
5194             if inst in all_info:
5195               if all_info[inst].primary_node == name:
5196                 live_data.update(result.payload)
5197               else:
5198                 wrongnode_inst.add(inst)
5199             else:
5200               # orphan instance; we don't list it here as we don't
5201               # handle this case yet in the output of instance listing
5202               logging.warning("Orphan instance '%s' found on node %s",
5203                               inst, name)
5204         # else no instance is alive
5205     else:
5206       live_data = {}
5207
5208     if query.IQ_DISKUSAGE in self.requested_data:
5209       disk_usage = dict((inst.name,
5210                          _ComputeDiskSize(inst.disk_template,
5211                                           [{constants.IDISK_SIZE: disk.size}
5212                                            for disk in inst.disks]))
5213                         for inst in instance_list)
5214     else:
5215       disk_usage = None
5216
5217     if query.IQ_CONSOLE in self.requested_data:
5218       consinfo = {}
5219       for inst in instance_list:
5220         if inst.name in live_data:
5221           # Instance is running
5222           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5223         else:
5224           consinfo[inst.name] = None
5225       assert set(consinfo.keys()) == set(instance_names)
5226     else:
5227       consinfo = None
5228
5229     if query.IQ_NODES in self.requested_data:
5230       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5231                                             instance_list)))
5232       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5233       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5234                     for uuid in set(map(operator.attrgetter("group"),
5235                                         nodes.values())))
5236     else:
5237       nodes = None
5238       groups = None
5239
5240     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5241                                    disk_usage, offline_nodes, bad_nodes,
5242                                    live_data, wrongnode_inst, consinfo,
5243                                    nodes, groups)
5244
5245
5246 class LUQuery(NoHooksLU):
5247   """Query for resources/items of a certain kind.
5248
5249   """
5250   # pylint: disable=W0142
5251   REQ_BGL = False
5252
5253   def CheckArguments(self):
5254     qcls = _GetQueryImplementation(self.op.what)
5255
5256     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5257
5258   def ExpandNames(self):
5259     self.impl.ExpandNames(self)
5260
5261   def DeclareLocks(self, level):
5262     self.impl.DeclareLocks(self, level)
5263
5264   def Exec(self, feedback_fn):
5265     return self.impl.NewStyleQuery(self)
5266
5267
5268 class LUQueryFields(NoHooksLU):
5269   """Query for resources/items of a certain kind.
5270
5271   """
5272   # pylint: disable=W0142
5273   REQ_BGL = False
5274
5275   def CheckArguments(self):
5276     self.qcls = _GetQueryImplementation(self.op.what)
5277
5278   def ExpandNames(self):
5279     self.needed_locks = {}
5280
5281   def Exec(self, feedback_fn):
5282     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5283
5284
5285 class LUNodeModifyStorage(NoHooksLU):
5286   """Logical unit for modifying a storage volume on a node.
5287
5288   """
5289   REQ_BGL = False
5290
5291   def CheckArguments(self):
5292     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5293
5294     storage_type = self.op.storage_type
5295
5296     try:
5297       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5298     except KeyError:
5299       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5300                                  " modified" % storage_type,
5301                                  errors.ECODE_INVAL)
5302
5303     diff = set(self.op.changes.keys()) - modifiable
5304     if diff:
5305       raise errors.OpPrereqError("The following fields can not be modified for"
5306                                  " storage units of type '%s': %r" %
5307                                  (storage_type, list(diff)),
5308                                  errors.ECODE_INVAL)
5309
5310   def ExpandNames(self):
5311     self.needed_locks = {
5312       locking.LEVEL_NODE: self.op.node_name,
5313       }
5314
5315   def Exec(self, feedback_fn):
5316     """Computes the list of nodes and their attributes.
5317
5318     """
5319     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5320     result = self.rpc.call_storage_modify(self.op.node_name,
5321                                           self.op.storage_type, st_args,
5322                                           self.op.name, self.op.changes)
5323     result.Raise("Failed to modify storage unit '%s' on %s" %
5324                  (self.op.name, self.op.node_name))
5325
5326
5327 class LUNodeAdd(LogicalUnit):
5328   """Logical unit for adding node to the cluster.
5329
5330   """
5331   HPATH = "node-add"
5332   HTYPE = constants.HTYPE_NODE
5333   _NFLAGS = ["master_capable", "vm_capable"]
5334
5335   def CheckArguments(self):
5336     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5337     # validate/normalize the node name
5338     self.hostname = netutils.GetHostname(name=self.op.node_name,
5339                                          family=self.primary_ip_family)
5340     self.op.node_name = self.hostname.name
5341
5342     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5343       raise errors.OpPrereqError("Cannot readd the master node",
5344                                  errors.ECODE_STATE)
5345
5346     if self.op.readd and self.op.group:
5347       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5348                                  " being readded", errors.ECODE_INVAL)
5349
5350   def BuildHooksEnv(self):
5351     """Build hooks env.
5352
5353     This will run on all nodes before, and on all nodes + the new node after.
5354
5355     """
5356     return {
5357       "OP_TARGET": self.op.node_name,
5358       "NODE_NAME": self.op.node_name,
5359       "NODE_PIP": self.op.primary_ip,
5360       "NODE_SIP": self.op.secondary_ip,
5361       "MASTER_CAPABLE": str(self.op.master_capable),
5362       "VM_CAPABLE": str(self.op.vm_capable),
5363       }
5364
5365   def BuildHooksNodes(self):
5366     """Build hooks nodes.
5367
5368     """
5369     # Exclude added node
5370     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5371     post_nodes = pre_nodes + [self.op.node_name, ]
5372
5373     return (pre_nodes, post_nodes)
5374
5375   def CheckPrereq(self):
5376     """Check prerequisites.
5377
5378     This checks:
5379      - the new node is not already in the config
5380      - it is resolvable
5381      - its parameters (single/dual homed) matches the cluster
5382
5383     Any errors are signaled by raising errors.OpPrereqError.
5384
5385     """
5386     cfg = self.cfg
5387     hostname = self.hostname
5388     node = hostname.name
5389     primary_ip = self.op.primary_ip = hostname.ip
5390     if self.op.secondary_ip is None:
5391       if self.primary_ip_family == netutils.IP6Address.family:
5392         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5393                                    " IPv4 address must be given as secondary",
5394                                    errors.ECODE_INVAL)
5395       self.op.secondary_ip = primary_ip
5396
5397     secondary_ip = self.op.secondary_ip
5398     if not netutils.IP4Address.IsValid(secondary_ip):
5399       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5400                                  " address" % secondary_ip, errors.ECODE_INVAL)
5401
5402     node_list = cfg.GetNodeList()
5403     if not self.op.readd and node in node_list:
5404       raise errors.OpPrereqError("Node %s is already in the configuration" %
5405                                  node, errors.ECODE_EXISTS)
5406     elif self.op.readd and node not in node_list:
5407       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5408                                  errors.ECODE_NOENT)
5409
5410     self.changed_primary_ip = False
5411
5412     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5413       if self.op.readd and node == existing_node_name:
5414         if existing_node.secondary_ip != secondary_ip:
5415           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5416                                      " address configuration as before",
5417                                      errors.ECODE_INVAL)
5418         if existing_node.primary_ip != primary_ip:
5419           self.changed_primary_ip = True
5420
5421         continue
5422
5423       if (existing_node.primary_ip == primary_ip or
5424           existing_node.secondary_ip == primary_ip or
5425           existing_node.primary_ip == secondary_ip or
5426           existing_node.secondary_ip == secondary_ip):
5427         raise errors.OpPrereqError("New node ip address(es) conflict with"
5428                                    " existing node %s" % existing_node.name,
5429                                    errors.ECODE_NOTUNIQUE)
5430
5431     # After this 'if' block, None is no longer a valid value for the
5432     # _capable op attributes
5433     if self.op.readd:
5434       old_node = self.cfg.GetNodeInfo(node)
5435       assert old_node is not None, "Can't retrieve locked node %s" % node
5436       for attr in self._NFLAGS:
5437         if getattr(self.op, attr) is None:
5438           setattr(self.op, attr, getattr(old_node, attr))
5439     else:
5440       for attr in self._NFLAGS:
5441         if getattr(self.op, attr) is None:
5442           setattr(self.op, attr, True)
5443
5444     if self.op.readd and not self.op.vm_capable:
5445       pri, sec = cfg.GetNodeInstances(node)
5446       if pri or sec:
5447         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5448                                    " flag set to false, but it already holds"
5449                                    " instances" % node,
5450                                    errors.ECODE_STATE)
5451
5452     # check that the type of the node (single versus dual homed) is the
5453     # same as for the master
5454     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5455     master_singlehomed = myself.secondary_ip == myself.primary_ip
5456     newbie_singlehomed = secondary_ip == primary_ip
5457     if master_singlehomed != newbie_singlehomed:
5458       if master_singlehomed:
5459         raise errors.OpPrereqError("The master has no secondary ip but the"
5460                                    " new node has one",
5461                                    errors.ECODE_INVAL)
5462       else:
5463         raise errors.OpPrereqError("The master has a secondary ip but the"
5464                                    " new node doesn't have one",
5465                                    errors.ECODE_INVAL)
5466
5467     # checks reachability
5468     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5469       raise errors.OpPrereqError("Node not reachable by ping",
5470                                  errors.ECODE_ENVIRON)
5471
5472     if not newbie_singlehomed:
5473       # check reachability from my secondary ip to newbie's secondary ip
5474       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5475                            source=myself.secondary_ip):
5476         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5477                                    " based ping to node daemon port",
5478                                    errors.ECODE_ENVIRON)
5479
5480     if self.op.readd:
5481       exceptions = [node]
5482     else:
5483       exceptions = []
5484
5485     if self.op.master_capable:
5486       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5487     else:
5488       self.master_candidate = False
5489
5490     if self.op.readd:
5491       self.new_node = old_node
5492     else:
5493       node_group = cfg.LookupNodeGroup(self.op.group)
5494       self.new_node = objects.Node(name=node,
5495                                    primary_ip=primary_ip,
5496                                    secondary_ip=secondary_ip,
5497                                    master_candidate=self.master_candidate,
5498                                    offline=False, drained=False,
5499                                    group=node_group)
5500
5501     if self.op.ndparams:
5502       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5503
5504     if self.op.hv_state:
5505       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5506
5507     if self.op.disk_state:
5508       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5509
5510   def Exec(self, feedback_fn):
5511     """Adds the new node to the cluster.
5512
5513     """
5514     new_node = self.new_node
5515     node = new_node.name
5516
5517     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5518       "Not owning BGL"
5519
5520     # We adding a new node so we assume it's powered
5521     new_node.powered = True
5522
5523     # for re-adds, reset the offline/drained/master-candidate flags;
5524     # we need to reset here, otherwise offline would prevent RPC calls
5525     # later in the procedure; this also means that if the re-add
5526     # fails, we are left with a non-offlined, broken node
5527     if self.op.readd:
5528       new_node.drained = new_node.offline = False # pylint: disable=W0201
5529       self.LogInfo("Readding a node, the offline/drained flags were reset")
5530       # if we demote the node, we do cleanup later in the procedure
5531       new_node.master_candidate = self.master_candidate
5532       if self.changed_primary_ip:
5533         new_node.primary_ip = self.op.primary_ip
5534
5535     # copy the master/vm_capable flags
5536     for attr in self._NFLAGS:
5537       setattr(new_node, attr, getattr(self.op, attr))
5538
5539     # notify the user about any possible mc promotion
5540     if new_node.master_candidate:
5541       self.LogInfo("Node will be a master candidate")
5542
5543     if self.op.ndparams:
5544       new_node.ndparams = self.op.ndparams
5545     else:
5546       new_node.ndparams = {}
5547
5548     if self.op.hv_state:
5549       new_node.hv_state_static = self.new_hv_state
5550
5551     if self.op.disk_state:
5552       new_node.disk_state_static = self.new_disk_state
5553
5554     # check connectivity
5555     result = self.rpc.call_version([node])[node]
5556     result.Raise("Can't get version information from node %s" % node)
5557     if constants.PROTOCOL_VERSION == result.payload:
5558       logging.info("Communication to node %s fine, sw version %s match",
5559                    node, result.payload)
5560     else:
5561       raise errors.OpExecError("Version mismatch master version %s,"
5562                                " node version %s" %
5563                                (constants.PROTOCOL_VERSION, result.payload))
5564
5565     # Add node to our /etc/hosts, and add key to known_hosts
5566     if self.cfg.GetClusterInfo().modify_etc_hosts:
5567       master_node = self.cfg.GetMasterNode()
5568       result = self.rpc.call_etc_hosts_modify(master_node,
5569                                               constants.ETC_HOSTS_ADD,
5570                                               self.hostname.name,
5571                                               self.hostname.ip)
5572       result.Raise("Can't update hosts file with new host data")
5573
5574     if new_node.secondary_ip != new_node.primary_ip:
5575       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5576                                False)
5577
5578     node_verify_list = [self.cfg.GetMasterNode()]
5579     node_verify_param = {
5580       constants.NV_NODELIST: ([node], {}),
5581       # TODO: do a node-net-test as well?
5582     }
5583
5584     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5585                                        self.cfg.GetClusterName())
5586     for verifier in node_verify_list:
5587       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5588       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5589       if nl_payload:
5590         for failed in nl_payload:
5591           feedback_fn("ssh/hostname verification failed"
5592                       " (checking from %s): %s" %
5593                       (verifier, nl_payload[failed]))
5594         raise errors.OpExecError("ssh/hostname verification failed")
5595
5596     if self.op.readd:
5597       _RedistributeAncillaryFiles(self)
5598       self.context.ReaddNode(new_node)
5599       # make sure we redistribute the config
5600       self.cfg.Update(new_node, feedback_fn)
5601       # and make sure the new node will not have old files around
5602       if not new_node.master_candidate:
5603         result = self.rpc.call_node_demote_from_mc(new_node.name)
5604         msg = result.fail_msg
5605         if msg:
5606           self.LogWarning("Node failed to demote itself from master"
5607                           " candidate status: %s" % msg)
5608     else:
5609       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5610                                   additional_vm=self.op.vm_capable)
5611       self.context.AddNode(new_node, self.proc.GetECId())
5612
5613
5614 class LUNodeSetParams(LogicalUnit):
5615   """Modifies the parameters of a node.
5616
5617   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5618       to the node role (as _ROLE_*)
5619   @cvar _R2F: a dictionary from node role to tuples of flags
5620   @cvar _FLAGS: a list of attribute names corresponding to the flags
5621
5622   """
5623   HPATH = "node-modify"
5624   HTYPE = constants.HTYPE_NODE
5625   REQ_BGL = False
5626   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5627   _F2R = {
5628     (True, False, False): _ROLE_CANDIDATE,
5629     (False, True, False): _ROLE_DRAINED,
5630     (False, False, True): _ROLE_OFFLINE,
5631     (False, False, False): _ROLE_REGULAR,
5632     }
5633   _R2F = dict((v, k) for k, v in _F2R.items())
5634   _FLAGS = ["master_candidate", "drained", "offline"]
5635
5636   def CheckArguments(self):
5637     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5638     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5639                 self.op.master_capable, self.op.vm_capable,
5640                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5641                 self.op.disk_state]
5642     if all_mods.count(None) == len(all_mods):
5643       raise errors.OpPrereqError("Please pass at least one modification",
5644                                  errors.ECODE_INVAL)
5645     if all_mods.count(True) > 1:
5646       raise errors.OpPrereqError("Can't set the node into more than one"
5647                                  " state at the same time",
5648                                  errors.ECODE_INVAL)
5649
5650     # Boolean value that tells us whether we might be demoting from MC
5651     self.might_demote = (self.op.master_candidate == False or
5652                          self.op.offline == True or
5653                          self.op.drained == True or
5654                          self.op.master_capable == False)
5655
5656     if self.op.secondary_ip:
5657       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5658         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5659                                    " address" % self.op.secondary_ip,
5660                                    errors.ECODE_INVAL)
5661
5662     self.lock_all = self.op.auto_promote and self.might_demote
5663     self.lock_instances = self.op.secondary_ip is not None
5664
5665   def _InstanceFilter(self, instance):
5666     """Filter for getting affected instances.
5667
5668     """
5669     return (instance.disk_template in constants.DTS_INT_MIRROR and
5670             self.op.node_name in instance.all_nodes)
5671
5672   def ExpandNames(self):
5673     if self.lock_all:
5674       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5675     else:
5676       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5677
5678     # Since modifying a node can have severe effects on currently running
5679     # operations the resource lock is at least acquired in shared mode
5680     self.needed_locks[locking.LEVEL_NODE_RES] = \
5681       self.needed_locks[locking.LEVEL_NODE]
5682
5683     # Get node resource and instance locks in shared mode; they are not used
5684     # for anything but read-only access
5685     self.share_locks[locking.LEVEL_NODE_RES] = 1
5686     self.share_locks[locking.LEVEL_INSTANCE] = 1
5687
5688     if self.lock_instances:
5689       self.needed_locks[locking.LEVEL_INSTANCE] = \
5690         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5691
5692   def BuildHooksEnv(self):
5693     """Build hooks env.
5694
5695     This runs on the master node.
5696
5697     """
5698     return {
5699       "OP_TARGET": self.op.node_name,
5700       "MASTER_CANDIDATE": str(self.op.master_candidate),
5701       "OFFLINE": str(self.op.offline),
5702       "DRAINED": str(self.op.drained),
5703       "MASTER_CAPABLE": str(self.op.master_capable),
5704       "VM_CAPABLE": str(self.op.vm_capable),
5705       }
5706
5707   def BuildHooksNodes(self):
5708     """Build hooks nodes.
5709
5710     """
5711     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5712     return (nl, nl)
5713
5714   def CheckPrereq(self):
5715     """Check prerequisites.
5716
5717     This only checks the instance list against the existing names.
5718
5719     """
5720     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5721
5722     if self.lock_instances:
5723       affected_instances = \
5724         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5725
5726       # Verify instance locks
5727       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5728       wanted_instances = frozenset(affected_instances.keys())
5729       if wanted_instances - owned_instances:
5730         raise errors.OpPrereqError("Instances affected by changing node %s's"
5731                                    " secondary IP address have changed since"
5732                                    " locks were acquired, wanted '%s', have"
5733                                    " '%s'; retry the operation" %
5734                                    (self.op.node_name,
5735                                     utils.CommaJoin(wanted_instances),
5736                                     utils.CommaJoin(owned_instances)),
5737                                    errors.ECODE_STATE)
5738     else:
5739       affected_instances = None
5740
5741     if (self.op.master_candidate is not None or
5742         self.op.drained is not None or
5743         self.op.offline is not None):
5744       # we can't change the master's node flags
5745       if self.op.node_name == self.cfg.GetMasterNode():
5746         raise errors.OpPrereqError("The master role can be changed"
5747                                    " only via master-failover",
5748                                    errors.ECODE_INVAL)
5749
5750     if self.op.master_candidate and not node.master_capable:
5751       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5752                                  " it a master candidate" % node.name,
5753                                  errors.ECODE_STATE)
5754
5755     if self.op.vm_capable == False:
5756       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5757       if ipri or isec:
5758         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5759                                    " the vm_capable flag" % node.name,
5760                                    errors.ECODE_STATE)
5761
5762     if node.master_candidate and self.might_demote and not self.lock_all:
5763       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5764       # check if after removing the current node, we're missing master
5765       # candidates
5766       (mc_remaining, mc_should, _) = \
5767           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5768       if mc_remaining < mc_should:
5769         raise errors.OpPrereqError("Not enough master candidates, please"
5770                                    " pass auto promote option to allow"
5771                                    " promotion", errors.ECODE_STATE)
5772
5773     self.old_flags = old_flags = (node.master_candidate,
5774                                   node.drained, node.offline)
5775     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5776     self.old_role = old_role = self._F2R[old_flags]
5777
5778     # Check for ineffective changes
5779     for attr in self._FLAGS:
5780       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5781         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5782         setattr(self.op, attr, None)
5783
5784     # Past this point, any flag change to False means a transition
5785     # away from the respective state, as only real changes are kept
5786
5787     # TODO: We might query the real power state if it supports OOB
5788     if _SupportsOob(self.cfg, node):
5789       if self.op.offline is False and not (node.powered or
5790                                            self.op.powered == True):
5791         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5792                                     " offline status can be reset") %
5793                                    self.op.node_name)
5794     elif self.op.powered is not None:
5795       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5796                                   " as it does not support out-of-band"
5797                                   " handling") % self.op.node_name)
5798
5799     # If we're being deofflined/drained, we'll MC ourself if needed
5800     if (self.op.drained == False or self.op.offline == False or
5801         (self.op.master_capable and not node.master_capable)):
5802       if _DecideSelfPromotion(self):
5803         self.op.master_candidate = True
5804         self.LogInfo("Auto-promoting node to master candidate")
5805
5806     # If we're no longer master capable, we'll demote ourselves from MC
5807     if self.op.master_capable == False and node.master_candidate:
5808       self.LogInfo("Demoting from master candidate")
5809       self.op.master_candidate = False
5810
5811     # Compute new role
5812     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5813     if self.op.master_candidate:
5814       new_role = self._ROLE_CANDIDATE
5815     elif self.op.drained:
5816       new_role = self._ROLE_DRAINED
5817     elif self.op.offline:
5818       new_role = self._ROLE_OFFLINE
5819     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5820       # False is still in new flags, which means we're un-setting (the
5821       # only) True flag
5822       new_role = self._ROLE_REGULAR
5823     else: # no new flags, nothing, keep old role
5824       new_role = old_role
5825
5826     self.new_role = new_role
5827
5828     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5829       # Trying to transition out of offline status
5830       # TODO: Use standard RPC runner, but make sure it works when the node is
5831       # still marked offline
5832       result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5833       if result.fail_msg:
5834         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5835                                    " to report its version: %s" %
5836                                    (node.name, result.fail_msg),
5837                                    errors.ECODE_STATE)
5838       else:
5839         self.LogWarning("Transitioning node from offline to online state"
5840                         " without using re-add. Please make sure the node"
5841                         " is healthy!")
5842
5843     if self.op.secondary_ip:
5844       # Ok even without locking, because this can't be changed by any LU
5845       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5846       master_singlehomed = master.secondary_ip == master.primary_ip
5847       if master_singlehomed and self.op.secondary_ip:
5848         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5849                                    " homed cluster", errors.ECODE_INVAL)
5850
5851       assert not (frozenset(affected_instances) -
5852                   self.owned_locks(locking.LEVEL_INSTANCE))
5853
5854       if node.offline:
5855         if affected_instances:
5856           raise errors.OpPrereqError("Cannot change secondary IP address:"
5857                                      " offline node has instances (%s)"
5858                                      " configured to use it" %
5859                                      utils.CommaJoin(affected_instances.keys()))
5860       else:
5861         # On online nodes, check that no instances are running, and that
5862         # the node has the new ip and we can reach it.
5863         for instance in affected_instances.values():
5864           _CheckInstanceState(self, instance, INSTANCE_DOWN,
5865                               msg="cannot change secondary ip")
5866
5867         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5868         if master.name != node.name:
5869           # check reachability from master secondary ip to new secondary ip
5870           if not netutils.TcpPing(self.op.secondary_ip,
5871                                   constants.DEFAULT_NODED_PORT,
5872                                   source=master.secondary_ip):
5873             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5874                                        " based ping to node daemon port",
5875                                        errors.ECODE_ENVIRON)
5876
5877     if self.op.ndparams:
5878       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5879       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5880       self.new_ndparams = new_ndparams
5881
5882     if self.op.hv_state:
5883       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5884                                                  self.node.hv_state_static)
5885
5886     if self.op.disk_state:
5887       self.new_disk_state = \
5888         _MergeAndVerifyDiskState(self.op.disk_state,
5889                                  self.node.disk_state_static)
5890
5891   def Exec(self, feedback_fn):
5892     """Modifies a node.
5893
5894     """
5895     node = self.node
5896     old_role = self.old_role
5897     new_role = self.new_role
5898
5899     result = []
5900
5901     if self.op.ndparams:
5902       node.ndparams = self.new_ndparams
5903
5904     if self.op.powered is not None:
5905       node.powered = self.op.powered
5906
5907     if self.op.hv_state:
5908       node.hv_state_static = self.new_hv_state
5909
5910     if self.op.disk_state:
5911       node.disk_state_static = self.new_disk_state
5912
5913     for attr in ["master_capable", "vm_capable"]:
5914       val = getattr(self.op, attr)
5915       if val is not None:
5916         setattr(node, attr, val)
5917         result.append((attr, str(val)))
5918
5919     if new_role != old_role:
5920       # Tell the node to demote itself, if no longer MC and not offline
5921       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5922         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5923         if msg:
5924           self.LogWarning("Node failed to demote itself: %s", msg)
5925
5926       new_flags = self._R2F[new_role]
5927       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5928         if of != nf:
5929           result.append((desc, str(nf)))
5930       (node.master_candidate, node.drained, node.offline) = new_flags
5931
5932       # we locked all nodes, we adjust the CP before updating this node
5933       if self.lock_all:
5934         _AdjustCandidatePool(self, [node.name])
5935
5936     if self.op.secondary_ip:
5937       node.secondary_ip = self.op.secondary_ip
5938       result.append(("secondary_ip", self.op.secondary_ip))
5939
5940     # this will trigger configuration file update, if needed
5941     self.cfg.Update(node, feedback_fn)
5942
5943     # this will trigger job queue propagation or cleanup if the mc
5944     # flag changed
5945     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5946       self.context.ReaddNode(node)
5947
5948     return result
5949
5950
5951 class LUNodePowercycle(NoHooksLU):
5952   """Powercycles a node.
5953
5954   """
5955   REQ_BGL = False
5956
5957   def CheckArguments(self):
5958     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5959     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5960       raise errors.OpPrereqError("The node is the master and the force"
5961                                  " parameter was not set",
5962                                  errors.ECODE_INVAL)
5963
5964   def ExpandNames(self):
5965     """Locking for PowercycleNode.
5966
5967     This is a last-resort option and shouldn't block on other
5968     jobs. Therefore, we grab no locks.
5969
5970     """
5971     self.needed_locks = {}
5972
5973   def Exec(self, feedback_fn):
5974     """Reboots a node.
5975
5976     """
5977     result = self.rpc.call_node_powercycle(self.op.node_name,
5978                                            self.cfg.GetHypervisorType())
5979     result.Raise("Failed to schedule the reboot")
5980     return result.payload
5981
5982
5983 class LUClusterQuery(NoHooksLU):
5984   """Query cluster configuration.
5985
5986   """
5987   REQ_BGL = False
5988
5989   def ExpandNames(self):
5990     self.needed_locks = {}
5991
5992   def Exec(self, feedback_fn):
5993     """Return cluster config.
5994
5995     """
5996     cluster = self.cfg.GetClusterInfo()
5997     os_hvp = {}
5998
5999     # Filter just for enabled hypervisors
6000     for os_name, hv_dict in cluster.os_hvp.items():
6001       os_hvp[os_name] = {}
6002       for hv_name, hv_params in hv_dict.items():
6003         if hv_name in cluster.enabled_hypervisors:
6004           os_hvp[os_name][hv_name] = hv_params
6005
6006     # Convert ip_family to ip_version
6007     primary_ip_version = constants.IP4_VERSION
6008     if cluster.primary_ip_family == netutils.IP6Address.family:
6009       primary_ip_version = constants.IP6_VERSION
6010
6011     result = {
6012       "software_version": constants.RELEASE_VERSION,
6013       "protocol_version": constants.PROTOCOL_VERSION,
6014       "config_version": constants.CONFIG_VERSION,
6015       "os_api_version": max(constants.OS_API_VERSIONS),
6016       "export_version": constants.EXPORT_VERSION,
6017       "architecture": (platform.architecture()[0], platform.machine()),
6018       "name": cluster.cluster_name,
6019       "master": cluster.master_node,
6020       "default_hypervisor": cluster.primary_hypervisor,
6021       "enabled_hypervisors": cluster.enabled_hypervisors,
6022       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6023                         for hypervisor_name in cluster.enabled_hypervisors]),
6024       "os_hvp": os_hvp,
6025       "beparams": cluster.beparams,
6026       "osparams": cluster.osparams,
6027       "ipolicy": cluster.ipolicy,
6028       "nicparams": cluster.nicparams,
6029       "ndparams": cluster.ndparams,
6030       "candidate_pool_size": cluster.candidate_pool_size,
6031       "master_netdev": cluster.master_netdev,
6032       "master_netmask": cluster.master_netmask,
6033       "use_external_mip_script": cluster.use_external_mip_script,
6034       "volume_group_name": cluster.volume_group_name,
6035       "drbd_usermode_helper": cluster.drbd_usermode_helper,
6036       "file_storage_dir": cluster.file_storage_dir,
6037       "shared_file_storage_dir": cluster.shared_file_storage_dir,
6038       "maintain_node_health": cluster.maintain_node_health,
6039       "ctime": cluster.ctime,
6040       "mtime": cluster.mtime,
6041       "uuid": cluster.uuid,
6042       "tags": list(cluster.GetTags()),
6043       "uid_pool": cluster.uid_pool,
6044       "default_iallocator": cluster.default_iallocator,
6045       "reserved_lvs": cluster.reserved_lvs,
6046       "primary_ip_version": primary_ip_version,
6047       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6048       "hidden_os": cluster.hidden_os,
6049       "blacklisted_os": cluster.blacklisted_os,
6050       }
6051
6052     return result
6053
6054
6055 class LUClusterConfigQuery(NoHooksLU):
6056   """Return configuration values.
6057
6058   """
6059   REQ_BGL = False
6060   _FIELDS_DYNAMIC = utils.FieldSet()
6061   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6062                                   "watcher_pause", "volume_group_name")
6063
6064   def CheckArguments(self):
6065     _CheckOutputFields(static=self._FIELDS_STATIC,
6066                        dynamic=self._FIELDS_DYNAMIC,
6067                        selected=self.op.output_fields)
6068
6069   def ExpandNames(self):
6070     self.needed_locks = {}
6071
6072   def Exec(self, feedback_fn):
6073     """Dump a representation of the cluster config to the standard output.
6074
6075     """
6076     values = []
6077     for field in self.op.output_fields:
6078       if field == "cluster_name":
6079         entry = self.cfg.GetClusterName()
6080       elif field == "master_node":
6081         entry = self.cfg.GetMasterNode()
6082       elif field == "drain_flag":
6083         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6084       elif field == "watcher_pause":
6085         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6086       elif field == "volume_group_name":
6087         entry = self.cfg.GetVGName()
6088       else:
6089         raise errors.ParameterError(field)
6090       values.append(entry)
6091     return values
6092
6093
6094 class LUInstanceActivateDisks(NoHooksLU):
6095   """Bring up an instance's disks.
6096
6097   """
6098   REQ_BGL = False
6099
6100   def ExpandNames(self):
6101     self._ExpandAndLockInstance()
6102     self.needed_locks[locking.LEVEL_NODE] = []
6103     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6104
6105   def DeclareLocks(self, level):
6106     if level == locking.LEVEL_NODE:
6107       self._LockInstancesNodes()
6108
6109   def CheckPrereq(self):
6110     """Check prerequisites.
6111
6112     This checks that the instance is in the cluster.
6113
6114     """
6115     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6116     assert self.instance is not None, \
6117       "Cannot retrieve locked instance %s" % self.op.instance_name
6118     _CheckNodeOnline(self, self.instance.primary_node)
6119
6120   def Exec(self, feedback_fn):
6121     """Activate the disks.
6122
6123     """
6124     disks_ok, disks_info = \
6125               _AssembleInstanceDisks(self, self.instance,
6126                                      ignore_size=self.op.ignore_size)
6127     if not disks_ok:
6128       raise errors.OpExecError("Cannot activate block devices")
6129
6130     return disks_info
6131
6132
6133 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6134                            ignore_size=False):
6135   """Prepare the block devices for an instance.
6136
6137   This sets up the block devices on all nodes.
6138
6139   @type lu: L{LogicalUnit}
6140   @param lu: the logical unit on whose behalf we execute
6141   @type instance: L{objects.Instance}
6142   @param instance: the instance for whose disks we assemble
6143   @type disks: list of L{objects.Disk} or None
6144   @param disks: which disks to assemble (or all, if None)
6145   @type ignore_secondaries: boolean
6146   @param ignore_secondaries: if true, errors on secondary nodes
6147       won't result in an error return from the function
6148   @type ignore_size: boolean
6149   @param ignore_size: if true, the current known size of the disk
6150       will not be used during the disk activation, useful for cases
6151       when the size is wrong
6152   @return: False if the operation failed, otherwise a list of
6153       (host, instance_visible_name, node_visible_name)
6154       with the mapping from node devices to instance devices
6155
6156   """
6157   device_info = []
6158   disks_ok = True
6159   iname = instance.name
6160   disks = _ExpandCheckDisks(instance, disks)
6161
6162   # With the two passes mechanism we try to reduce the window of
6163   # opportunity for the race condition of switching DRBD to primary
6164   # before handshaking occured, but we do not eliminate it
6165
6166   # The proper fix would be to wait (with some limits) until the
6167   # connection has been made and drbd transitions from WFConnection
6168   # into any other network-connected state (Connected, SyncTarget,
6169   # SyncSource, etc.)
6170
6171   # 1st pass, assemble on all nodes in secondary mode
6172   for idx, inst_disk in enumerate(disks):
6173     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6174       if ignore_size:
6175         node_disk = node_disk.Copy()
6176         node_disk.UnsetSize()
6177       lu.cfg.SetDiskID(node_disk, node)
6178       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6179       msg = result.fail_msg
6180       if msg:
6181         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6182                            " (is_primary=False, pass=1): %s",
6183                            inst_disk.iv_name, node, msg)
6184         if not ignore_secondaries:
6185           disks_ok = False
6186
6187   # FIXME: race condition on drbd migration to primary
6188
6189   # 2nd pass, do only the primary node
6190   for idx, inst_disk in enumerate(disks):
6191     dev_path = None
6192
6193     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6194       if node != instance.primary_node:
6195         continue
6196       if ignore_size:
6197         node_disk = node_disk.Copy()
6198         node_disk.UnsetSize()
6199       lu.cfg.SetDiskID(node_disk, node)
6200       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6201       msg = result.fail_msg
6202       if msg:
6203         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6204                            " (is_primary=True, pass=2): %s",
6205                            inst_disk.iv_name, node, msg)
6206         disks_ok = False
6207       else:
6208         dev_path = result.payload
6209
6210     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6211
6212   # leave the disks configured for the primary node
6213   # this is a workaround that would be fixed better by
6214   # improving the logical/physical id handling
6215   for disk in disks:
6216     lu.cfg.SetDiskID(disk, instance.primary_node)
6217
6218   return disks_ok, device_info
6219
6220
6221 def _StartInstanceDisks(lu, instance, force):
6222   """Start the disks of an instance.
6223
6224   """
6225   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6226                                            ignore_secondaries=force)
6227   if not disks_ok:
6228     _ShutdownInstanceDisks(lu, instance)
6229     if force is not None and not force:
6230       lu.proc.LogWarning("", hint="If the message above refers to a"
6231                          " secondary node,"
6232                          " you can retry the operation using '--force'.")
6233     raise errors.OpExecError("Disk consistency error")
6234
6235
6236 class LUInstanceDeactivateDisks(NoHooksLU):
6237   """Shutdown an instance's disks.
6238
6239   """
6240   REQ_BGL = False
6241
6242   def ExpandNames(self):
6243     self._ExpandAndLockInstance()
6244     self.needed_locks[locking.LEVEL_NODE] = []
6245     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6246
6247   def DeclareLocks(self, level):
6248     if level == locking.LEVEL_NODE:
6249       self._LockInstancesNodes()
6250
6251   def CheckPrereq(self):
6252     """Check prerequisites.
6253
6254     This checks that the instance is in the cluster.
6255
6256     """
6257     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6258     assert self.instance is not None, \
6259       "Cannot retrieve locked instance %s" % self.op.instance_name
6260
6261   def Exec(self, feedback_fn):
6262     """Deactivate the disks
6263
6264     """
6265     instance = self.instance
6266     if self.op.force:
6267       _ShutdownInstanceDisks(self, instance)
6268     else:
6269       _SafeShutdownInstanceDisks(self, instance)
6270
6271
6272 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6273   """Shutdown block devices of an instance.
6274
6275   This function checks if an instance is running, before calling
6276   _ShutdownInstanceDisks.
6277
6278   """
6279   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6280   _ShutdownInstanceDisks(lu, instance, disks=disks)
6281
6282
6283 def _ExpandCheckDisks(instance, disks):
6284   """Return the instance disks selected by the disks list
6285
6286   @type disks: list of L{objects.Disk} or None
6287   @param disks: selected disks
6288   @rtype: list of L{objects.Disk}
6289   @return: selected instance disks to act on
6290
6291   """
6292   if disks is None:
6293     return instance.disks
6294   else:
6295     if not set(disks).issubset(instance.disks):
6296       raise errors.ProgrammerError("Can only act on disks belonging to the"
6297                                    " target instance")
6298     return disks
6299
6300
6301 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6302   """Shutdown block devices of an instance.
6303
6304   This does the shutdown on all nodes of the instance.
6305
6306   If the ignore_primary is false, errors on the primary node are
6307   ignored.
6308
6309   """
6310   all_result = True
6311   disks = _ExpandCheckDisks(instance, disks)
6312
6313   for disk in disks:
6314     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6315       lu.cfg.SetDiskID(top_disk, node)
6316       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6317       msg = result.fail_msg
6318       if msg:
6319         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6320                       disk.iv_name, node, msg)
6321         if ((node == instance.primary_node and not ignore_primary) or
6322             (node != instance.primary_node and not result.offline)):
6323           all_result = False
6324   return all_result
6325
6326
6327 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6328   """Checks if a node has enough free memory.
6329
6330   This function check if a given node has the needed amount of free
6331   memory. In case the node has less memory or we cannot get the
6332   information from the node, this function raise an OpPrereqError
6333   exception.
6334
6335   @type lu: C{LogicalUnit}
6336   @param lu: a logical unit from which we get configuration data
6337   @type node: C{str}
6338   @param node: the node to check
6339   @type reason: C{str}
6340   @param reason: string to use in the error message
6341   @type requested: C{int}
6342   @param requested: the amount of memory in MiB to check for
6343   @type hypervisor_name: C{str}
6344   @param hypervisor_name: the hypervisor to ask for memory stats
6345   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6346       we cannot check the node
6347
6348   """
6349   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6350   nodeinfo[node].Raise("Can't get data from node %s" % node,
6351                        prereq=True, ecode=errors.ECODE_ENVIRON)
6352   (_, _, (hv_info, )) = nodeinfo[node].payload
6353
6354   free_mem = hv_info.get("memory_free", None)
6355   if not isinstance(free_mem, int):
6356     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6357                                " was '%s'" % (node, free_mem),
6358                                errors.ECODE_ENVIRON)
6359   if requested > free_mem:
6360     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6361                                " needed %s MiB, available %s MiB" %
6362                                (node, reason, requested, free_mem),
6363                                errors.ECODE_NORES)
6364
6365
6366 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6367   """Checks if nodes have enough free disk space in the all VGs.
6368
6369   This function check if all given nodes have the needed amount of
6370   free disk. In case any node has less disk or we cannot get the
6371   information from the node, this function raise an OpPrereqError
6372   exception.
6373
6374   @type lu: C{LogicalUnit}
6375   @param lu: a logical unit from which we get configuration data
6376   @type nodenames: C{list}
6377   @param nodenames: the list of node names to check
6378   @type req_sizes: C{dict}
6379   @param req_sizes: the hash of vg and corresponding amount of disk in
6380       MiB to check for
6381   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6382       or we cannot check the node
6383
6384   """
6385   for vg, req_size in req_sizes.items():
6386     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6387
6388
6389 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6390   """Checks if nodes have enough free disk space in the specified VG.
6391
6392   This function check if all given nodes have the needed amount of
6393   free disk. In case any node has less disk or we cannot get the
6394   information from the node, this function raise an OpPrereqError
6395   exception.
6396
6397   @type lu: C{LogicalUnit}
6398   @param lu: a logical unit from which we get configuration data
6399   @type nodenames: C{list}
6400   @param nodenames: the list of node names to check
6401   @type vg: C{str}
6402   @param vg: the volume group to check
6403   @type requested: C{int}
6404   @param requested: the amount of disk in MiB to check for
6405   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6406       or we cannot check the node
6407
6408   """
6409   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6410   for node in nodenames:
6411     info = nodeinfo[node]
6412     info.Raise("Cannot get current information from node %s" % node,
6413                prereq=True, ecode=errors.ECODE_ENVIRON)
6414     (_, (vg_info, ), _) = info.payload
6415     vg_free = vg_info.get("vg_free", None)
6416     if not isinstance(vg_free, int):
6417       raise errors.OpPrereqError("Can't compute free disk space on node"
6418                                  " %s for vg %s, result was '%s'" %
6419                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6420     if requested > vg_free:
6421       raise errors.OpPrereqError("Not enough disk space on target node %s"
6422                                  " vg %s: required %d MiB, available %d MiB" %
6423                                  (node, vg, requested, vg_free),
6424                                  errors.ECODE_NORES)
6425
6426
6427 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6428   """Checks if nodes have enough physical CPUs
6429
6430   This function checks if all given nodes have the needed number of
6431   physical CPUs. In case any node has less CPUs or we cannot get the
6432   information from the node, this function raises an OpPrereqError
6433   exception.
6434
6435   @type lu: C{LogicalUnit}
6436   @param lu: a logical unit from which we get configuration data
6437   @type nodenames: C{list}
6438   @param nodenames: the list of node names to check
6439   @type requested: C{int}
6440   @param requested: the minimum acceptable number of physical CPUs
6441   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6442       or we cannot check the node
6443
6444   """
6445   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6446   for node in nodenames:
6447     info = nodeinfo[node]
6448     info.Raise("Cannot get current information from node %s" % node,
6449                prereq=True, ecode=errors.ECODE_ENVIRON)
6450     (_, _, (hv_info, )) = info.payload
6451     num_cpus = hv_info.get("cpu_total", None)
6452     if not isinstance(num_cpus, int):
6453       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6454                                  " on node %s, result was '%s'" %
6455                                  (node, num_cpus), errors.ECODE_ENVIRON)
6456     if requested > num_cpus:
6457       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6458                                  "required" % (node, num_cpus, requested),
6459                                  errors.ECODE_NORES)
6460
6461
6462 class LUInstanceStartup(LogicalUnit):
6463   """Starts an instance.
6464
6465   """
6466   HPATH = "instance-start"
6467   HTYPE = constants.HTYPE_INSTANCE
6468   REQ_BGL = False
6469
6470   def CheckArguments(self):
6471     # extra beparams
6472     if self.op.beparams:
6473       # fill the beparams dict
6474       objects.UpgradeBeParams(self.op.beparams)
6475       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6476
6477   def ExpandNames(self):
6478     self._ExpandAndLockInstance()
6479
6480   def BuildHooksEnv(self):
6481     """Build hooks env.
6482
6483     This runs on master, primary and secondary nodes of the instance.
6484
6485     """
6486     env = {
6487       "FORCE": self.op.force,
6488       }
6489
6490     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6491
6492     return env
6493
6494   def BuildHooksNodes(self):
6495     """Build hooks nodes.
6496
6497     """
6498     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6499     return (nl, nl)
6500
6501   def CheckPrereq(self):
6502     """Check prerequisites.
6503
6504     This checks that the instance is in the cluster.
6505
6506     """
6507     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6508     assert self.instance is not None, \
6509       "Cannot retrieve locked instance %s" % self.op.instance_name
6510
6511     # extra hvparams
6512     if self.op.hvparams:
6513       # check hypervisor parameter syntax (locally)
6514       cluster = self.cfg.GetClusterInfo()
6515       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6516       filled_hvp = cluster.FillHV(instance)
6517       filled_hvp.update(self.op.hvparams)
6518       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6519       hv_type.CheckParameterSyntax(filled_hvp)
6520       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6521
6522     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6523
6524     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6525
6526     if self.primary_offline and self.op.ignore_offline_nodes:
6527       self.proc.LogWarning("Ignoring offline primary node")
6528
6529       if self.op.hvparams or self.op.beparams:
6530         self.proc.LogWarning("Overridden parameters are ignored")
6531     else:
6532       _CheckNodeOnline(self, instance.primary_node)
6533
6534       bep = self.cfg.GetClusterInfo().FillBE(instance)
6535       bep.update(self.op.beparams)
6536
6537       # check bridges existence
6538       _CheckInstanceBridgesExist(self, instance)
6539
6540       remote_info = self.rpc.call_instance_info(instance.primary_node,
6541                                                 instance.name,
6542                                                 instance.hypervisor)
6543       remote_info.Raise("Error checking node %s" % instance.primary_node,
6544                         prereq=True, ecode=errors.ECODE_ENVIRON)
6545       if not remote_info.payload: # not running already
6546         _CheckNodeFreeMemory(self, instance.primary_node,
6547                              "starting instance %s" % instance.name,
6548                              bep[constants.BE_MAXMEM], instance.hypervisor)
6549
6550   def Exec(self, feedback_fn):
6551     """Start the instance.
6552
6553     """
6554     instance = self.instance
6555     force = self.op.force
6556
6557     if not self.op.no_remember:
6558       self.cfg.MarkInstanceUp(instance.name)
6559
6560     if self.primary_offline:
6561       assert self.op.ignore_offline_nodes
6562       self.proc.LogInfo("Primary node offline, marked instance as started")
6563     else:
6564       node_current = instance.primary_node
6565
6566       _StartInstanceDisks(self, instance, force)
6567
6568       result = \
6569         self.rpc.call_instance_start(node_current,
6570                                      (instance, self.op.hvparams,
6571                                       self.op.beparams),
6572                                      self.op.startup_paused)
6573       msg = result.fail_msg
6574       if msg:
6575         _ShutdownInstanceDisks(self, instance)
6576         raise errors.OpExecError("Could not start instance: %s" % msg)
6577
6578
6579 class LUInstanceReboot(LogicalUnit):
6580   """Reboot an instance.
6581
6582   """
6583   HPATH = "instance-reboot"
6584   HTYPE = constants.HTYPE_INSTANCE
6585   REQ_BGL = False
6586
6587   def ExpandNames(self):
6588     self._ExpandAndLockInstance()
6589
6590   def BuildHooksEnv(self):
6591     """Build hooks env.
6592
6593     This runs on master, primary and secondary nodes of the instance.
6594
6595     """
6596     env = {
6597       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6598       "REBOOT_TYPE": self.op.reboot_type,
6599       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6600       }
6601
6602     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6603
6604     return env
6605
6606   def BuildHooksNodes(self):
6607     """Build hooks nodes.
6608
6609     """
6610     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6611     return (nl, nl)
6612
6613   def CheckPrereq(self):
6614     """Check prerequisites.
6615
6616     This checks that the instance is in the cluster.
6617
6618     """
6619     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6620     assert self.instance is not None, \
6621       "Cannot retrieve locked instance %s" % self.op.instance_name
6622     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6623     _CheckNodeOnline(self, instance.primary_node)
6624
6625     # check bridges existence
6626     _CheckInstanceBridgesExist(self, instance)
6627
6628   def Exec(self, feedback_fn):
6629     """Reboot the instance.
6630
6631     """
6632     instance = self.instance
6633     ignore_secondaries = self.op.ignore_secondaries
6634     reboot_type = self.op.reboot_type
6635
6636     remote_info = self.rpc.call_instance_info(instance.primary_node,
6637                                               instance.name,
6638                                               instance.hypervisor)
6639     remote_info.Raise("Error checking node %s" % instance.primary_node)
6640     instance_running = bool(remote_info.payload)
6641
6642     node_current = instance.primary_node
6643
6644     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6645                                             constants.INSTANCE_REBOOT_HARD]:
6646       for disk in instance.disks:
6647         self.cfg.SetDiskID(disk, node_current)
6648       result = self.rpc.call_instance_reboot(node_current, instance,
6649                                              reboot_type,
6650                                              self.op.shutdown_timeout)
6651       result.Raise("Could not reboot instance")
6652     else:
6653       if instance_running:
6654         result = self.rpc.call_instance_shutdown(node_current, instance,
6655                                                  self.op.shutdown_timeout)
6656         result.Raise("Could not shutdown instance for full reboot")
6657         _ShutdownInstanceDisks(self, instance)
6658       else:
6659         self.LogInfo("Instance %s was already stopped, starting now",
6660                      instance.name)
6661       _StartInstanceDisks(self, instance, ignore_secondaries)
6662       result = self.rpc.call_instance_start(node_current,
6663                                             (instance, None, None), False)
6664       msg = result.fail_msg
6665       if msg:
6666         _ShutdownInstanceDisks(self, instance)
6667         raise errors.OpExecError("Could not start instance for"
6668                                  " full reboot: %s" % msg)
6669
6670     self.cfg.MarkInstanceUp(instance.name)
6671
6672
6673 class LUInstanceShutdown(LogicalUnit):
6674   """Shutdown an instance.
6675
6676   """
6677   HPATH = "instance-stop"
6678   HTYPE = constants.HTYPE_INSTANCE
6679   REQ_BGL = False
6680
6681   def ExpandNames(self):
6682     self._ExpandAndLockInstance()
6683
6684   def BuildHooksEnv(self):
6685     """Build hooks env.
6686
6687     This runs on master, primary and secondary nodes of the instance.
6688
6689     """
6690     env = _BuildInstanceHookEnvByObject(self, self.instance)
6691     env["TIMEOUT"] = self.op.timeout
6692     return env
6693
6694   def BuildHooksNodes(self):
6695     """Build hooks nodes.
6696
6697     """
6698     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6699     return (nl, nl)
6700
6701   def CheckPrereq(self):
6702     """Check prerequisites.
6703
6704     This checks that the instance is in the cluster.
6705
6706     """
6707     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6708     assert self.instance is not None, \
6709       "Cannot retrieve locked instance %s" % self.op.instance_name
6710
6711     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6712
6713     self.primary_offline = \
6714       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6715
6716     if self.primary_offline and self.op.ignore_offline_nodes:
6717       self.proc.LogWarning("Ignoring offline primary node")
6718     else:
6719       _CheckNodeOnline(self, self.instance.primary_node)
6720
6721   def Exec(self, feedback_fn):
6722     """Shutdown the instance.
6723
6724     """
6725     instance = self.instance
6726     node_current = instance.primary_node
6727     timeout = self.op.timeout
6728
6729     if not self.op.no_remember:
6730       self.cfg.MarkInstanceDown(instance.name)
6731
6732     if self.primary_offline:
6733       assert self.op.ignore_offline_nodes
6734       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6735     else:
6736       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6737       msg = result.fail_msg
6738       if msg:
6739         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6740
6741       _ShutdownInstanceDisks(self, instance)
6742
6743
6744 class LUInstanceReinstall(LogicalUnit):
6745   """Reinstall an instance.
6746
6747   """
6748   HPATH = "instance-reinstall"
6749   HTYPE = constants.HTYPE_INSTANCE
6750   REQ_BGL = False
6751
6752   def ExpandNames(self):
6753     self._ExpandAndLockInstance()
6754
6755   def BuildHooksEnv(self):
6756     """Build hooks env.
6757
6758     This runs on master, primary and secondary nodes of the instance.
6759
6760     """
6761     return _BuildInstanceHookEnvByObject(self, self.instance)
6762
6763   def BuildHooksNodes(self):
6764     """Build hooks nodes.
6765
6766     """
6767     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6768     return (nl, nl)
6769
6770   def CheckPrereq(self):
6771     """Check prerequisites.
6772
6773     This checks that the instance is in the cluster and is not running.
6774
6775     """
6776     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6777     assert instance is not None, \
6778       "Cannot retrieve locked instance %s" % self.op.instance_name
6779     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6780                      " offline, cannot reinstall")
6781     for node in instance.secondary_nodes:
6782       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6783                        " cannot reinstall")
6784
6785     if instance.disk_template == constants.DT_DISKLESS:
6786       raise errors.OpPrereqError("Instance '%s' has no disks" %
6787                                  self.op.instance_name,
6788                                  errors.ECODE_INVAL)
6789     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6790
6791     if self.op.os_type is not None:
6792       # OS verification
6793       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6794       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6795       instance_os = self.op.os_type
6796     else:
6797       instance_os = instance.os
6798
6799     nodelist = list(instance.all_nodes)
6800
6801     if self.op.osparams:
6802       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6803       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6804       self.os_inst = i_osdict # the new dict (without defaults)
6805     else:
6806       self.os_inst = None
6807
6808     self.instance = instance
6809
6810   def Exec(self, feedback_fn):
6811     """Reinstall the instance.
6812
6813     """
6814     inst = self.instance
6815
6816     if self.op.os_type is not None:
6817       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6818       inst.os = self.op.os_type
6819       # Write to configuration
6820       self.cfg.Update(inst, feedback_fn)
6821
6822     _StartInstanceDisks(self, inst, None)
6823     try:
6824       feedback_fn("Running the instance OS create scripts...")
6825       # FIXME: pass debug option from opcode to backend
6826       result = self.rpc.call_instance_os_add(inst.primary_node,
6827                                              (inst, self.os_inst), True,
6828                                              self.op.debug_level)
6829       result.Raise("Could not install OS for instance %s on node %s" %
6830                    (inst.name, inst.primary_node))
6831     finally:
6832       _ShutdownInstanceDisks(self, inst)
6833
6834
6835 class LUInstanceRecreateDisks(LogicalUnit):
6836   """Recreate an instance's missing disks.
6837
6838   """
6839   HPATH = "instance-recreate-disks"
6840   HTYPE = constants.HTYPE_INSTANCE
6841   REQ_BGL = False
6842
6843   def CheckArguments(self):
6844     # normalise the disk list
6845     self.op.disks = sorted(frozenset(self.op.disks))
6846
6847   def ExpandNames(self):
6848     self._ExpandAndLockInstance()
6849     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6850     if self.op.nodes:
6851       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6852       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6853     else:
6854       self.needed_locks[locking.LEVEL_NODE] = []
6855
6856   def DeclareLocks(self, level):
6857     if level == locking.LEVEL_NODE:
6858       # if we replace the nodes, we only need to lock the old primary,
6859       # otherwise we need to lock all nodes for disk re-creation
6860       primary_only = bool(self.op.nodes)
6861       self._LockInstancesNodes(primary_only=primary_only)
6862     elif level == locking.LEVEL_NODE_RES:
6863       # Copy node locks
6864       self.needed_locks[locking.LEVEL_NODE_RES] = \
6865         self.needed_locks[locking.LEVEL_NODE][:]
6866
6867   def BuildHooksEnv(self):
6868     """Build hooks env.
6869
6870     This runs on master, primary and secondary nodes of the instance.
6871
6872     """
6873     return _BuildInstanceHookEnvByObject(self, self.instance)
6874
6875   def BuildHooksNodes(self):
6876     """Build hooks nodes.
6877
6878     """
6879     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6880     return (nl, nl)
6881
6882   def CheckPrereq(self):
6883     """Check prerequisites.
6884
6885     This checks that the instance is in the cluster and is not running.
6886
6887     """
6888     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6889     assert instance is not None, \
6890       "Cannot retrieve locked instance %s" % self.op.instance_name
6891     if self.op.nodes:
6892       if len(self.op.nodes) != len(instance.all_nodes):
6893         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6894                                    " %d replacement nodes were specified" %
6895                                    (instance.name, len(instance.all_nodes),
6896                                     len(self.op.nodes)),
6897                                    errors.ECODE_INVAL)
6898       assert instance.disk_template != constants.DT_DRBD8 or \
6899           len(self.op.nodes) == 2
6900       assert instance.disk_template != constants.DT_PLAIN or \
6901           len(self.op.nodes) == 1
6902       primary_node = self.op.nodes[0]
6903     else:
6904       primary_node = instance.primary_node
6905     _CheckNodeOnline(self, primary_node)
6906
6907     if instance.disk_template == constants.DT_DISKLESS:
6908       raise errors.OpPrereqError("Instance '%s' has no disks" %
6909                                  self.op.instance_name, errors.ECODE_INVAL)
6910     # if we replace nodes *and* the old primary is offline, we don't
6911     # check
6912     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6913     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6914     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6915     if not (self.op.nodes and old_pnode.offline):
6916       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6917                           msg="cannot recreate disks")
6918
6919     if not self.op.disks:
6920       self.op.disks = range(len(instance.disks))
6921     else:
6922       for idx in self.op.disks:
6923         if idx >= len(instance.disks):
6924           raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6925                                      errors.ECODE_INVAL)
6926     if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6927       raise errors.OpPrereqError("Can't recreate disks partially and"
6928                                  " change the nodes at the same time",
6929                                  errors.ECODE_INVAL)
6930     self.instance = instance
6931
6932   def Exec(self, feedback_fn):
6933     """Recreate the disks.
6934
6935     """
6936     instance = self.instance
6937
6938     assert (self.owned_locks(locking.LEVEL_NODE) ==
6939             self.owned_locks(locking.LEVEL_NODE_RES))
6940
6941     to_skip = []
6942     mods = [] # keeps track of needed logical_id changes
6943
6944     for idx, disk in enumerate(instance.disks):
6945       if idx not in self.op.disks: # disk idx has not been passed in
6946         to_skip.append(idx)
6947         continue
6948       # update secondaries for disks, if needed
6949       if self.op.nodes:
6950         if disk.dev_type == constants.LD_DRBD8:
6951           # need to update the nodes and minors
6952           assert len(self.op.nodes) == 2
6953           assert len(disk.logical_id) == 6 # otherwise disk internals
6954                                            # have changed
6955           (_, _, old_port, _, _, old_secret) = disk.logical_id
6956           new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6957           new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6958                     new_minors[0], new_minors[1], old_secret)
6959           assert len(disk.logical_id) == len(new_id)
6960           mods.append((idx, new_id))
6961
6962     # now that we have passed all asserts above, we can apply the mods
6963     # in a single run (to avoid partial changes)
6964     for idx, new_id in mods:
6965       instance.disks[idx].logical_id = new_id
6966
6967     # change primary node, if needed
6968     if self.op.nodes:
6969       instance.primary_node = self.op.nodes[0]
6970       self.LogWarning("Changing the instance's nodes, you will have to"
6971                       " remove any disks left on the older nodes manually")
6972
6973     if self.op.nodes:
6974       self.cfg.Update(instance, feedback_fn)
6975
6976     _CreateDisks(self, instance, to_skip=to_skip)
6977
6978
6979 class LUInstanceRename(LogicalUnit):
6980   """Rename an instance.
6981
6982   """
6983   HPATH = "instance-rename"
6984   HTYPE = constants.HTYPE_INSTANCE
6985
6986   def CheckArguments(self):
6987     """Check arguments.
6988
6989     """
6990     if self.op.ip_check and not self.op.name_check:
6991       # TODO: make the ip check more flexible and not depend on the name check
6992       raise errors.OpPrereqError("IP address check requires a name check",
6993                                  errors.ECODE_INVAL)
6994
6995   def BuildHooksEnv(self):
6996     """Build hooks env.
6997
6998     This runs on master, primary and secondary nodes of the instance.
6999
7000     """
7001     env = _BuildInstanceHookEnvByObject(self, self.instance)
7002     env["INSTANCE_NEW_NAME"] = self.op.new_name
7003     return env
7004
7005   def BuildHooksNodes(self):
7006     """Build hooks nodes.
7007
7008     """
7009     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7010     return (nl, nl)
7011
7012   def CheckPrereq(self):
7013     """Check prerequisites.
7014
7015     This checks that the instance is in the cluster and is not running.
7016
7017     """
7018     self.op.instance_name = _ExpandInstanceName(self.cfg,
7019                                                 self.op.instance_name)
7020     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7021     assert instance is not None
7022     _CheckNodeOnline(self, instance.primary_node)
7023     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7024                         msg="cannot rename")
7025     self.instance = instance
7026
7027     new_name = self.op.new_name
7028     if self.op.name_check:
7029       hostname = netutils.GetHostname(name=new_name)
7030       if hostname.name != new_name:
7031         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7032                      hostname.name)
7033       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7034         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7035                                     " same as given hostname '%s'") %
7036                                     (hostname.name, self.op.new_name),
7037                                     errors.ECODE_INVAL)
7038       new_name = self.op.new_name = hostname.name
7039       if (self.op.ip_check and
7040           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7041         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7042                                    (hostname.ip, new_name),
7043                                    errors.ECODE_NOTUNIQUE)
7044
7045     instance_list = self.cfg.GetInstanceList()
7046     if new_name in instance_list and new_name != instance.name:
7047       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7048                                  new_name, errors.ECODE_EXISTS)
7049
7050   def Exec(self, feedback_fn):
7051     """Rename the instance.
7052
7053     """
7054     inst = self.instance
7055     old_name = inst.name
7056
7057     rename_file_storage = False
7058     if (inst.disk_template in constants.DTS_FILEBASED and
7059         self.op.new_name != inst.name):
7060       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7061       rename_file_storage = True
7062
7063     self.cfg.RenameInstance(inst.name, self.op.new_name)
7064     # Change the instance lock. This is definitely safe while we hold the BGL.
7065     # Otherwise the new lock would have to be added in acquired mode.
7066     assert self.REQ_BGL
7067     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7068     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7069
7070     # re-read the instance from the configuration after rename
7071     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7072
7073     if rename_file_storage:
7074       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7075       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7076                                                      old_file_storage_dir,
7077                                                      new_file_storage_dir)
7078       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7079                    " (but the instance has been renamed in Ganeti)" %
7080                    (inst.primary_node, old_file_storage_dir,
7081                     new_file_storage_dir))
7082
7083     _StartInstanceDisks(self, inst, None)
7084     try:
7085       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7086                                                  old_name, self.op.debug_level)
7087       msg = result.fail_msg
7088       if msg:
7089         msg = ("Could not run OS rename script for instance %s on node %s"
7090                " (but the instance has been renamed in Ganeti): %s" %
7091                (inst.name, inst.primary_node, msg))
7092         self.proc.LogWarning(msg)
7093     finally:
7094       _ShutdownInstanceDisks(self, inst)
7095
7096     return inst.name
7097
7098
7099 class LUInstanceRemove(LogicalUnit):
7100   """Remove an instance.
7101
7102   """
7103   HPATH = "instance-remove"
7104   HTYPE = constants.HTYPE_INSTANCE
7105   REQ_BGL = False
7106
7107   def ExpandNames(self):
7108     self._ExpandAndLockInstance()
7109     self.needed_locks[locking.LEVEL_NODE] = []
7110     self.needed_locks[locking.LEVEL_NODE_RES] = []
7111     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7112
7113   def DeclareLocks(self, level):
7114     if level == locking.LEVEL_NODE:
7115       self._LockInstancesNodes()
7116     elif level == locking.LEVEL_NODE_RES:
7117       # Copy node locks
7118       self.needed_locks[locking.LEVEL_NODE_RES] = \
7119         self.needed_locks[locking.LEVEL_NODE][:]
7120
7121   def BuildHooksEnv(self):
7122     """Build hooks env.
7123
7124     This runs on master, primary and secondary nodes of the instance.
7125
7126     """
7127     env = _BuildInstanceHookEnvByObject(self, self.instance)
7128     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7129     return env
7130
7131   def BuildHooksNodes(self):
7132     """Build hooks nodes.
7133
7134     """
7135     nl = [self.cfg.GetMasterNode()]
7136     nl_post = list(self.instance.all_nodes) + nl
7137     return (nl, nl_post)
7138
7139   def CheckPrereq(self):
7140     """Check prerequisites.
7141
7142     This checks that the instance is in the cluster.
7143
7144     """
7145     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7146     assert self.instance is not None, \
7147       "Cannot retrieve locked instance %s" % self.op.instance_name
7148
7149   def Exec(self, feedback_fn):
7150     """Remove the instance.
7151
7152     """
7153     instance = self.instance
7154     logging.info("Shutting down instance %s on node %s",
7155                  instance.name, instance.primary_node)
7156
7157     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7158                                              self.op.shutdown_timeout)
7159     msg = result.fail_msg
7160     if msg:
7161       if self.op.ignore_failures:
7162         feedback_fn("Warning: can't shutdown instance: %s" % msg)
7163       else:
7164         raise errors.OpExecError("Could not shutdown instance %s on"
7165                                  " node %s: %s" %
7166                                  (instance.name, instance.primary_node, msg))
7167
7168     assert (self.owned_locks(locking.LEVEL_NODE) ==
7169             self.owned_locks(locking.LEVEL_NODE_RES))
7170     assert not (set(instance.all_nodes) -
7171                 self.owned_locks(locking.LEVEL_NODE)), \
7172       "Not owning correct locks"
7173
7174     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7175
7176
7177 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7178   """Utility function to remove an instance.
7179
7180   """
7181   logging.info("Removing block devices for instance %s", instance.name)
7182
7183   if not _RemoveDisks(lu, instance):
7184     if not ignore_failures:
7185       raise errors.OpExecError("Can't remove instance's disks")
7186     feedback_fn("Warning: can't remove instance's disks")
7187
7188   logging.info("Removing instance %s out of cluster config", instance.name)
7189
7190   lu.cfg.RemoveInstance(instance.name)
7191
7192   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7193     "Instance lock removal conflict"
7194
7195   # Remove lock for the instance
7196   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7197
7198
7199 class LUInstanceQuery(NoHooksLU):
7200   """Logical unit for querying instances.
7201
7202   """
7203   # pylint: disable=W0142
7204   REQ_BGL = False
7205
7206   def CheckArguments(self):
7207     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7208                              self.op.output_fields, self.op.use_locking)
7209
7210   def ExpandNames(self):
7211     self.iq.ExpandNames(self)
7212
7213   def DeclareLocks(self, level):
7214     self.iq.DeclareLocks(self, level)
7215
7216   def Exec(self, feedback_fn):
7217     return self.iq.OldStyleQuery(self)
7218
7219
7220 class LUInstanceFailover(LogicalUnit):
7221   """Failover an instance.
7222
7223   """
7224   HPATH = "instance-failover"
7225   HTYPE = constants.HTYPE_INSTANCE
7226   REQ_BGL = False
7227
7228   def CheckArguments(self):
7229     """Check the arguments.
7230
7231     """
7232     self.iallocator = getattr(self.op, "iallocator", None)
7233     self.target_node = getattr(self.op, "target_node", None)
7234
7235   def ExpandNames(self):
7236     self._ExpandAndLockInstance()
7237
7238     if self.op.target_node is not None:
7239       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7240
7241     self.needed_locks[locking.LEVEL_NODE] = []
7242     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7243
7244     ignore_consistency = self.op.ignore_consistency
7245     shutdown_timeout = self.op.shutdown_timeout
7246     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7247                                        cleanup=False,
7248                                        failover=True,
7249                                        ignore_consistency=ignore_consistency,
7250                                        shutdown_timeout=shutdown_timeout,
7251                                        ignore_ipolicy=self.op.ignore_ipolicy)
7252     self.tasklets = [self._migrater]
7253
7254   def DeclareLocks(self, level):
7255     if level == locking.LEVEL_NODE:
7256       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7257       if instance.disk_template in constants.DTS_EXT_MIRROR:
7258         if self.op.target_node is None:
7259           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7260         else:
7261           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7262                                                    self.op.target_node]
7263         del self.recalculate_locks[locking.LEVEL_NODE]
7264       else:
7265         self._LockInstancesNodes()
7266
7267   def BuildHooksEnv(self):
7268     """Build hooks env.
7269
7270     This runs on master, primary and secondary nodes of the instance.
7271
7272     """
7273     instance = self._migrater.instance
7274     source_node = instance.primary_node
7275     target_node = self.op.target_node
7276     env = {
7277       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7278       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7279       "OLD_PRIMARY": source_node,
7280       "NEW_PRIMARY": target_node,
7281       }
7282
7283     if instance.disk_template in constants.DTS_INT_MIRROR:
7284       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7285       env["NEW_SECONDARY"] = source_node
7286     else:
7287       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7288
7289     env.update(_BuildInstanceHookEnvByObject(self, instance))
7290
7291     return env
7292
7293   def BuildHooksNodes(self):
7294     """Build hooks nodes.
7295
7296     """
7297     instance = self._migrater.instance
7298     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7299     return (nl, nl + [instance.primary_node])
7300
7301
7302 class LUInstanceMigrate(LogicalUnit):
7303   """Migrate an instance.
7304
7305   This is migration without shutting down, compared to the failover,
7306   which is done with shutdown.
7307
7308   """
7309   HPATH = "instance-migrate"
7310   HTYPE = constants.HTYPE_INSTANCE
7311   REQ_BGL = False
7312
7313   def ExpandNames(self):
7314     self._ExpandAndLockInstance()
7315
7316     if self.op.target_node is not None:
7317       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7318
7319     self.needed_locks[locking.LEVEL_NODE] = []
7320     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7321
7322     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7323                                        cleanup=self.op.cleanup,
7324                                        failover=False,
7325                                        fallback=self.op.allow_failover,
7326                                        ignore_ipolicy=self.op.ignore_ipolicy)
7327     self.tasklets = [self._migrater]
7328
7329   def DeclareLocks(self, level):
7330     if level == locking.LEVEL_NODE:
7331       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7332       if instance.disk_template in constants.DTS_EXT_MIRROR:
7333         if self.op.target_node is None:
7334           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7335         else:
7336           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7337                                                    self.op.target_node]
7338         del self.recalculate_locks[locking.LEVEL_NODE]
7339       else:
7340         self._LockInstancesNodes()
7341
7342   def BuildHooksEnv(self):
7343     """Build hooks env.
7344
7345     This runs on master, primary and secondary nodes of the instance.
7346
7347     """
7348     instance = self._migrater.instance
7349     source_node = instance.primary_node
7350     target_node = self.op.target_node
7351     env = _BuildInstanceHookEnvByObject(self, instance)
7352     env.update({
7353       "MIGRATE_LIVE": self._migrater.live,
7354       "MIGRATE_CLEANUP": self.op.cleanup,
7355       "OLD_PRIMARY": source_node,
7356       "NEW_PRIMARY": target_node,
7357       })
7358
7359     if instance.disk_template in constants.DTS_INT_MIRROR:
7360       env["OLD_SECONDARY"] = target_node
7361       env["NEW_SECONDARY"] = source_node
7362     else:
7363       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7364
7365     return env
7366
7367   def BuildHooksNodes(self):
7368     """Build hooks nodes.
7369
7370     """
7371     instance = self._migrater.instance
7372     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7373     return (nl, nl + [instance.primary_node])
7374
7375
7376 class LUInstanceMove(LogicalUnit):
7377   """Move an instance by data-copying.
7378
7379   """
7380   HPATH = "instance-move"
7381   HTYPE = constants.HTYPE_INSTANCE
7382   REQ_BGL = False
7383
7384   def ExpandNames(self):
7385     self._ExpandAndLockInstance()
7386     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7387     self.op.target_node = target_node
7388     self.needed_locks[locking.LEVEL_NODE] = [target_node]
7389     self.needed_locks[locking.LEVEL_NODE_RES] = []
7390     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7391
7392   def DeclareLocks(self, level):
7393     if level == locking.LEVEL_NODE:
7394       self._LockInstancesNodes(primary_only=True)
7395     elif level == locking.LEVEL_NODE_RES:
7396       # Copy node locks
7397       self.needed_locks[locking.LEVEL_NODE_RES] = \
7398         self.needed_locks[locking.LEVEL_NODE][:]
7399
7400   def BuildHooksEnv(self):
7401     """Build hooks env.
7402
7403     This runs on master, primary and secondary nodes of the instance.
7404
7405     """
7406     env = {
7407       "TARGET_NODE": self.op.target_node,
7408       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7409       }
7410     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7411     return env
7412
7413   def BuildHooksNodes(self):
7414     """Build hooks nodes.
7415
7416     """
7417     nl = [
7418       self.cfg.GetMasterNode(),
7419       self.instance.primary_node,
7420       self.op.target_node,
7421       ]
7422     return (nl, nl)
7423
7424   def CheckPrereq(self):
7425     """Check prerequisites.
7426
7427     This checks that the instance is in the cluster.
7428
7429     """
7430     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7431     assert self.instance is not None, \
7432       "Cannot retrieve locked instance %s" % self.op.instance_name
7433
7434     node = self.cfg.GetNodeInfo(self.op.target_node)
7435     assert node is not None, \
7436       "Cannot retrieve locked node %s" % self.op.target_node
7437
7438     self.target_node = target_node = node.name
7439
7440     if target_node == instance.primary_node:
7441       raise errors.OpPrereqError("Instance %s is already on the node %s" %
7442                                  (instance.name, target_node),
7443                                  errors.ECODE_STATE)
7444
7445     bep = self.cfg.GetClusterInfo().FillBE(instance)
7446
7447     for idx, dsk in enumerate(instance.disks):
7448       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7449         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7450                                    " cannot copy" % idx, errors.ECODE_STATE)
7451
7452     _CheckNodeOnline(self, target_node)
7453     _CheckNodeNotDrained(self, target_node)
7454     _CheckNodeVmCapable(self, target_node)
7455     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), node.group)
7456     _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7457                             ignore=self.op.ignore_ipolicy)
7458
7459     if instance.admin_state == constants.ADMINST_UP:
7460       # check memory requirements on the secondary node
7461       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7462                            instance.name, bep[constants.BE_MAXMEM],
7463                            instance.hypervisor)
7464     else:
7465       self.LogInfo("Not checking memory on the secondary node as"
7466                    " instance will not be started")
7467
7468     # check bridge existance
7469     _CheckInstanceBridgesExist(self, instance, node=target_node)
7470
7471   def Exec(self, feedback_fn):
7472     """Move an instance.
7473
7474     The move is done by shutting it down on its present node, copying
7475     the data over (slow) and starting it on the new node.
7476
7477     """
7478     instance = self.instance
7479
7480     source_node = instance.primary_node
7481     target_node = self.target_node
7482
7483     self.LogInfo("Shutting down instance %s on source node %s",
7484                  instance.name, source_node)
7485
7486     assert (self.owned_locks(locking.LEVEL_NODE) ==
7487             self.owned_locks(locking.LEVEL_NODE_RES))
7488
7489     result = self.rpc.call_instance_shutdown(source_node, instance,
7490                                              self.op.shutdown_timeout)
7491     msg = result.fail_msg
7492     if msg:
7493       if self.op.ignore_consistency:
7494         self.proc.LogWarning("Could not shutdown instance %s on node %s."
7495                              " Proceeding anyway. Please make sure node"
7496                              " %s is down. Error details: %s",
7497                              instance.name, source_node, source_node, msg)
7498       else:
7499         raise errors.OpExecError("Could not shutdown instance %s on"
7500                                  " node %s: %s" %
7501                                  (instance.name, source_node, msg))
7502
7503     # create the target disks
7504     try:
7505       _CreateDisks(self, instance, target_node=target_node)
7506     except errors.OpExecError:
7507       self.LogWarning("Device creation failed, reverting...")
7508       try:
7509         _RemoveDisks(self, instance, target_node=target_node)
7510       finally:
7511         self.cfg.ReleaseDRBDMinors(instance.name)
7512         raise
7513
7514     cluster_name = self.cfg.GetClusterInfo().cluster_name
7515
7516     errs = []
7517     # activate, get path, copy the data over
7518     for idx, disk in enumerate(instance.disks):
7519       self.LogInfo("Copying data for disk %d", idx)
7520       result = self.rpc.call_blockdev_assemble(target_node, disk,
7521                                                instance.name, True, idx)
7522       if result.fail_msg:
7523         self.LogWarning("Can't assemble newly created disk %d: %s",
7524                         idx, result.fail_msg)
7525         errs.append(result.fail_msg)
7526         break
7527       dev_path = result.payload
7528       result = self.rpc.call_blockdev_export(source_node, disk,
7529                                              target_node, dev_path,
7530                                              cluster_name)
7531       if result.fail_msg:
7532         self.LogWarning("Can't copy data over for disk %d: %s",
7533                         idx, result.fail_msg)
7534         errs.append(result.fail_msg)
7535         break
7536
7537     if errs:
7538       self.LogWarning("Some disks failed to copy, aborting")
7539       try:
7540         _RemoveDisks(self, instance, target_node=target_node)
7541       finally:
7542         self.cfg.ReleaseDRBDMinors(instance.name)
7543         raise errors.OpExecError("Errors during disk copy: %s" %
7544                                  (",".join(errs),))
7545
7546     instance.primary_node = target_node
7547     self.cfg.Update(instance, feedback_fn)
7548
7549     self.LogInfo("Removing the disks on the original node")
7550     _RemoveDisks(self, instance, target_node=source_node)
7551
7552     # Only start the instance if it's marked as up
7553     if instance.admin_state == constants.ADMINST_UP:
7554       self.LogInfo("Starting instance %s on node %s",
7555                    instance.name, target_node)
7556
7557       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7558                                            ignore_secondaries=True)
7559       if not disks_ok:
7560         _ShutdownInstanceDisks(self, instance)
7561         raise errors.OpExecError("Can't activate the instance's disks")
7562
7563       result = self.rpc.call_instance_start(target_node,
7564                                             (instance, None, None), False)
7565       msg = result.fail_msg
7566       if msg:
7567         _ShutdownInstanceDisks(self, instance)
7568         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7569                                  (instance.name, target_node, msg))
7570
7571
7572 class LUNodeMigrate(LogicalUnit):
7573   """Migrate all instances from a node.
7574
7575   """
7576   HPATH = "node-migrate"
7577   HTYPE = constants.HTYPE_NODE
7578   REQ_BGL = False
7579
7580   def CheckArguments(self):
7581     pass
7582
7583   def ExpandNames(self):
7584     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7585
7586     self.share_locks = _ShareAll()
7587     self.needed_locks = {
7588       locking.LEVEL_NODE: [self.op.node_name],
7589       }
7590
7591   def BuildHooksEnv(self):
7592     """Build hooks env.
7593
7594     This runs on the master, the primary and all the secondaries.
7595
7596     """
7597     return {
7598       "NODE_NAME": self.op.node_name,
7599       }
7600
7601   def BuildHooksNodes(self):
7602     """Build hooks nodes.
7603
7604     """
7605     nl = [self.cfg.GetMasterNode()]
7606     return (nl, nl)
7607
7608   def CheckPrereq(self):
7609     pass
7610
7611   def Exec(self, feedback_fn):
7612     # Prepare jobs for migration instances
7613     jobs = [
7614       [opcodes.OpInstanceMigrate(instance_name=inst.name,
7615                                  mode=self.op.mode,
7616                                  live=self.op.live,
7617                                  iallocator=self.op.iallocator,
7618                                  target_node=self.op.target_node,
7619                                  ignore_ipolicy=self.op.ignore_ipolicy)]
7620       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7621       ]
7622
7623     # TODO: Run iallocator in this opcode and pass correct placement options to
7624     # OpInstanceMigrate. Since other jobs can modify the cluster between
7625     # running the iallocator and the actual migration, a good consistency model
7626     # will have to be found.
7627
7628     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7629             frozenset([self.op.node_name]))
7630
7631     return ResultWithJobs(jobs)
7632
7633
7634 class TLMigrateInstance(Tasklet):
7635   """Tasklet class for instance migration.
7636
7637   @type live: boolean
7638   @ivar live: whether the migration will be done live or non-live;
7639       this variable is initalized only after CheckPrereq has run
7640   @type cleanup: boolean
7641   @ivar cleanup: Wheater we cleanup from a failed migration
7642   @type iallocator: string
7643   @ivar iallocator: The iallocator used to determine target_node
7644   @type target_node: string
7645   @ivar target_node: If given, the target_node to reallocate the instance to
7646   @type failover: boolean
7647   @ivar failover: Whether operation results in failover or migration
7648   @type fallback: boolean
7649   @ivar fallback: Whether fallback to failover is allowed if migration not
7650                   possible
7651   @type ignore_consistency: boolean
7652   @ivar ignore_consistency: Wheter we should ignore consistency between source
7653                             and target node
7654   @type shutdown_timeout: int
7655   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7656   @type ignore_ipolicy: bool
7657   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7658
7659   """
7660
7661   # Constants
7662   _MIGRATION_POLL_INTERVAL = 1      # seconds
7663   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7664
7665   def __init__(self, lu, instance_name, cleanup=False,
7666                failover=False, fallback=False,
7667                ignore_consistency=False,
7668                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7669                ignore_ipolicy=False):
7670     """Initializes this class.
7671
7672     """
7673     Tasklet.__init__(self, lu)
7674
7675     # Parameters
7676     self.instance_name = instance_name
7677     self.cleanup = cleanup
7678     self.live = False # will be overridden later
7679     self.failover = failover
7680     self.fallback = fallback
7681     self.ignore_consistency = ignore_consistency
7682     self.shutdown_timeout = shutdown_timeout
7683     self.ignore_ipolicy = ignore_ipolicy
7684
7685   def CheckPrereq(self):
7686     """Check prerequisites.
7687
7688     This checks that the instance is in the cluster.
7689
7690     """
7691     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7692     instance = self.cfg.GetInstanceInfo(instance_name)
7693     assert instance is not None
7694     self.instance = instance
7695     cluster = self.cfg.GetClusterInfo()
7696
7697     if (not self.cleanup and
7698         not instance.admin_state == constants.ADMINST_UP and
7699         not self.failover and self.fallback):
7700       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7701                       " switching to failover")
7702       self.failover = True
7703
7704     if instance.disk_template not in constants.DTS_MIRRORED:
7705       if self.failover:
7706         text = "failovers"
7707       else:
7708         text = "migrations"
7709       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7710                                  " %s" % (instance.disk_template, text),
7711                                  errors.ECODE_STATE)
7712
7713     if instance.disk_template in constants.DTS_EXT_MIRROR:
7714       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7715
7716       if self.lu.op.iallocator:
7717         self._RunAllocator()
7718       else:
7719         # We set set self.target_node as it is required by
7720         # BuildHooksEnv
7721         self.target_node = self.lu.op.target_node
7722
7723       # Check that the target node is correct in terms of instance policy
7724       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7725       ipolicy = _CalculateGroupIPolicy(cluster, nodeinfo.group)
7726       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7727                               ignore=self.ignore_ipolicy)
7728
7729       # self.target_node is already populated, either directly or by the
7730       # iallocator run
7731       target_node = self.target_node
7732       if self.target_node == instance.primary_node:
7733         raise errors.OpPrereqError("Cannot migrate instance %s"
7734                                    " to its primary (%s)" %
7735                                    (instance.name, instance.primary_node))
7736
7737       if len(self.lu.tasklets) == 1:
7738         # It is safe to release locks only when we're the only tasklet
7739         # in the LU
7740         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7741                       keep=[instance.primary_node, self.target_node])
7742
7743     else:
7744       secondary_nodes = instance.secondary_nodes
7745       if not secondary_nodes:
7746         raise errors.ConfigurationError("No secondary node but using"
7747                                         " %s disk template" %
7748                                         instance.disk_template)
7749       target_node = secondary_nodes[0]
7750       if self.lu.op.iallocator or (self.lu.op.target_node and
7751                                    self.lu.op.target_node != target_node):
7752         if self.failover:
7753           text = "failed over"
7754         else:
7755           text = "migrated"
7756         raise errors.OpPrereqError("Instances with disk template %s cannot"
7757                                    " be %s to arbitrary nodes"
7758                                    " (neither an iallocator nor a target"
7759                                    " node can be passed)" %
7760                                    (instance.disk_template, text),
7761                                    errors.ECODE_INVAL)
7762       nodeinfo = self.cfg.GetNodeInfo(target_node)
7763       ipolicy = _CalculateGroupIPolicy(cluster, nodeinfo.group)
7764       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7765                               ignore=self.ignore_ipolicy)
7766
7767     i_be = cluster.FillBE(instance)
7768
7769     # check memory requirements on the secondary node
7770     if not self.failover or instance.admin_state == constants.ADMINST_UP:
7771       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7772                            instance.name, i_be[constants.BE_MAXMEM],
7773                            instance.hypervisor)
7774     else:
7775       self.lu.LogInfo("Not checking memory on the secondary node as"
7776                       " instance will not be started")
7777
7778     # check if failover must be forced instead of migration
7779     if (not self.cleanup and not self.failover and
7780         i_be[constants.BE_ALWAYS_FAILOVER]):
7781       if self.fallback:
7782         self.lu.LogInfo("Instance configured to always failover; fallback"
7783                         " to failover")
7784         self.failover = True
7785       else:
7786         raise errors.OpPrereqError("This instance has been configured to"
7787                                    " always failover, please allow failover",
7788                                    errors.ECODE_STATE)
7789
7790     # check bridge existance
7791     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7792
7793     if not self.cleanup:
7794       _CheckNodeNotDrained(self.lu, target_node)
7795       if not self.failover:
7796         result = self.rpc.call_instance_migratable(instance.primary_node,
7797                                                    instance)
7798         if result.fail_msg and self.fallback:
7799           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7800                           " failover")
7801           self.failover = True
7802         else:
7803           result.Raise("Can't migrate, please use failover",
7804                        prereq=True, ecode=errors.ECODE_STATE)
7805
7806     assert not (self.failover and self.cleanup)
7807
7808     if not self.failover:
7809       if self.lu.op.live is not None and self.lu.op.mode is not None:
7810         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7811                                    " parameters are accepted",
7812                                    errors.ECODE_INVAL)
7813       if self.lu.op.live is not None:
7814         if self.lu.op.live:
7815           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7816         else:
7817           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7818         # reset the 'live' parameter to None so that repeated
7819         # invocations of CheckPrereq do not raise an exception
7820         self.lu.op.live = None
7821       elif self.lu.op.mode is None:
7822         # read the default value from the hypervisor
7823         i_hv = cluster.FillHV(self.instance, skip_globals=False)
7824         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7825
7826       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7827     else:
7828       # Failover is never live
7829       self.live = False
7830
7831   def _RunAllocator(self):
7832     """Run the allocator based on input opcode.
7833
7834     """
7835     # FIXME: add a self.ignore_ipolicy option
7836     ial = IAllocator(self.cfg, self.rpc,
7837                      mode=constants.IALLOCATOR_MODE_RELOC,
7838                      name=self.instance_name,
7839                      # TODO See why hail breaks with a single node below
7840                      relocate_from=[self.instance.primary_node,
7841                                     self.instance.primary_node],
7842                      )
7843
7844     ial.Run(self.lu.op.iallocator)
7845
7846     if not ial.success:
7847       raise errors.OpPrereqError("Can't compute nodes using"
7848                                  " iallocator '%s': %s" %
7849                                  (self.lu.op.iallocator, ial.info),
7850                                  errors.ECODE_NORES)
7851     if len(ial.result) != ial.required_nodes:
7852       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7853                                  " of nodes (%s), required %s" %
7854                                  (self.lu.op.iallocator, len(ial.result),
7855                                   ial.required_nodes), errors.ECODE_FAULT)
7856     self.target_node = ial.result[0]
7857     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7858                  self.instance_name, self.lu.op.iallocator,
7859                  utils.CommaJoin(ial.result))
7860
7861   def _WaitUntilSync(self):
7862     """Poll with custom rpc for disk sync.
7863
7864     This uses our own step-based rpc call.
7865
7866     """
7867     self.feedback_fn("* wait until resync is done")
7868     all_done = False
7869     while not all_done:
7870       all_done = True
7871       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7872                                             self.nodes_ip,
7873                                             self.instance.disks)
7874       min_percent = 100
7875       for node, nres in result.items():
7876         nres.Raise("Cannot resync disks on node %s" % node)
7877         node_done, node_percent = nres.payload
7878         all_done = all_done and node_done
7879         if node_percent is not None:
7880           min_percent = min(min_percent, node_percent)
7881       if not all_done:
7882         if min_percent < 100:
7883           self.feedback_fn("   - progress: %.1f%%" % min_percent)
7884         time.sleep(2)
7885
7886   def _EnsureSecondary(self, node):
7887     """Demote a node to secondary.
7888
7889     """
7890     self.feedback_fn("* switching node %s to secondary mode" % node)
7891
7892     for dev in self.instance.disks:
7893       self.cfg.SetDiskID(dev, node)
7894
7895     result = self.rpc.call_blockdev_close(node, self.instance.name,
7896                                           self.instance.disks)
7897     result.Raise("Cannot change disk to secondary on node %s" % node)
7898
7899   def _GoStandalone(self):
7900     """Disconnect from the network.
7901
7902     """
7903     self.feedback_fn("* changing into standalone mode")
7904     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7905                                                self.instance.disks)
7906     for node, nres in result.items():
7907       nres.Raise("Cannot disconnect disks node %s" % node)
7908
7909   def _GoReconnect(self, multimaster):
7910     """Reconnect to the network.
7911
7912     """
7913     if multimaster:
7914       msg = "dual-master"
7915     else:
7916       msg = "single-master"
7917     self.feedback_fn("* changing disks into %s mode" % msg)
7918     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7919                                            self.instance.disks,
7920                                            self.instance.name, multimaster)
7921     for node, nres in result.items():
7922       nres.Raise("Cannot change disks config on node %s" % node)
7923
7924   def _ExecCleanup(self):
7925     """Try to cleanup after a failed migration.
7926
7927     The cleanup is done by:
7928       - check that the instance is running only on one node
7929         (and update the config if needed)
7930       - change disks on its secondary node to secondary
7931       - wait until disks are fully synchronized
7932       - disconnect from the network
7933       - change disks into single-master mode
7934       - wait again until disks are fully synchronized
7935
7936     """
7937     instance = self.instance
7938     target_node = self.target_node
7939     source_node = self.source_node
7940
7941     # check running on only one node
7942     self.feedback_fn("* checking where the instance actually runs"
7943                      " (if this hangs, the hypervisor might be in"
7944                      " a bad state)")
7945     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7946     for node, result in ins_l.items():
7947       result.Raise("Can't contact node %s" % node)
7948
7949     runningon_source = instance.name in ins_l[source_node].payload
7950     runningon_target = instance.name in ins_l[target_node].payload
7951
7952     if runningon_source and runningon_target:
7953       raise errors.OpExecError("Instance seems to be running on two nodes,"
7954                                " or the hypervisor is confused; you will have"
7955                                " to ensure manually that it runs only on one"
7956                                " and restart this operation")
7957
7958     if not (runningon_source or runningon_target):
7959       raise errors.OpExecError("Instance does not seem to be running at all;"
7960                                " in this case it's safer to repair by"
7961                                " running 'gnt-instance stop' to ensure disk"
7962                                " shutdown, and then restarting it")
7963
7964     if runningon_target:
7965       # the migration has actually succeeded, we need to update the config
7966       self.feedback_fn("* instance running on secondary node (%s),"
7967                        " updating config" % target_node)
7968       instance.primary_node = target_node
7969       self.cfg.Update(instance, self.feedback_fn)
7970       demoted_node = source_node
7971     else:
7972       self.feedback_fn("* instance confirmed to be running on its"
7973                        " primary node (%s)" % source_node)
7974       demoted_node = target_node
7975
7976     if instance.disk_template in constants.DTS_INT_MIRROR:
7977       self._EnsureSecondary(demoted_node)
7978       try:
7979         self._WaitUntilSync()
7980       except errors.OpExecError:
7981         # we ignore here errors, since if the device is standalone, it
7982         # won't be able to sync
7983         pass
7984       self._GoStandalone()
7985       self._GoReconnect(False)
7986       self._WaitUntilSync()
7987
7988     self.feedback_fn("* done")
7989
7990   def _RevertDiskStatus(self):
7991     """Try to revert the disk status after a failed migration.
7992
7993     """
7994     target_node = self.target_node
7995     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7996       return
7997
7998     try:
7999       self._EnsureSecondary(target_node)
8000       self._GoStandalone()
8001       self._GoReconnect(False)
8002       self._WaitUntilSync()
8003     except errors.OpExecError, err:
8004       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8005                          " please try to recover the instance manually;"
8006                          " error '%s'" % str(err))
8007
8008   def _AbortMigration(self):
8009     """Call the hypervisor code to abort a started migration.
8010
8011     """
8012     instance = self.instance
8013     target_node = self.target_node
8014     source_node = self.source_node
8015     migration_info = self.migration_info
8016
8017     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8018                                                                  instance,
8019                                                                  migration_info,
8020                                                                  False)
8021     abort_msg = abort_result.fail_msg
8022     if abort_msg:
8023       logging.error("Aborting migration failed on target node %s: %s",
8024                     target_node, abort_msg)
8025       # Don't raise an exception here, as we stil have to try to revert the
8026       # disk status, even if this step failed.
8027
8028     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8029         instance, False, self.live)
8030     abort_msg = abort_result.fail_msg
8031     if abort_msg:
8032       logging.error("Aborting migration failed on source node %s: %s",
8033                     source_node, abort_msg)
8034
8035   def _ExecMigration(self):
8036     """Migrate an instance.
8037
8038     The migrate is done by:
8039       - change the disks into dual-master mode
8040       - wait until disks are fully synchronized again
8041       - migrate the instance
8042       - change disks on the new secondary node (the old primary) to secondary
8043       - wait until disks are fully synchronized
8044       - change disks into single-master mode
8045
8046     """
8047     instance = self.instance
8048     target_node = self.target_node
8049     source_node = self.source_node
8050
8051     # Check for hypervisor version mismatch and warn the user.
8052     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8053                                        None, [self.instance.hypervisor])
8054     for ninfo in nodeinfo.values():
8055       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8056                   ninfo.node)
8057     (_, _, (src_info, )) = nodeinfo[source_node].payload
8058     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8059
8060     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8061         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8062       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8063       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8064       if src_version != dst_version:
8065         self.feedback_fn("* warning: hypervisor version mismatch between"
8066                          " source (%s) and target (%s) node" %
8067                          (src_version, dst_version))
8068
8069     self.feedback_fn("* checking disk consistency between source and target")
8070     for dev in instance.disks:
8071       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8072         raise errors.OpExecError("Disk %s is degraded or not fully"
8073                                  " synchronized on target node,"
8074                                  " aborting migration" % dev.iv_name)
8075
8076     # First get the migration information from the remote node
8077     result = self.rpc.call_migration_info(source_node, instance)
8078     msg = result.fail_msg
8079     if msg:
8080       log_err = ("Failed fetching source migration information from %s: %s" %
8081                  (source_node, msg))
8082       logging.error(log_err)
8083       raise errors.OpExecError(log_err)
8084
8085     self.migration_info = migration_info = result.payload
8086
8087     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8088       # Then switch the disks to master/master mode
8089       self._EnsureSecondary(target_node)
8090       self._GoStandalone()
8091       self._GoReconnect(True)
8092       self._WaitUntilSync()
8093
8094     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8095     result = self.rpc.call_accept_instance(target_node,
8096                                            instance,
8097                                            migration_info,
8098                                            self.nodes_ip[target_node])
8099
8100     msg = result.fail_msg
8101     if msg:
8102       logging.error("Instance pre-migration failed, trying to revert"
8103                     " disk status: %s", msg)
8104       self.feedback_fn("Pre-migration failed, aborting")
8105       self._AbortMigration()
8106       self._RevertDiskStatus()
8107       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8108                                (instance.name, msg))
8109
8110     self.feedback_fn("* migrating instance to %s" % target_node)
8111     result = self.rpc.call_instance_migrate(source_node, instance,
8112                                             self.nodes_ip[target_node],
8113                                             self.live)
8114     msg = result.fail_msg
8115     if msg:
8116       logging.error("Instance migration failed, trying to revert"
8117                     " disk status: %s", msg)
8118       self.feedback_fn("Migration failed, aborting")
8119       self._AbortMigration()
8120       self._RevertDiskStatus()
8121       raise errors.OpExecError("Could not migrate instance %s: %s" %
8122                                (instance.name, msg))
8123
8124     self.feedback_fn("* starting memory transfer")
8125     last_feedback = time.time()
8126     while True:
8127       result = self.rpc.call_instance_get_migration_status(source_node,
8128                                                            instance)
8129       msg = result.fail_msg
8130       ms = result.payload   # MigrationStatus instance
8131       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8132         logging.error("Instance migration failed, trying to revert"
8133                       " disk status: %s", msg)
8134         self.feedback_fn("Migration failed, aborting")
8135         self._AbortMigration()
8136         self._RevertDiskStatus()
8137         raise errors.OpExecError("Could not migrate instance %s: %s" %
8138                                  (instance.name, msg))
8139
8140       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8141         self.feedback_fn("* memory transfer complete")
8142         break
8143
8144       if (utils.TimeoutExpired(last_feedback,
8145                                self._MIGRATION_FEEDBACK_INTERVAL) and
8146           ms.transferred_ram is not None):
8147         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8148         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8149         last_feedback = time.time()
8150
8151       time.sleep(self._MIGRATION_POLL_INTERVAL)
8152
8153     result = self.rpc.call_instance_finalize_migration_src(source_node,
8154                                                            instance,
8155                                                            True,
8156                                                            self.live)
8157     msg = result.fail_msg
8158     if msg:
8159       logging.error("Instance migration succeeded, but finalization failed"
8160                     " on the source node: %s", msg)
8161       raise errors.OpExecError("Could not finalize instance migration: %s" %
8162                                msg)
8163
8164     instance.primary_node = target_node
8165
8166     # distribute new instance config to the other nodes
8167     self.cfg.Update(instance, self.feedback_fn)
8168
8169     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8170                                                            instance,
8171                                                            migration_info,
8172                                                            True)
8173     msg = result.fail_msg
8174     if msg:
8175       logging.error("Instance migration succeeded, but finalization failed"
8176                     " on the target node: %s", msg)
8177       raise errors.OpExecError("Could not finalize instance migration: %s" %
8178                                msg)
8179
8180     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8181       self._EnsureSecondary(source_node)
8182       self._WaitUntilSync()
8183       self._GoStandalone()
8184       self._GoReconnect(False)
8185       self._WaitUntilSync()
8186
8187     self.feedback_fn("* done")
8188
8189   def _ExecFailover(self):
8190     """Failover an instance.
8191
8192     The failover is done by shutting it down on its present node and
8193     starting it on the secondary.
8194
8195     """
8196     instance = self.instance
8197     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8198
8199     source_node = instance.primary_node
8200     target_node = self.target_node
8201
8202     if instance.admin_state == constants.ADMINST_UP:
8203       self.feedback_fn("* checking disk consistency between source and target")
8204       for dev in instance.disks:
8205         # for drbd, these are drbd over lvm
8206         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8207           if primary_node.offline:
8208             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8209                              " target node %s" %
8210                              (primary_node.name, dev.iv_name, target_node))
8211           elif not self.ignore_consistency:
8212             raise errors.OpExecError("Disk %s is degraded on target node,"
8213                                      " aborting failover" % dev.iv_name)
8214     else:
8215       self.feedback_fn("* not checking disk consistency as instance is not"
8216                        " running")
8217
8218     self.feedback_fn("* shutting down instance on source node")
8219     logging.info("Shutting down instance %s on node %s",
8220                  instance.name, source_node)
8221
8222     result = self.rpc.call_instance_shutdown(source_node, instance,
8223                                              self.shutdown_timeout)
8224     msg = result.fail_msg
8225     if msg:
8226       if self.ignore_consistency or primary_node.offline:
8227         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8228                            " proceeding anyway; please make sure node"
8229                            " %s is down; error details: %s",
8230                            instance.name, source_node, source_node, msg)
8231       else:
8232         raise errors.OpExecError("Could not shutdown instance %s on"
8233                                  " node %s: %s" %
8234                                  (instance.name, source_node, msg))
8235
8236     self.feedback_fn("* deactivating the instance's disks on source node")
8237     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8238       raise errors.OpExecError("Can't shut down the instance's disks")
8239
8240     instance.primary_node = target_node
8241     # distribute new instance config to the other nodes
8242     self.cfg.Update(instance, self.feedback_fn)
8243
8244     # Only start the instance if it's marked as up
8245     if instance.admin_state == constants.ADMINST_UP:
8246       self.feedback_fn("* activating the instance's disks on target node %s" %
8247                        target_node)
8248       logging.info("Starting instance %s on node %s",
8249                    instance.name, target_node)
8250
8251       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8252                                            ignore_secondaries=True)
8253       if not disks_ok:
8254         _ShutdownInstanceDisks(self.lu, instance)
8255         raise errors.OpExecError("Can't activate the instance's disks")
8256
8257       self.feedback_fn("* starting the instance on the target node %s" %
8258                        target_node)
8259       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8260                                             False)
8261       msg = result.fail_msg
8262       if msg:
8263         _ShutdownInstanceDisks(self.lu, instance)
8264         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8265                                  (instance.name, target_node, msg))
8266
8267   def Exec(self, feedback_fn):
8268     """Perform the migration.
8269
8270     """
8271     self.feedback_fn = feedback_fn
8272     self.source_node = self.instance.primary_node
8273
8274     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8275     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8276       self.target_node = self.instance.secondary_nodes[0]
8277       # Otherwise self.target_node has been populated either
8278       # directly, or through an iallocator.
8279
8280     self.all_nodes = [self.source_node, self.target_node]
8281     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8282                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8283
8284     if self.failover:
8285       feedback_fn("Failover instance %s" % self.instance.name)
8286       self._ExecFailover()
8287     else:
8288       feedback_fn("Migrating instance %s" % self.instance.name)
8289
8290       if self.cleanup:
8291         return self._ExecCleanup()
8292       else:
8293         return self._ExecMigration()
8294
8295
8296 def _CreateBlockDev(lu, node, instance, device, force_create,
8297                     info, force_open):
8298   """Create a tree of block devices on a given node.
8299
8300   If this device type has to be created on secondaries, create it and
8301   all its children.
8302
8303   If not, just recurse to children keeping the same 'force' value.
8304
8305   @param lu: the lu on whose behalf we execute
8306   @param node: the node on which to create the device
8307   @type instance: L{objects.Instance}
8308   @param instance: the instance which owns the device
8309   @type device: L{objects.Disk}
8310   @param device: the device to create
8311   @type force_create: boolean
8312   @param force_create: whether to force creation of this device; this
8313       will be change to True whenever we find a device which has
8314       CreateOnSecondary() attribute
8315   @param info: the extra 'metadata' we should attach to the device
8316       (this will be represented as a LVM tag)
8317   @type force_open: boolean
8318   @param force_open: this parameter will be passes to the
8319       L{backend.BlockdevCreate} function where it specifies
8320       whether we run on primary or not, and it affects both
8321       the child assembly and the device own Open() execution
8322
8323   """
8324   if device.CreateOnSecondary():
8325     force_create = True
8326
8327   if device.children:
8328     for child in device.children:
8329       _CreateBlockDev(lu, node, instance, child, force_create,
8330                       info, force_open)
8331
8332   if not force_create:
8333     return
8334
8335   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8336
8337
8338 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8339   """Create a single block device on a given node.
8340
8341   This will not recurse over children of the device, so they must be
8342   created in advance.
8343
8344   @param lu: the lu on whose behalf we execute
8345   @param node: the node on which to create the device
8346   @type instance: L{objects.Instance}
8347   @param instance: the instance which owns the device
8348   @type device: L{objects.Disk}
8349   @param device: the device to create
8350   @param info: the extra 'metadata' we should attach to the device
8351       (this will be represented as a LVM tag)
8352   @type force_open: boolean
8353   @param force_open: this parameter will be passes to the
8354       L{backend.BlockdevCreate} function where it specifies
8355       whether we run on primary or not, and it affects both
8356       the child assembly and the device own Open() execution
8357
8358   """
8359   lu.cfg.SetDiskID(device, node)
8360   result = lu.rpc.call_blockdev_create(node, device, device.size,
8361                                        instance.name, force_open, info)
8362   result.Raise("Can't create block device %s on"
8363                " node %s for instance %s" % (device, node, instance.name))
8364   if device.physical_id is None:
8365     device.physical_id = result.payload
8366
8367
8368 def _GenerateUniqueNames(lu, exts):
8369   """Generate a suitable LV name.
8370
8371   This will generate a logical volume name for the given instance.
8372
8373   """
8374   results = []
8375   for val in exts:
8376     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8377     results.append("%s%s" % (new_id, val))
8378   return results
8379
8380
8381 def _ComputeLDParams(disk_template, disk_params):
8382   """Computes Logical Disk parameters from Disk Template parameters.
8383
8384   @type disk_template: string
8385   @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8386   @type disk_params: dict
8387   @param disk_params: disk template parameters; dict(template_name -> parameters
8388   @rtype: list(dict)
8389   @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8390     contains the LD parameters of the node. The tree is flattened in-order.
8391
8392   """
8393   if disk_template not in constants.DISK_TEMPLATES:
8394     raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8395
8396   result = list()
8397   dt_params = disk_params[disk_template]
8398   if disk_template == constants.DT_DRBD8:
8399     drbd_params = {
8400       constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8401       constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8402       constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8403       constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8404       constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8405       constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8406       constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8407       constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8408       constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8409       constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8410       constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8411       constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8412       }
8413
8414     drbd_params = \
8415       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8416                        drbd_params)
8417
8418     result.append(drbd_params)
8419
8420     # data LV
8421     data_params = {
8422       constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8423       }
8424     data_params = \
8425       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8426                        data_params)
8427     result.append(data_params)
8428
8429     # metadata LV
8430     meta_params = {
8431       constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8432       }
8433     meta_params = \
8434       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8435                        meta_params)
8436     result.append(meta_params)
8437
8438   elif (disk_template == constants.DT_FILE or
8439         disk_template == constants.DT_SHARED_FILE):
8440     result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8441
8442   elif disk_template == constants.DT_PLAIN:
8443     params = {
8444       constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8445       }
8446     params = \
8447       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8448                        params)
8449     result.append(params)
8450
8451   elif disk_template == constants.DT_BLOCK:
8452     result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8453
8454   return result
8455
8456
8457 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8458                          iv_name, p_minor, s_minor, drbd_params, data_params,
8459                          meta_params):
8460   """Generate a drbd8 device complete with its children.
8461
8462   """
8463   assert len(vgnames) == len(names) == 2
8464   port = lu.cfg.AllocatePort()
8465   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8466
8467   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8468                           logical_id=(vgnames[0], names[0]),
8469                           params=data_params)
8470   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8471                           logical_id=(vgnames[1], names[1]),
8472                           params=meta_params)
8473   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8474                           logical_id=(primary, secondary, port,
8475                                       p_minor, s_minor,
8476                                       shared_secret),
8477                           children=[dev_data, dev_meta],
8478                           iv_name=iv_name, params=drbd_params)
8479   return drbd_dev
8480
8481
8482 def _GenerateDiskTemplate(lu, template_name,
8483                           instance_name, primary_node,
8484                           secondary_nodes, disk_info,
8485                           file_storage_dir, file_driver,
8486                           base_index, feedback_fn, disk_params):
8487   """Generate the entire disk layout for a given template type.
8488
8489   """
8490   #TODO: compute space requirements
8491
8492   vgname = lu.cfg.GetVGName()
8493   disk_count = len(disk_info)
8494   disks = []
8495   ld_params = _ComputeLDParams(template_name, disk_params)
8496   if template_name == constants.DT_DISKLESS:
8497     pass
8498   elif template_name == constants.DT_PLAIN:
8499     if len(secondary_nodes) != 0:
8500       raise errors.ProgrammerError("Wrong template configuration")
8501
8502     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8503                                       for i in range(disk_count)])
8504     for idx, disk in enumerate(disk_info):
8505       disk_index = idx + base_index
8506       vg = disk.get(constants.IDISK_VG, vgname)
8507       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8508       disk_dev = objects.Disk(dev_type=constants.LD_LV,
8509                               size=disk[constants.IDISK_SIZE],
8510                               logical_id=(vg, names[idx]),
8511                               iv_name="disk/%d" % disk_index,
8512                               mode=disk[constants.IDISK_MODE],
8513                               params=ld_params[0])
8514       disks.append(disk_dev)
8515   elif template_name == constants.DT_DRBD8:
8516     drbd_params, data_params, meta_params = ld_params
8517     if len(secondary_nodes) != 1:
8518       raise errors.ProgrammerError("Wrong template configuration")
8519     remote_node = secondary_nodes[0]
8520     minors = lu.cfg.AllocateDRBDMinor(
8521       [primary_node, remote_node] * len(disk_info), instance_name)
8522
8523     names = []
8524     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8525                                                for i in range(disk_count)]):
8526       names.append(lv_prefix + "_data")
8527       names.append(lv_prefix + "_meta")
8528     for idx, disk in enumerate(disk_info):
8529       disk_index = idx + base_index
8530       drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8531       data_vg = disk.get(constants.IDISK_VG, vgname)
8532       meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8533       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8534                                       disk[constants.IDISK_SIZE],
8535                                       [data_vg, meta_vg],
8536                                       names[idx * 2:idx * 2 + 2],
8537                                       "disk/%d" % disk_index,
8538                                       minors[idx * 2], minors[idx * 2 + 1],
8539                                       drbd_params, data_params, meta_params)
8540       disk_dev.mode = disk[constants.IDISK_MODE]
8541       disks.append(disk_dev)
8542   elif template_name == constants.DT_FILE:
8543     if len(secondary_nodes) != 0:
8544       raise errors.ProgrammerError("Wrong template configuration")
8545
8546     opcodes.RequireFileStorage()
8547
8548     for idx, disk in enumerate(disk_info):
8549       disk_index = idx + base_index
8550       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8551                               size=disk[constants.IDISK_SIZE],
8552                               iv_name="disk/%d" % disk_index,
8553                               logical_id=(file_driver,
8554                                           "%s/disk%d" % (file_storage_dir,
8555                                                          disk_index)),
8556                               mode=disk[constants.IDISK_MODE],
8557                               params=ld_params[0])
8558       disks.append(disk_dev)
8559   elif template_name == constants.DT_SHARED_FILE:
8560     if len(secondary_nodes) != 0:
8561       raise errors.ProgrammerError("Wrong template configuration")
8562
8563     opcodes.RequireSharedFileStorage()
8564
8565     for idx, disk in enumerate(disk_info):
8566       disk_index = idx + base_index
8567       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8568                               size=disk[constants.IDISK_SIZE],
8569                               iv_name="disk/%d" % disk_index,
8570                               logical_id=(file_driver,
8571                                           "%s/disk%d" % (file_storage_dir,
8572                                                          disk_index)),
8573                               mode=disk[constants.IDISK_MODE],
8574                               params=ld_params[0])
8575       disks.append(disk_dev)
8576   elif template_name == constants.DT_BLOCK:
8577     if len(secondary_nodes) != 0:
8578       raise errors.ProgrammerError("Wrong template configuration")
8579
8580     for idx, disk in enumerate(disk_info):
8581       disk_index = idx + base_index
8582       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8583                               size=disk[constants.IDISK_SIZE],
8584                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8585                                           disk[constants.IDISK_ADOPT]),
8586                               iv_name="disk/%d" % disk_index,
8587                               mode=disk[constants.IDISK_MODE],
8588                               params=ld_params[0])
8589       disks.append(disk_dev)
8590
8591   else:
8592     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8593   return disks
8594
8595
8596 def _GetInstanceInfoText(instance):
8597   """Compute that text that should be added to the disk's metadata.
8598
8599   """
8600   return "originstname+%s" % instance.name
8601
8602
8603 def _CalcEta(time_taken, written, total_size):
8604   """Calculates the ETA based on size written and total size.
8605
8606   @param time_taken: The time taken so far
8607   @param written: amount written so far
8608   @param total_size: The total size of data to be written
8609   @return: The remaining time in seconds
8610
8611   """
8612   avg_time = time_taken / float(written)
8613   return (total_size - written) * avg_time
8614
8615
8616 def _WipeDisks(lu, instance):
8617   """Wipes instance disks.
8618
8619   @type lu: L{LogicalUnit}
8620   @param lu: the logical unit on whose behalf we execute
8621   @type instance: L{objects.Instance}
8622   @param instance: the instance whose disks we should create
8623   @return: the success of the wipe
8624
8625   """
8626   node = instance.primary_node
8627
8628   for device in instance.disks:
8629     lu.cfg.SetDiskID(device, node)
8630
8631   logging.info("Pause sync of instance %s disks", instance.name)
8632   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8633
8634   for idx, success in enumerate(result.payload):
8635     if not success:
8636       logging.warn("pause-sync of instance %s for disks %d failed",
8637                    instance.name, idx)
8638
8639   try:
8640     for idx, device in enumerate(instance.disks):
8641       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8642       # MAX_WIPE_CHUNK at max
8643       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8644                             constants.MIN_WIPE_CHUNK_PERCENT)
8645       # we _must_ make this an int, otherwise rounding errors will
8646       # occur
8647       wipe_chunk_size = int(wipe_chunk_size)
8648
8649       lu.LogInfo("* Wiping disk %d", idx)
8650       logging.info("Wiping disk %d for instance %s, node %s using"
8651                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8652
8653       offset = 0
8654       size = device.size
8655       last_output = 0
8656       start_time = time.time()
8657
8658       while offset < size:
8659         wipe_size = min(wipe_chunk_size, size - offset)
8660         logging.debug("Wiping disk %d, offset %s, chunk %s",
8661                       idx, offset, wipe_size)
8662         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8663         result.Raise("Could not wipe disk %d at offset %d for size %d" %
8664                      (idx, offset, wipe_size))
8665         now = time.time()
8666         offset += wipe_size
8667         if now - last_output >= 60:
8668           eta = _CalcEta(now - start_time, offset, size)
8669           lu.LogInfo(" - done: %.1f%% ETA: %s" %
8670                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
8671           last_output = now
8672   finally:
8673     logging.info("Resume sync of instance %s disks", instance.name)
8674
8675     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8676
8677     for idx, success in enumerate(result.payload):
8678       if not success:
8679         lu.LogWarning("Resume sync of disk %d failed, please have a"
8680                       " look at the status and troubleshoot the issue", idx)
8681         logging.warn("resume-sync of instance %s for disks %d failed",
8682                      instance.name, idx)
8683
8684
8685 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8686   """Create all disks for an instance.
8687
8688   This abstracts away some work from AddInstance.
8689
8690   @type lu: L{LogicalUnit}
8691   @param lu: the logical unit on whose behalf we execute
8692   @type instance: L{objects.Instance}
8693   @param instance: the instance whose disks we should create
8694   @type to_skip: list
8695   @param to_skip: list of indices to skip
8696   @type target_node: string
8697   @param target_node: if passed, overrides the target node for creation
8698   @rtype: boolean
8699   @return: the success of the creation
8700
8701   """
8702   info = _GetInstanceInfoText(instance)
8703   if target_node is None:
8704     pnode = instance.primary_node
8705     all_nodes = instance.all_nodes
8706   else:
8707     pnode = target_node
8708     all_nodes = [pnode]
8709
8710   if instance.disk_template in constants.DTS_FILEBASED:
8711     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8712     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8713
8714     result.Raise("Failed to create directory '%s' on"
8715                  " node %s" % (file_storage_dir, pnode))
8716
8717   # Note: this needs to be kept in sync with adding of disks in
8718   # LUInstanceSetParams
8719   for idx, device in enumerate(instance.disks):
8720     if to_skip and idx in to_skip:
8721       continue
8722     logging.info("Creating volume %s for instance %s",
8723                  device.iv_name, instance.name)
8724     #HARDCODE
8725     for node in all_nodes:
8726       f_create = node == pnode
8727       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8728
8729
8730 def _RemoveDisks(lu, instance, target_node=None):
8731   """Remove all disks for an instance.
8732
8733   This abstracts away some work from `AddInstance()` and
8734   `RemoveInstance()`. Note that in case some of the devices couldn't
8735   be removed, the removal will continue with the other ones (compare
8736   with `_CreateDisks()`).
8737
8738   @type lu: L{LogicalUnit}
8739   @param lu: the logical unit on whose behalf we execute
8740   @type instance: L{objects.Instance}
8741   @param instance: the instance whose disks we should remove
8742   @type target_node: string
8743   @param target_node: used to override the node on which to remove the disks
8744   @rtype: boolean
8745   @return: the success of the removal
8746
8747   """
8748   logging.info("Removing block devices for instance %s", instance.name)
8749
8750   all_result = True
8751   for device in instance.disks:
8752     if target_node:
8753       edata = [(target_node, device)]
8754     else:
8755       edata = device.ComputeNodeTree(instance.primary_node)
8756     for node, disk in edata:
8757       lu.cfg.SetDiskID(disk, node)
8758       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8759       if msg:
8760         lu.LogWarning("Could not remove block device %s on node %s,"
8761                       " continuing anyway: %s", device.iv_name, node, msg)
8762         all_result = False
8763
8764     # if this is a DRBD disk, return its port to the pool
8765     if device.dev_type in constants.LDS_DRBD:
8766       tcp_port = device.logical_id[2]
8767       lu.cfg.AddTcpUdpPort(tcp_port)
8768
8769   if instance.disk_template == constants.DT_FILE:
8770     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8771     if target_node:
8772       tgt = target_node
8773     else:
8774       tgt = instance.primary_node
8775     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8776     if result.fail_msg:
8777       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8778                     file_storage_dir, instance.primary_node, result.fail_msg)
8779       all_result = False
8780
8781   return all_result
8782
8783
8784 def _ComputeDiskSizePerVG(disk_template, disks):
8785   """Compute disk size requirements in the volume group
8786
8787   """
8788   def _compute(disks, payload):
8789     """Universal algorithm.
8790
8791     """
8792     vgs = {}
8793     for disk in disks:
8794       vgs[disk[constants.IDISK_VG]] = \
8795         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
8796
8797     return vgs
8798
8799   # Required free disk space as a function of disk and swap space
8800   req_size_dict = {
8801     constants.DT_DISKLESS: {},
8802     constants.DT_PLAIN: _compute(disks, 0),
8803     # 128 MB are added for drbd metadata for each disk
8804     constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8805     constants.DT_FILE: {},
8806     constants.DT_SHARED_FILE: {},
8807   }
8808
8809   if disk_template not in req_size_dict:
8810     raise errors.ProgrammerError("Disk template '%s' size requirement"
8811                                  " is unknown" % disk_template)
8812
8813   return req_size_dict[disk_template]
8814
8815
8816 def _ComputeDiskSize(disk_template, disks):
8817   """Compute disk size requirements in the volume group
8818
8819   """
8820   # Required free disk space as a function of disk and swap space
8821   req_size_dict = {
8822     constants.DT_DISKLESS: None,
8823     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8824     # 128 MB are added for drbd metadata for each disk
8825     constants.DT_DRBD8:
8826       sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8827     constants.DT_FILE: None,
8828     constants.DT_SHARED_FILE: 0,
8829     constants.DT_BLOCK: 0,
8830   }
8831
8832   if disk_template not in req_size_dict:
8833     raise errors.ProgrammerError("Disk template '%s' size requirement"
8834                                  " is unknown" % disk_template)
8835
8836   return req_size_dict[disk_template]
8837
8838
8839 def _FilterVmNodes(lu, nodenames):
8840   """Filters out non-vm_capable nodes from a list.
8841
8842   @type lu: L{LogicalUnit}
8843   @param lu: the logical unit for which we check
8844   @type nodenames: list
8845   @param nodenames: the list of nodes on which we should check
8846   @rtype: list
8847   @return: the list of vm-capable nodes
8848
8849   """
8850   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8851   return [name for name in nodenames if name not in vm_nodes]
8852
8853
8854 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8855   """Hypervisor parameter validation.
8856
8857   This function abstract the hypervisor parameter validation to be
8858   used in both instance create and instance modify.
8859
8860   @type lu: L{LogicalUnit}
8861   @param lu: the logical unit for which we check
8862   @type nodenames: list
8863   @param nodenames: the list of nodes on which we should check
8864   @type hvname: string
8865   @param hvname: the name of the hypervisor we should use
8866   @type hvparams: dict
8867   @param hvparams: the parameters which we need to check
8868   @raise errors.OpPrereqError: if the parameters are not valid
8869
8870   """
8871   nodenames = _FilterVmNodes(lu, nodenames)
8872
8873   cluster = lu.cfg.GetClusterInfo()
8874   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8875
8876   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8877   for node in nodenames:
8878     info = hvinfo[node]
8879     if info.offline:
8880       continue
8881     info.Raise("Hypervisor parameter validation failed on node %s" % node)
8882
8883
8884 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8885   """OS parameters validation.
8886
8887   @type lu: L{LogicalUnit}
8888   @param lu: the logical unit for which we check
8889   @type required: boolean
8890   @param required: whether the validation should fail if the OS is not
8891       found
8892   @type nodenames: list
8893   @param nodenames: the list of nodes on which we should check
8894   @type osname: string
8895   @param osname: the name of the hypervisor we should use
8896   @type osparams: dict
8897   @param osparams: the parameters which we need to check
8898   @raise errors.OpPrereqError: if the parameters are not valid
8899
8900   """
8901   nodenames = _FilterVmNodes(lu, nodenames)
8902   result = lu.rpc.call_os_validate(nodenames, required, osname,
8903                                    [constants.OS_VALIDATE_PARAMETERS],
8904                                    osparams)
8905   for node, nres in result.items():
8906     # we don't check for offline cases since this should be run only
8907     # against the master node and/or an instance's nodes
8908     nres.Raise("OS Parameters validation failed on node %s" % node)
8909     if not nres.payload:
8910       lu.LogInfo("OS %s not found on node %s, validation skipped",
8911                  osname, node)
8912
8913
8914 class LUInstanceCreate(LogicalUnit):
8915   """Create an instance.
8916
8917   """
8918   HPATH = "instance-add"
8919   HTYPE = constants.HTYPE_INSTANCE
8920   REQ_BGL = False
8921
8922   def CheckArguments(self):
8923     """Check arguments.
8924
8925     """
8926     # do not require name_check to ease forward/backward compatibility
8927     # for tools
8928     if self.op.no_install and self.op.start:
8929       self.LogInfo("No-installation mode selected, disabling startup")
8930       self.op.start = False
8931     # validate/normalize the instance name
8932     self.op.instance_name = \
8933       netutils.Hostname.GetNormalizedName(self.op.instance_name)
8934
8935     if self.op.ip_check and not self.op.name_check:
8936       # TODO: make the ip check more flexible and not depend on the name check
8937       raise errors.OpPrereqError("Cannot do IP address check without a name"
8938                                  " check", errors.ECODE_INVAL)
8939
8940     # check nics' parameter names
8941     for nic in self.op.nics:
8942       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8943
8944     # check disks. parameter names and consistent adopt/no-adopt strategy
8945     has_adopt = has_no_adopt = False
8946     for disk in self.op.disks:
8947       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8948       if constants.IDISK_ADOPT in disk:
8949         has_adopt = True
8950       else:
8951         has_no_adopt = True
8952     if has_adopt and has_no_adopt:
8953       raise errors.OpPrereqError("Either all disks are adopted or none is",
8954                                  errors.ECODE_INVAL)
8955     if has_adopt:
8956       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8957         raise errors.OpPrereqError("Disk adoption is not supported for the"
8958                                    " '%s' disk template" %
8959                                    self.op.disk_template,
8960                                    errors.ECODE_INVAL)
8961       if self.op.iallocator is not None:
8962         raise errors.OpPrereqError("Disk adoption not allowed with an"
8963                                    " iallocator script", errors.ECODE_INVAL)
8964       if self.op.mode == constants.INSTANCE_IMPORT:
8965         raise errors.OpPrereqError("Disk adoption not allowed for"
8966                                    " instance import", errors.ECODE_INVAL)
8967     else:
8968       if self.op.disk_template in constants.DTS_MUST_ADOPT:
8969         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8970                                    " but no 'adopt' parameter given" %
8971                                    self.op.disk_template,
8972                                    errors.ECODE_INVAL)
8973
8974     self.adopt_disks = has_adopt
8975
8976     # instance name verification
8977     if self.op.name_check:
8978       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8979       self.op.instance_name = self.hostname1.name
8980       # used in CheckPrereq for ip ping check
8981       self.check_ip = self.hostname1.ip
8982     else:
8983       self.check_ip = None
8984
8985     # file storage checks
8986     if (self.op.file_driver and
8987         not self.op.file_driver in constants.FILE_DRIVER):
8988       raise errors.OpPrereqError("Invalid file driver name '%s'" %
8989                                  self.op.file_driver, errors.ECODE_INVAL)
8990
8991     if self.op.disk_template == constants.DT_FILE:
8992       opcodes.RequireFileStorage()
8993     elif self.op.disk_template == constants.DT_SHARED_FILE:
8994       opcodes.RequireSharedFileStorage()
8995
8996     ### Node/iallocator related checks
8997     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8998
8999     if self.op.pnode is not None:
9000       if self.op.disk_template in constants.DTS_INT_MIRROR:
9001         if self.op.snode is None:
9002           raise errors.OpPrereqError("The networked disk templates need"
9003                                      " a mirror node", errors.ECODE_INVAL)
9004       elif self.op.snode:
9005         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9006                         " template")
9007         self.op.snode = None
9008
9009     self._cds = _GetClusterDomainSecret()
9010
9011     if self.op.mode == constants.INSTANCE_IMPORT:
9012       # On import force_variant must be True, because if we forced it at
9013       # initial install, our only chance when importing it back is that it
9014       # works again!
9015       self.op.force_variant = True
9016
9017       if self.op.no_install:
9018         self.LogInfo("No-installation mode has no effect during import")
9019
9020     elif self.op.mode == constants.INSTANCE_CREATE:
9021       if self.op.os_type is None:
9022         raise errors.OpPrereqError("No guest OS specified",
9023                                    errors.ECODE_INVAL)
9024       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9025         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9026                                    " installation" % self.op.os_type,
9027                                    errors.ECODE_STATE)
9028       if self.op.disk_template is None:
9029         raise errors.OpPrereqError("No disk template specified",
9030                                    errors.ECODE_INVAL)
9031
9032     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9033       # Check handshake to ensure both clusters have the same domain secret
9034       src_handshake = self.op.source_handshake
9035       if not src_handshake:
9036         raise errors.OpPrereqError("Missing source handshake",
9037                                    errors.ECODE_INVAL)
9038
9039       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9040                                                            src_handshake)
9041       if errmsg:
9042         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9043                                    errors.ECODE_INVAL)
9044
9045       # Load and check source CA
9046       self.source_x509_ca_pem = self.op.source_x509_ca
9047       if not self.source_x509_ca_pem:
9048         raise errors.OpPrereqError("Missing source X509 CA",
9049                                    errors.ECODE_INVAL)
9050
9051       try:
9052         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9053                                                     self._cds)
9054       except OpenSSL.crypto.Error, err:
9055         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9056                                    (err, ), errors.ECODE_INVAL)
9057
9058       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9059       if errcode is not None:
9060         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9061                                    errors.ECODE_INVAL)
9062
9063       self.source_x509_ca = cert
9064
9065       src_instance_name = self.op.source_instance_name
9066       if not src_instance_name:
9067         raise errors.OpPrereqError("Missing source instance name",
9068                                    errors.ECODE_INVAL)
9069
9070       self.source_instance_name = \
9071           netutils.GetHostname(name=src_instance_name).name
9072
9073     else:
9074       raise errors.OpPrereqError("Invalid instance creation mode %r" %
9075                                  self.op.mode, errors.ECODE_INVAL)
9076
9077   def ExpandNames(self):
9078     """ExpandNames for CreateInstance.
9079
9080     Figure out the right locks for instance creation.
9081
9082     """
9083     self.needed_locks = {}
9084
9085     instance_name = self.op.instance_name
9086     # this is just a preventive check, but someone might still add this
9087     # instance in the meantime, and creation will fail at lock-add time
9088     if instance_name in self.cfg.GetInstanceList():
9089       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9090                                  instance_name, errors.ECODE_EXISTS)
9091
9092     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9093
9094     if self.op.iallocator:
9095       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9096       # specifying a group on instance creation and then selecting nodes from
9097       # that group
9098       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9099       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9100     else:
9101       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9102       nodelist = [self.op.pnode]
9103       if self.op.snode is not None:
9104         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9105         nodelist.append(self.op.snode)
9106       self.needed_locks[locking.LEVEL_NODE] = nodelist
9107       # Lock resources of instance's primary and secondary nodes (copy to
9108       # prevent accidential modification)
9109       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9110
9111     # in case of import lock the source node too
9112     if self.op.mode == constants.INSTANCE_IMPORT:
9113       src_node = self.op.src_node
9114       src_path = self.op.src_path
9115
9116       if src_path is None:
9117         self.op.src_path = src_path = self.op.instance_name
9118
9119       if src_node is None:
9120         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9121         self.op.src_node = None
9122         if os.path.isabs(src_path):
9123           raise errors.OpPrereqError("Importing an instance from a path"
9124                                      " requires a source node option",
9125                                      errors.ECODE_INVAL)
9126       else:
9127         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9128         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9129           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9130         if not os.path.isabs(src_path):
9131           self.op.src_path = src_path = \
9132             utils.PathJoin(constants.EXPORT_DIR, src_path)
9133
9134   def _RunAllocator(self):
9135     """Run the allocator based on input opcode.
9136
9137     """
9138     nics = [n.ToDict() for n in self.nics]
9139     ial = IAllocator(self.cfg, self.rpc,
9140                      mode=constants.IALLOCATOR_MODE_ALLOC,
9141                      name=self.op.instance_name,
9142                      disk_template=self.op.disk_template,
9143                      tags=self.op.tags,
9144                      os=self.op.os_type,
9145                      vcpus=self.be_full[constants.BE_VCPUS],
9146                      memory=self.be_full[constants.BE_MAXMEM],
9147                      disks=self.disks,
9148                      nics=nics,
9149                      hypervisor=self.op.hypervisor,
9150                      )
9151
9152     ial.Run(self.op.iallocator)
9153
9154     if not ial.success:
9155       raise errors.OpPrereqError("Can't compute nodes using"
9156                                  " iallocator '%s': %s" %
9157                                  (self.op.iallocator, ial.info),
9158                                  errors.ECODE_NORES)
9159     if len(ial.result) != ial.required_nodes:
9160       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9161                                  " of nodes (%s), required %s" %
9162                                  (self.op.iallocator, len(ial.result),
9163                                   ial.required_nodes), errors.ECODE_FAULT)
9164     self.op.pnode = ial.result[0]
9165     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9166                  self.op.instance_name, self.op.iallocator,
9167                  utils.CommaJoin(ial.result))
9168     if ial.required_nodes == 2:
9169       self.op.snode = ial.result[1]
9170
9171   def BuildHooksEnv(self):
9172     """Build hooks env.
9173
9174     This runs on master, primary and secondary nodes of the instance.
9175
9176     """
9177     env = {
9178       "ADD_MODE": self.op.mode,
9179       }
9180     if self.op.mode == constants.INSTANCE_IMPORT:
9181       env["SRC_NODE"] = self.op.src_node
9182       env["SRC_PATH"] = self.op.src_path
9183       env["SRC_IMAGES"] = self.src_images
9184
9185     env.update(_BuildInstanceHookEnv(
9186       name=self.op.instance_name,
9187       primary_node=self.op.pnode,
9188       secondary_nodes=self.secondaries,
9189       status=self.op.start,
9190       os_type=self.op.os_type,
9191       minmem=self.be_full[constants.BE_MINMEM],
9192       maxmem=self.be_full[constants.BE_MAXMEM],
9193       vcpus=self.be_full[constants.BE_VCPUS],
9194       nics=_NICListToTuple(self, self.nics),
9195       disk_template=self.op.disk_template,
9196       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9197              for d in self.disks],
9198       bep=self.be_full,
9199       hvp=self.hv_full,
9200       hypervisor_name=self.op.hypervisor,
9201       tags=self.op.tags,
9202     ))
9203
9204     return env
9205
9206   def BuildHooksNodes(self):
9207     """Build hooks nodes.
9208
9209     """
9210     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9211     return nl, nl
9212
9213   def _ReadExportInfo(self):
9214     """Reads the export information from disk.
9215
9216     It will override the opcode source node and path with the actual
9217     information, if these two were not specified before.
9218
9219     @return: the export information
9220
9221     """
9222     assert self.op.mode == constants.INSTANCE_IMPORT
9223
9224     src_node = self.op.src_node
9225     src_path = self.op.src_path
9226
9227     if src_node is None:
9228       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9229       exp_list = self.rpc.call_export_list(locked_nodes)
9230       found = False
9231       for node in exp_list:
9232         if exp_list[node].fail_msg:
9233           continue
9234         if src_path in exp_list[node].payload:
9235           found = True
9236           self.op.src_node = src_node = node
9237           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9238                                                        src_path)
9239           break
9240       if not found:
9241         raise errors.OpPrereqError("No export found for relative path %s" %
9242                                     src_path, errors.ECODE_INVAL)
9243
9244     _CheckNodeOnline(self, src_node)
9245     result = self.rpc.call_export_info(src_node, src_path)
9246     result.Raise("No export or invalid export found in dir %s" % src_path)
9247
9248     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9249     if not export_info.has_section(constants.INISECT_EXP):
9250       raise errors.ProgrammerError("Corrupted export config",
9251                                    errors.ECODE_ENVIRON)
9252
9253     ei_version = export_info.get(constants.INISECT_EXP, "version")
9254     if (int(ei_version) != constants.EXPORT_VERSION):
9255       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9256                                  (ei_version, constants.EXPORT_VERSION),
9257                                  errors.ECODE_ENVIRON)
9258     return export_info
9259
9260   def _ReadExportParams(self, einfo):
9261     """Use export parameters as defaults.
9262
9263     In case the opcode doesn't specify (as in override) some instance
9264     parameters, then try to use them from the export information, if
9265     that declares them.
9266
9267     """
9268     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9269
9270     if self.op.disk_template is None:
9271       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9272         self.op.disk_template = einfo.get(constants.INISECT_INS,
9273                                           "disk_template")
9274         if self.op.disk_template not in constants.DISK_TEMPLATES:
9275           raise errors.OpPrereqError("Disk template specified in configuration"
9276                                      " file is not one of the allowed values:"
9277                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9278       else:
9279         raise errors.OpPrereqError("No disk template specified and the export"
9280                                    " is missing the disk_template information",
9281                                    errors.ECODE_INVAL)
9282
9283     if not self.op.disks:
9284       disks = []
9285       # TODO: import the disk iv_name too
9286       for idx in range(constants.MAX_DISKS):
9287         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9288           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9289           disks.append({constants.IDISK_SIZE: disk_sz})
9290       self.op.disks = disks
9291       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9292         raise errors.OpPrereqError("No disk info specified and the export"
9293                                    " is missing the disk information",
9294                                    errors.ECODE_INVAL)
9295
9296     if not self.op.nics:
9297       nics = []
9298       for idx in range(constants.MAX_NICS):
9299         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9300           ndict = {}
9301           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9302             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9303             ndict[name] = v
9304           nics.append(ndict)
9305         else:
9306           break
9307       self.op.nics = nics
9308
9309     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9310       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9311
9312     if (self.op.hypervisor is None and
9313         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9314       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9315
9316     if einfo.has_section(constants.INISECT_HYP):
9317       # use the export parameters but do not override the ones
9318       # specified by the user
9319       for name, value in einfo.items(constants.INISECT_HYP):
9320         if name not in self.op.hvparams:
9321           self.op.hvparams[name] = value
9322
9323     if einfo.has_section(constants.INISECT_BEP):
9324       # use the parameters, without overriding
9325       for name, value in einfo.items(constants.INISECT_BEP):
9326         if name not in self.op.beparams:
9327           self.op.beparams[name] = value
9328         # Compatibility for the old "memory" be param
9329         if name == constants.BE_MEMORY:
9330           if constants.BE_MAXMEM not in self.op.beparams:
9331             self.op.beparams[constants.BE_MAXMEM] = value
9332           if constants.BE_MINMEM not in self.op.beparams:
9333             self.op.beparams[constants.BE_MINMEM] = value
9334     else:
9335       # try to read the parameters old style, from the main section
9336       for name in constants.BES_PARAMETERS:
9337         if (name not in self.op.beparams and
9338             einfo.has_option(constants.INISECT_INS, name)):
9339           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9340
9341     if einfo.has_section(constants.INISECT_OSP):
9342       # use the parameters, without overriding
9343       for name, value in einfo.items(constants.INISECT_OSP):
9344         if name not in self.op.osparams:
9345           self.op.osparams[name] = value
9346
9347   def _RevertToDefaults(self, cluster):
9348     """Revert the instance parameters to the default values.
9349
9350     """
9351     # hvparams
9352     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9353     for name in self.op.hvparams.keys():
9354       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9355         del self.op.hvparams[name]
9356     # beparams
9357     be_defs = cluster.SimpleFillBE({})
9358     for name in self.op.beparams.keys():
9359       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9360         del self.op.beparams[name]
9361     # nic params
9362     nic_defs = cluster.SimpleFillNIC({})
9363     for nic in self.op.nics:
9364       for name in constants.NICS_PARAMETERS:
9365         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9366           del nic[name]
9367     # osparams
9368     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9369     for name in self.op.osparams.keys():
9370       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9371         del self.op.osparams[name]
9372
9373   def _CalculateFileStorageDir(self):
9374     """Calculate final instance file storage dir.
9375
9376     """
9377     # file storage dir calculation/check
9378     self.instance_file_storage_dir = None
9379     if self.op.disk_template in constants.DTS_FILEBASED:
9380       # build the full file storage dir path
9381       joinargs = []
9382
9383       if self.op.disk_template == constants.DT_SHARED_FILE:
9384         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9385       else:
9386         get_fsd_fn = self.cfg.GetFileStorageDir
9387
9388       cfg_storagedir = get_fsd_fn()
9389       if not cfg_storagedir:
9390         raise errors.OpPrereqError("Cluster file storage dir not defined")
9391       joinargs.append(cfg_storagedir)
9392
9393       if self.op.file_storage_dir is not None:
9394         joinargs.append(self.op.file_storage_dir)
9395
9396       joinargs.append(self.op.instance_name)
9397
9398       # pylint: disable=W0142
9399       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9400
9401   def CheckPrereq(self): # pylint: disable=R0914
9402     """Check prerequisites.
9403
9404     """
9405     self._CalculateFileStorageDir()
9406
9407     if self.op.mode == constants.INSTANCE_IMPORT:
9408       export_info = self._ReadExportInfo()
9409       self._ReadExportParams(export_info)
9410
9411     if (not self.cfg.GetVGName() and
9412         self.op.disk_template not in constants.DTS_NOT_LVM):
9413       raise errors.OpPrereqError("Cluster does not support lvm-based"
9414                                  " instances", errors.ECODE_STATE)
9415
9416     if (self.op.hypervisor is None or
9417         self.op.hypervisor == constants.VALUE_AUTO):
9418       self.op.hypervisor = self.cfg.GetHypervisorType()
9419
9420     cluster = self.cfg.GetClusterInfo()
9421     enabled_hvs = cluster.enabled_hypervisors
9422     if self.op.hypervisor not in enabled_hvs:
9423       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9424                                  " cluster (%s)" % (self.op.hypervisor,
9425                                   ",".join(enabled_hvs)),
9426                                  errors.ECODE_STATE)
9427
9428     # Check tag validity
9429     for tag in self.op.tags:
9430       objects.TaggableObject.ValidateTag(tag)
9431
9432     # check hypervisor parameter syntax (locally)
9433     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9434     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9435                                       self.op.hvparams)
9436     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9437     hv_type.CheckParameterSyntax(filled_hvp)
9438     self.hv_full = filled_hvp
9439     # check that we don't specify global parameters on an instance
9440     _CheckGlobalHvParams(self.op.hvparams)
9441
9442     # fill and remember the beparams dict
9443     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9444     for param, value in self.op.beparams.iteritems():
9445       if value == constants.VALUE_AUTO:
9446         self.op.beparams[param] = default_beparams[param]
9447     objects.UpgradeBeParams(self.op.beparams)
9448     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9449     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9450
9451     # build os parameters
9452     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9453
9454     # now that hvp/bep are in final format, let's reset to defaults,
9455     # if told to do so
9456     if self.op.identify_defaults:
9457       self._RevertToDefaults(cluster)
9458
9459     # NIC buildup
9460     self.nics = []
9461     for idx, nic in enumerate(self.op.nics):
9462       nic_mode_req = nic.get(constants.INIC_MODE, None)
9463       nic_mode = nic_mode_req
9464       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9465         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9466
9467       # in routed mode, for the first nic, the default ip is 'auto'
9468       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9469         default_ip_mode = constants.VALUE_AUTO
9470       else:
9471         default_ip_mode = constants.VALUE_NONE
9472
9473       # ip validity checks
9474       ip = nic.get(constants.INIC_IP, default_ip_mode)
9475       if ip is None or ip.lower() == constants.VALUE_NONE:
9476         nic_ip = None
9477       elif ip.lower() == constants.VALUE_AUTO:
9478         if not self.op.name_check:
9479           raise errors.OpPrereqError("IP address set to auto but name checks"
9480                                      " have been skipped",
9481                                      errors.ECODE_INVAL)
9482         nic_ip = self.hostname1.ip
9483       else:
9484         if not netutils.IPAddress.IsValid(ip):
9485           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9486                                      errors.ECODE_INVAL)
9487         nic_ip = ip
9488
9489       # TODO: check the ip address for uniqueness
9490       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9491         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9492                                    errors.ECODE_INVAL)
9493
9494       # MAC address verification
9495       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9496       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9497         mac = utils.NormalizeAndValidateMac(mac)
9498
9499         try:
9500           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9501         except errors.ReservationError:
9502           raise errors.OpPrereqError("MAC address %s already in use"
9503                                      " in cluster" % mac,
9504                                      errors.ECODE_NOTUNIQUE)
9505
9506       #  Build nic parameters
9507       link = nic.get(constants.INIC_LINK, None)
9508       if link == constants.VALUE_AUTO:
9509         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9510       nicparams = {}
9511       if nic_mode_req:
9512         nicparams[constants.NIC_MODE] = nic_mode
9513       if link:
9514         nicparams[constants.NIC_LINK] = link
9515
9516       check_params = cluster.SimpleFillNIC(nicparams)
9517       objects.NIC.CheckParameterSyntax(check_params)
9518       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9519
9520     # disk checks/pre-build
9521     default_vg = self.cfg.GetVGName()
9522     self.disks = []
9523     for disk in self.op.disks:
9524       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9525       if mode not in constants.DISK_ACCESS_SET:
9526         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9527                                    mode, errors.ECODE_INVAL)
9528       size = disk.get(constants.IDISK_SIZE, None)
9529       if size is None:
9530         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9531       try:
9532         size = int(size)
9533       except (TypeError, ValueError):
9534         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9535                                    errors.ECODE_INVAL)
9536
9537       data_vg = disk.get(constants.IDISK_VG, default_vg)
9538       new_disk = {
9539         constants.IDISK_SIZE: size,
9540         constants.IDISK_MODE: mode,
9541         constants.IDISK_VG: data_vg,
9542         }
9543       if constants.IDISK_METAVG in disk:
9544         new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9545       if constants.IDISK_ADOPT in disk:
9546         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9547       self.disks.append(new_disk)
9548
9549     if self.op.mode == constants.INSTANCE_IMPORT:
9550       disk_images = []
9551       for idx in range(len(self.disks)):
9552         option = "disk%d_dump" % idx
9553         if export_info.has_option(constants.INISECT_INS, option):
9554           # FIXME: are the old os-es, disk sizes, etc. useful?
9555           export_name = export_info.get(constants.INISECT_INS, option)
9556           image = utils.PathJoin(self.op.src_path, export_name)
9557           disk_images.append(image)
9558         else:
9559           disk_images.append(False)
9560
9561       self.src_images = disk_images
9562
9563       old_name = export_info.get(constants.INISECT_INS, "name")
9564       if self.op.instance_name == old_name:
9565         for idx, nic in enumerate(self.nics):
9566           if nic.mac == constants.VALUE_AUTO:
9567             nic_mac_ini = "nic%d_mac" % idx
9568             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9569
9570     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9571
9572     # ip ping checks (we use the same ip that was resolved in ExpandNames)
9573     if self.op.ip_check:
9574       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9575         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9576                                    (self.check_ip, self.op.instance_name),
9577                                    errors.ECODE_NOTUNIQUE)
9578
9579     #### mac address generation
9580     # By generating here the mac address both the allocator and the hooks get
9581     # the real final mac address rather than the 'auto' or 'generate' value.
9582     # There is a race condition between the generation and the instance object
9583     # creation, which means that we know the mac is valid now, but we're not
9584     # sure it will be when we actually add the instance. If things go bad
9585     # adding the instance will abort because of a duplicate mac, and the
9586     # creation job will fail.
9587     for nic in self.nics:
9588       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9589         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9590
9591     #### allocator run
9592
9593     if self.op.iallocator is not None:
9594       self._RunAllocator()
9595
9596     # Release all unneeded node locks
9597     _ReleaseLocks(self, locking.LEVEL_NODE,
9598                   keep=filter(None, [self.op.pnode, self.op.snode,
9599                                      self.op.src_node]))
9600
9601     #### node related checks
9602
9603     # check primary node
9604     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9605     assert self.pnode is not None, \
9606       "Cannot retrieve locked node %s" % self.op.pnode
9607     if pnode.offline:
9608       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9609                                  pnode.name, errors.ECODE_STATE)
9610     if pnode.drained:
9611       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9612                                  pnode.name, errors.ECODE_STATE)
9613     if not pnode.vm_capable:
9614       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9615                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9616
9617     self.secondaries = []
9618
9619     # mirror node verification
9620     if self.op.disk_template in constants.DTS_INT_MIRROR:
9621       if self.op.snode == pnode.name:
9622         raise errors.OpPrereqError("The secondary node cannot be the"
9623                                    " primary node", errors.ECODE_INVAL)
9624       _CheckNodeOnline(self, self.op.snode)
9625       _CheckNodeNotDrained(self, self.op.snode)
9626       _CheckNodeVmCapable(self, self.op.snode)
9627       self.secondaries.append(self.op.snode)
9628
9629       snode = self.cfg.GetNodeInfo(self.op.snode)
9630       if pnode.group != snode.group:
9631         self.LogWarning("The primary and secondary nodes are in two"
9632                         " different node groups; the disk parameters"
9633                         " from the first disk's node group will be"
9634                         " used")
9635
9636     nodenames = [pnode.name] + self.secondaries
9637
9638     # Verify instance specs
9639     ispec = {
9640       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9641       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9642       constants.ISPEC_DISK_COUNT: len(self.disks),
9643       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9644       constants.ISPEC_NIC_COUNT: len(self.nics),
9645       }
9646
9647     ipolicy = _CalculateGroupIPolicy(cluster, pnode.group)
9648     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9649     if not self.op.ignore_ipolicy and res:
9650       raise errors.OpPrereqError(("Instance allocation to group %s violates"
9651                                   " policy: %s") % (pnode.group,
9652                                                     utils.CommaJoin(res)),
9653                                   errors.ECODE_INVAL)
9654
9655     # disk parameters (not customizable at instance or node level)
9656     # just use the primary node parameters, ignoring the secondary.
9657     self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9658
9659     if not self.adopt_disks:
9660       # Check lv size requirements, if not adopting
9661       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9662       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9663
9664     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9665       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9666                                 disk[constants.IDISK_ADOPT])
9667                      for disk in self.disks])
9668       if len(all_lvs) != len(self.disks):
9669         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9670                                    errors.ECODE_INVAL)
9671       for lv_name in all_lvs:
9672         try:
9673           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9674           # to ReserveLV uses the same syntax
9675           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9676         except errors.ReservationError:
9677           raise errors.OpPrereqError("LV named %s used by another instance" %
9678                                      lv_name, errors.ECODE_NOTUNIQUE)
9679
9680       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9681       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9682
9683       node_lvs = self.rpc.call_lv_list([pnode.name],
9684                                        vg_names.payload.keys())[pnode.name]
9685       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9686       node_lvs = node_lvs.payload
9687
9688       delta = all_lvs.difference(node_lvs.keys())
9689       if delta:
9690         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9691                                    utils.CommaJoin(delta),
9692                                    errors.ECODE_INVAL)
9693       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9694       if online_lvs:
9695         raise errors.OpPrereqError("Online logical volumes found, cannot"
9696                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9697                                    errors.ECODE_STATE)
9698       # update the size of disk based on what is found
9699       for dsk in self.disks:
9700         dsk[constants.IDISK_SIZE] = \
9701           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9702                                         dsk[constants.IDISK_ADOPT])][0]))
9703
9704     elif self.op.disk_template == constants.DT_BLOCK:
9705       # Normalize and de-duplicate device paths
9706       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9707                        for disk in self.disks])
9708       if len(all_disks) != len(self.disks):
9709         raise errors.OpPrereqError("Duplicate disk names given for adoption",
9710                                    errors.ECODE_INVAL)
9711       baddisks = [d for d in all_disks
9712                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9713       if baddisks:
9714         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9715                                    " cannot be adopted" %
9716                                    (", ".join(baddisks),
9717                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
9718                                    errors.ECODE_INVAL)
9719
9720       node_disks = self.rpc.call_bdev_sizes([pnode.name],
9721                                             list(all_disks))[pnode.name]
9722       node_disks.Raise("Cannot get block device information from node %s" %
9723                        pnode.name)
9724       node_disks = node_disks.payload
9725       delta = all_disks.difference(node_disks.keys())
9726       if delta:
9727         raise errors.OpPrereqError("Missing block device(s): %s" %
9728                                    utils.CommaJoin(delta),
9729                                    errors.ECODE_INVAL)
9730       for dsk in self.disks:
9731         dsk[constants.IDISK_SIZE] = \
9732           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9733
9734     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9735
9736     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9737     # check OS parameters (remotely)
9738     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9739
9740     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9741
9742     # memory check on primary node
9743     #TODO(dynmem): use MINMEM for checking
9744     if self.op.start:
9745       _CheckNodeFreeMemory(self, self.pnode.name,
9746                            "creating instance %s" % self.op.instance_name,
9747                            self.be_full[constants.BE_MAXMEM],
9748                            self.op.hypervisor)
9749
9750     self.dry_run_result = list(nodenames)
9751
9752   def Exec(self, feedback_fn):
9753     """Create and add the instance to the cluster.
9754
9755     """
9756     instance = self.op.instance_name
9757     pnode_name = self.pnode.name
9758
9759     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9760                 self.owned_locks(locking.LEVEL_NODE)), \
9761       "Node locks differ from node resource locks"
9762
9763     ht_kind = self.op.hypervisor
9764     if ht_kind in constants.HTS_REQ_PORT:
9765       network_port = self.cfg.AllocatePort()
9766     else:
9767       network_port = None
9768
9769     disks = _GenerateDiskTemplate(self,
9770                                   self.op.disk_template,
9771                                   instance, pnode_name,
9772                                   self.secondaries,
9773                                   self.disks,
9774                                   self.instance_file_storage_dir,
9775                                   self.op.file_driver,
9776                                   0,
9777                                   feedback_fn,
9778                                   self.diskparams)
9779
9780     iobj = objects.Instance(name=instance, os=self.op.os_type,
9781                             primary_node=pnode_name,
9782                             nics=self.nics, disks=disks,
9783                             disk_template=self.op.disk_template,
9784                             admin_state=constants.ADMINST_DOWN,
9785                             network_port=network_port,
9786                             beparams=self.op.beparams,
9787                             hvparams=self.op.hvparams,
9788                             hypervisor=self.op.hypervisor,
9789                             osparams=self.op.osparams,
9790                             )
9791
9792     if self.op.tags:
9793       for tag in self.op.tags:
9794         iobj.AddTag(tag)
9795
9796     if self.adopt_disks:
9797       if self.op.disk_template == constants.DT_PLAIN:
9798         # rename LVs to the newly-generated names; we need to construct
9799         # 'fake' LV disks with the old data, plus the new unique_id
9800         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9801         rename_to = []
9802         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9803           rename_to.append(t_dsk.logical_id)
9804           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9805           self.cfg.SetDiskID(t_dsk, pnode_name)
9806         result = self.rpc.call_blockdev_rename(pnode_name,
9807                                                zip(tmp_disks, rename_to))
9808         result.Raise("Failed to rename adoped LVs")
9809     else:
9810       feedback_fn("* creating instance disks...")
9811       try:
9812         _CreateDisks(self, iobj)
9813       except errors.OpExecError:
9814         self.LogWarning("Device creation failed, reverting...")
9815         try:
9816           _RemoveDisks(self, iobj)
9817         finally:
9818           self.cfg.ReleaseDRBDMinors(instance)
9819           raise
9820
9821     feedback_fn("adding instance %s to cluster config" % instance)
9822
9823     self.cfg.AddInstance(iobj, self.proc.GetECId())
9824
9825     # Declare that we don't want to remove the instance lock anymore, as we've
9826     # added the instance to the config
9827     del self.remove_locks[locking.LEVEL_INSTANCE]
9828
9829     if self.op.mode == constants.INSTANCE_IMPORT:
9830       # Release unused nodes
9831       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9832     else:
9833       # Release all nodes
9834       _ReleaseLocks(self, locking.LEVEL_NODE)
9835
9836     disk_abort = False
9837     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9838       feedback_fn("* wiping instance disks...")
9839       try:
9840         _WipeDisks(self, iobj)
9841       except errors.OpExecError, err:
9842         logging.exception("Wiping disks failed")
9843         self.LogWarning("Wiping instance disks failed (%s)", err)
9844         disk_abort = True
9845
9846     if disk_abort:
9847       # Something is already wrong with the disks, don't do anything else
9848       pass
9849     elif self.op.wait_for_sync:
9850       disk_abort = not _WaitForSync(self, iobj)
9851     elif iobj.disk_template in constants.DTS_INT_MIRROR:
9852       # make sure the disks are not degraded (still sync-ing is ok)
9853       feedback_fn("* checking mirrors status")
9854       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9855     else:
9856       disk_abort = False
9857
9858     if disk_abort:
9859       _RemoveDisks(self, iobj)
9860       self.cfg.RemoveInstance(iobj.name)
9861       # Make sure the instance lock gets removed
9862       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9863       raise errors.OpExecError("There are some degraded disks for"
9864                                " this instance")
9865
9866     # Release all node resource locks
9867     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9868
9869     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9870       if self.op.mode == constants.INSTANCE_CREATE:
9871         if not self.op.no_install:
9872           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9873                         not self.op.wait_for_sync)
9874           if pause_sync:
9875             feedback_fn("* pausing disk sync to install instance OS")
9876             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9877                                                               iobj.disks, True)
9878             for idx, success in enumerate(result.payload):
9879               if not success:
9880                 logging.warn("pause-sync of instance %s for disk %d failed",
9881                              instance, idx)
9882
9883           feedback_fn("* running the instance OS create scripts...")
9884           # FIXME: pass debug option from opcode to backend
9885           os_add_result = \
9886             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9887                                           self.op.debug_level)
9888           if pause_sync:
9889             feedback_fn("* resuming disk sync")
9890             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9891                                                               iobj.disks, False)
9892             for idx, success in enumerate(result.payload):
9893               if not success:
9894                 logging.warn("resume-sync of instance %s for disk %d failed",
9895                              instance, idx)
9896
9897           os_add_result.Raise("Could not add os for instance %s"
9898                               " on node %s" % (instance, pnode_name))
9899
9900       elif self.op.mode == constants.INSTANCE_IMPORT:
9901         feedback_fn("* running the instance OS import scripts...")
9902
9903         transfers = []
9904
9905         for idx, image in enumerate(self.src_images):
9906           if not image:
9907             continue
9908
9909           # FIXME: pass debug option from opcode to backend
9910           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9911                                              constants.IEIO_FILE, (image, ),
9912                                              constants.IEIO_SCRIPT,
9913                                              (iobj.disks[idx], idx),
9914                                              None)
9915           transfers.append(dt)
9916
9917         import_result = \
9918           masterd.instance.TransferInstanceData(self, feedback_fn,
9919                                                 self.op.src_node, pnode_name,
9920                                                 self.pnode.secondary_ip,
9921                                                 iobj, transfers)
9922         if not compat.all(import_result):
9923           self.LogWarning("Some disks for instance %s on node %s were not"
9924                           " imported successfully" % (instance, pnode_name))
9925
9926       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9927         feedback_fn("* preparing remote import...")
9928         # The source cluster will stop the instance before attempting to make a
9929         # connection. In some cases stopping an instance can take a long time,
9930         # hence the shutdown timeout is added to the connection timeout.
9931         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9932                            self.op.source_shutdown_timeout)
9933         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9934
9935         assert iobj.primary_node == self.pnode.name
9936         disk_results = \
9937           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9938                                         self.source_x509_ca,
9939                                         self._cds, timeouts)
9940         if not compat.all(disk_results):
9941           # TODO: Should the instance still be started, even if some disks
9942           # failed to import (valid for local imports, too)?
9943           self.LogWarning("Some disks for instance %s on node %s were not"
9944                           " imported successfully" % (instance, pnode_name))
9945
9946         # Run rename script on newly imported instance
9947         assert iobj.name == instance
9948         feedback_fn("Running rename script for %s" % instance)
9949         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9950                                                    self.source_instance_name,
9951                                                    self.op.debug_level)
9952         if result.fail_msg:
9953           self.LogWarning("Failed to run rename script for %s on node"
9954                           " %s: %s" % (instance, pnode_name, result.fail_msg))
9955
9956       else:
9957         # also checked in the prereq part
9958         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9959                                      % self.op.mode)
9960
9961     assert not self.owned_locks(locking.LEVEL_NODE_RES)
9962
9963     if self.op.start:
9964       iobj.admin_state = constants.ADMINST_UP
9965       self.cfg.Update(iobj, feedback_fn)
9966       logging.info("Starting instance %s on node %s", instance, pnode_name)
9967       feedback_fn("* starting instance...")
9968       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9969                                             False)
9970       result.Raise("Could not start instance")
9971
9972     return list(iobj.all_nodes)
9973
9974
9975 class LUInstanceConsole(NoHooksLU):
9976   """Connect to an instance's console.
9977
9978   This is somewhat special in that it returns the command line that
9979   you need to run on the master node in order to connect to the
9980   console.
9981
9982   """
9983   REQ_BGL = False
9984
9985   def ExpandNames(self):
9986     self.share_locks = _ShareAll()
9987     self._ExpandAndLockInstance()
9988
9989   def CheckPrereq(self):
9990     """Check prerequisites.
9991
9992     This checks that the instance is in the cluster.
9993
9994     """
9995     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9996     assert self.instance is not None, \
9997       "Cannot retrieve locked instance %s" % self.op.instance_name
9998     _CheckNodeOnline(self, self.instance.primary_node)
9999
10000   def Exec(self, feedback_fn):
10001     """Connect to the console of an instance
10002
10003     """
10004     instance = self.instance
10005     node = instance.primary_node
10006
10007     node_insts = self.rpc.call_instance_list([node],
10008                                              [instance.hypervisor])[node]
10009     node_insts.Raise("Can't get node information from %s" % node)
10010
10011     if instance.name not in node_insts.payload:
10012       if instance.admin_state == constants.ADMINST_UP:
10013         state = constants.INSTST_ERRORDOWN
10014       elif instance.admin_state == constants.ADMINST_DOWN:
10015         state = constants.INSTST_ADMINDOWN
10016       else:
10017         state = constants.INSTST_ADMINOFFLINE
10018       raise errors.OpExecError("Instance %s is not running (state %s)" %
10019                                (instance.name, state))
10020
10021     logging.debug("Connecting to console of %s on %s", instance.name, node)
10022
10023     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10024
10025
10026 def _GetInstanceConsole(cluster, instance):
10027   """Returns console information for an instance.
10028
10029   @type cluster: L{objects.Cluster}
10030   @type instance: L{objects.Instance}
10031   @rtype: dict
10032
10033   """
10034   hyper = hypervisor.GetHypervisor(instance.hypervisor)
10035   # beparams and hvparams are passed separately, to avoid editing the
10036   # instance and then saving the defaults in the instance itself.
10037   hvparams = cluster.FillHV(instance)
10038   beparams = cluster.FillBE(instance)
10039   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10040
10041   assert console.instance == instance.name
10042   assert console.Validate()
10043
10044   return console.ToDict()
10045
10046
10047 class LUInstanceReplaceDisks(LogicalUnit):
10048   """Replace the disks of an instance.
10049
10050   """
10051   HPATH = "mirrors-replace"
10052   HTYPE = constants.HTYPE_INSTANCE
10053   REQ_BGL = False
10054
10055   def CheckArguments(self):
10056     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10057                                   self.op.iallocator)
10058
10059   def ExpandNames(self):
10060     self._ExpandAndLockInstance()
10061
10062     assert locking.LEVEL_NODE not in self.needed_locks
10063     assert locking.LEVEL_NODE_RES not in self.needed_locks
10064     assert locking.LEVEL_NODEGROUP not in self.needed_locks
10065
10066     assert self.op.iallocator is None or self.op.remote_node is None, \
10067       "Conflicting options"
10068
10069     if self.op.remote_node is not None:
10070       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10071
10072       # Warning: do not remove the locking of the new secondary here
10073       # unless DRBD8.AddChildren is changed to work in parallel;
10074       # currently it doesn't since parallel invocations of
10075       # FindUnusedMinor will conflict
10076       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10077       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10078     else:
10079       self.needed_locks[locking.LEVEL_NODE] = []
10080       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10081
10082       if self.op.iallocator is not None:
10083         # iallocator will select a new node in the same group
10084         self.needed_locks[locking.LEVEL_NODEGROUP] = []
10085
10086     self.needed_locks[locking.LEVEL_NODE_RES] = []
10087
10088     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10089                                    self.op.iallocator, self.op.remote_node,
10090                                    self.op.disks, False, self.op.early_release)
10091
10092     self.tasklets = [self.replacer]
10093
10094   def DeclareLocks(self, level):
10095     if level == locking.LEVEL_NODEGROUP:
10096       assert self.op.remote_node is None
10097       assert self.op.iallocator is not None
10098       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10099
10100       self.share_locks[locking.LEVEL_NODEGROUP] = 1
10101       # Lock all groups used by instance optimistically; this requires going
10102       # via the node before it's locked, requiring verification later on
10103       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10104         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10105
10106     elif level == locking.LEVEL_NODE:
10107       if self.op.iallocator is not None:
10108         assert self.op.remote_node is None
10109         assert not self.needed_locks[locking.LEVEL_NODE]
10110
10111         # Lock member nodes of all locked groups
10112         self.needed_locks[locking.LEVEL_NODE] = [node_name
10113           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10114           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10115       else:
10116         self._LockInstancesNodes()
10117     elif level == locking.LEVEL_NODE_RES:
10118       # Reuse node locks
10119       self.needed_locks[locking.LEVEL_NODE_RES] = \
10120         self.needed_locks[locking.LEVEL_NODE]
10121
10122   def BuildHooksEnv(self):
10123     """Build hooks env.
10124
10125     This runs on the master, the primary and all the secondaries.
10126
10127     """
10128     instance = self.replacer.instance
10129     env = {
10130       "MODE": self.op.mode,
10131       "NEW_SECONDARY": self.op.remote_node,
10132       "OLD_SECONDARY": instance.secondary_nodes[0],
10133       }
10134     env.update(_BuildInstanceHookEnvByObject(self, instance))
10135     return env
10136
10137   def BuildHooksNodes(self):
10138     """Build hooks nodes.
10139
10140     """
10141     instance = self.replacer.instance
10142     nl = [
10143       self.cfg.GetMasterNode(),
10144       instance.primary_node,
10145       ]
10146     if self.op.remote_node is not None:
10147       nl.append(self.op.remote_node)
10148     return nl, nl
10149
10150   def CheckPrereq(self):
10151     """Check prerequisites.
10152
10153     """
10154     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10155             self.op.iallocator is None)
10156
10157     # Verify if node group locks are still correct
10158     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10159     if owned_groups:
10160       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10161
10162     return LogicalUnit.CheckPrereq(self)
10163
10164
10165 class TLReplaceDisks(Tasklet):
10166   """Replaces disks for an instance.
10167
10168   Note: Locking is not within the scope of this class.
10169
10170   """
10171   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10172                disks, delay_iallocator, early_release):
10173     """Initializes this class.
10174
10175     """
10176     Tasklet.__init__(self, lu)
10177
10178     # Parameters
10179     self.instance_name = instance_name
10180     self.mode = mode
10181     self.iallocator_name = iallocator_name
10182     self.remote_node = remote_node
10183     self.disks = disks
10184     self.delay_iallocator = delay_iallocator
10185     self.early_release = early_release
10186
10187     # Runtime data
10188     self.instance = None
10189     self.new_node = None
10190     self.target_node = None
10191     self.other_node = None
10192     self.remote_node_info = None
10193     self.node_secondary_ip = None
10194
10195   @staticmethod
10196   def CheckArguments(mode, remote_node, iallocator):
10197     """Helper function for users of this class.
10198
10199     """
10200     # check for valid parameter combination
10201     if mode == constants.REPLACE_DISK_CHG:
10202       if remote_node is None and iallocator is None:
10203         raise errors.OpPrereqError("When changing the secondary either an"
10204                                    " iallocator script must be used or the"
10205                                    " new node given", errors.ECODE_INVAL)
10206
10207       if remote_node is not None and iallocator is not None:
10208         raise errors.OpPrereqError("Give either the iallocator or the new"
10209                                    " secondary, not both", errors.ECODE_INVAL)
10210
10211     elif remote_node is not None or iallocator is not None:
10212       # Not replacing the secondary
10213       raise errors.OpPrereqError("The iallocator and new node options can"
10214                                  " only be used when changing the"
10215                                  " secondary node", errors.ECODE_INVAL)
10216
10217   @staticmethod
10218   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10219     """Compute a new secondary node using an IAllocator.
10220
10221     """
10222     ial = IAllocator(lu.cfg, lu.rpc,
10223                      mode=constants.IALLOCATOR_MODE_RELOC,
10224                      name=instance_name,
10225                      relocate_from=list(relocate_from))
10226
10227     ial.Run(iallocator_name)
10228
10229     if not ial.success:
10230       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10231                                  " %s" % (iallocator_name, ial.info),
10232                                  errors.ECODE_NORES)
10233
10234     if len(ial.result) != ial.required_nodes:
10235       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10236                                  " of nodes (%s), required %s" %
10237                                  (iallocator_name,
10238                                   len(ial.result), ial.required_nodes),
10239                                  errors.ECODE_FAULT)
10240
10241     remote_node_name = ial.result[0]
10242
10243     lu.LogInfo("Selected new secondary for instance '%s': %s",
10244                instance_name, remote_node_name)
10245
10246     return remote_node_name
10247
10248   def _FindFaultyDisks(self, node_name):
10249     """Wrapper for L{_FindFaultyInstanceDisks}.
10250
10251     """
10252     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10253                                     node_name, True)
10254
10255   def _CheckDisksActivated(self, instance):
10256     """Checks if the instance disks are activated.
10257
10258     @param instance: The instance to check disks
10259     @return: True if they are activated, False otherwise
10260
10261     """
10262     nodes = instance.all_nodes
10263
10264     for idx, dev in enumerate(instance.disks):
10265       for node in nodes:
10266         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10267         self.cfg.SetDiskID(dev, node)
10268
10269         result = self.rpc.call_blockdev_find(node, dev)
10270
10271         if result.offline:
10272           continue
10273         elif result.fail_msg or not result.payload:
10274           return False
10275
10276     return True
10277
10278   def CheckPrereq(self):
10279     """Check prerequisites.
10280
10281     This checks that the instance is in the cluster.
10282
10283     """
10284     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10285     assert instance is not None, \
10286       "Cannot retrieve locked instance %s" % self.instance_name
10287
10288     if instance.disk_template != constants.DT_DRBD8:
10289       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10290                                  " instances", errors.ECODE_INVAL)
10291
10292     if len(instance.secondary_nodes) != 1:
10293       raise errors.OpPrereqError("The instance has a strange layout,"
10294                                  " expected one secondary but found %d" %
10295                                  len(instance.secondary_nodes),
10296                                  errors.ECODE_FAULT)
10297
10298     if not self.delay_iallocator:
10299       self._CheckPrereq2()
10300
10301   def _CheckPrereq2(self):
10302     """Check prerequisites, second part.
10303
10304     This function should always be part of CheckPrereq. It was separated and is
10305     now called from Exec because during node evacuation iallocator was only
10306     called with an unmodified cluster model, not taking planned changes into
10307     account.
10308
10309     """
10310     instance = self.instance
10311     secondary_node = instance.secondary_nodes[0]
10312
10313     if self.iallocator_name is None:
10314       remote_node = self.remote_node
10315     else:
10316       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10317                                        instance.name, instance.secondary_nodes)
10318
10319     if remote_node is None:
10320       self.remote_node_info = None
10321     else:
10322       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10323              "Remote node '%s' is not locked" % remote_node
10324
10325       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10326       assert self.remote_node_info is not None, \
10327         "Cannot retrieve locked node %s" % remote_node
10328
10329     if remote_node == self.instance.primary_node:
10330       raise errors.OpPrereqError("The specified node is the primary node of"
10331                                  " the instance", errors.ECODE_INVAL)
10332
10333     if remote_node == secondary_node:
10334       raise errors.OpPrereqError("The specified node is already the"
10335                                  " secondary node of the instance",
10336                                  errors.ECODE_INVAL)
10337
10338     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10339                                     constants.REPLACE_DISK_CHG):
10340       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10341                                  errors.ECODE_INVAL)
10342
10343     if self.mode == constants.REPLACE_DISK_AUTO:
10344       if not self._CheckDisksActivated(instance):
10345         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10346                                    " first" % self.instance_name,
10347                                    errors.ECODE_STATE)
10348       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10349       faulty_secondary = self._FindFaultyDisks(secondary_node)
10350
10351       if faulty_primary and faulty_secondary:
10352         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10353                                    " one node and can not be repaired"
10354                                    " automatically" % self.instance_name,
10355                                    errors.ECODE_STATE)
10356
10357       if faulty_primary:
10358         self.disks = faulty_primary
10359         self.target_node = instance.primary_node
10360         self.other_node = secondary_node
10361         check_nodes = [self.target_node, self.other_node]
10362       elif faulty_secondary:
10363         self.disks = faulty_secondary
10364         self.target_node = secondary_node
10365         self.other_node = instance.primary_node
10366         check_nodes = [self.target_node, self.other_node]
10367       else:
10368         self.disks = []
10369         check_nodes = []
10370
10371     else:
10372       # Non-automatic modes
10373       if self.mode == constants.REPLACE_DISK_PRI:
10374         self.target_node = instance.primary_node
10375         self.other_node = secondary_node
10376         check_nodes = [self.target_node, self.other_node]
10377
10378       elif self.mode == constants.REPLACE_DISK_SEC:
10379         self.target_node = secondary_node
10380         self.other_node = instance.primary_node
10381         check_nodes = [self.target_node, self.other_node]
10382
10383       elif self.mode == constants.REPLACE_DISK_CHG:
10384         self.new_node = remote_node
10385         self.other_node = instance.primary_node
10386         self.target_node = secondary_node
10387         check_nodes = [self.new_node, self.other_node]
10388
10389         _CheckNodeNotDrained(self.lu, remote_node)
10390         _CheckNodeVmCapable(self.lu, remote_node)
10391
10392         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10393         assert old_node_info is not None
10394         if old_node_info.offline and not self.early_release:
10395           # doesn't make sense to delay the release
10396           self.early_release = True
10397           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10398                           " early-release mode", secondary_node)
10399
10400       else:
10401         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10402                                      self.mode)
10403
10404       # If not specified all disks should be replaced
10405       if not self.disks:
10406         self.disks = range(len(self.instance.disks))
10407
10408     # TODO: compute disk parameters
10409     primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10410     secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10411     if primary_node_info.group != secondary_node_info.group:
10412       self.lu.LogInfo("The instance primary and secondary nodes are in two"
10413                       " different node groups; the disk parameters of the"
10414                       " primary node's group will be applied.")
10415
10416     self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10417
10418     for node in check_nodes:
10419       _CheckNodeOnline(self.lu, node)
10420
10421     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10422                                                           self.other_node,
10423                                                           self.target_node]
10424                               if node_name is not None)
10425
10426     # Release unneeded node and node resource locks
10427     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10428     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10429
10430     # Release any owned node group
10431     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10432       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10433
10434     # Check whether disks are valid
10435     for disk_idx in self.disks:
10436       instance.FindDisk(disk_idx)
10437
10438     # Get secondary node IP addresses
10439     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10440                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10441
10442   def Exec(self, feedback_fn):
10443     """Execute disk replacement.
10444
10445     This dispatches the disk replacement to the appropriate handler.
10446
10447     """
10448     if self.delay_iallocator:
10449       self._CheckPrereq2()
10450
10451     if __debug__:
10452       # Verify owned locks before starting operation
10453       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10454       assert set(owned_nodes) == set(self.node_secondary_ip), \
10455           ("Incorrect node locks, owning %s, expected %s" %
10456            (owned_nodes, self.node_secondary_ip.keys()))
10457       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10458               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10459
10460       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10461       assert list(owned_instances) == [self.instance_name], \
10462           "Instance '%s' not locked" % self.instance_name
10463
10464       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10465           "Should not own any node group lock at this point"
10466
10467     if not self.disks:
10468       feedback_fn("No disks need replacement")
10469       return
10470
10471     feedback_fn("Replacing disk(s) %s for %s" %
10472                 (utils.CommaJoin(self.disks), self.instance.name))
10473
10474     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10475
10476     # Activate the instance disks if we're replacing them on a down instance
10477     if activate_disks:
10478       _StartInstanceDisks(self.lu, self.instance, True)
10479
10480     try:
10481       # Should we replace the secondary node?
10482       if self.new_node is not None:
10483         fn = self._ExecDrbd8Secondary
10484       else:
10485         fn = self._ExecDrbd8DiskOnly
10486
10487       result = fn(feedback_fn)
10488     finally:
10489       # Deactivate the instance disks if we're replacing them on a
10490       # down instance
10491       if activate_disks:
10492         _SafeShutdownInstanceDisks(self.lu, self.instance)
10493
10494     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10495
10496     if __debug__:
10497       # Verify owned locks
10498       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10499       nodes = frozenset(self.node_secondary_ip)
10500       assert ((self.early_release and not owned_nodes) or
10501               (not self.early_release and not (set(owned_nodes) - nodes))), \
10502         ("Not owning the correct locks, early_release=%s, owned=%r,"
10503          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10504
10505     return result
10506
10507   def _CheckVolumeGroup(self, nodes):
10508     self.lu.LogInfo("Checking volume groups")
10509
10510     vgname = self.cfg.GetVGName()
10511
10512     # Make sure volume group exists on all involved nodes
10513     results = self.rpc.call_vg_list(nodes)
10514     if not results:
10515       raise errors.OpExecError("Can't list volume groups on the nodes")
10516
10517     for node in nodes:
10518       res = results[node]
10519       res.Raise("Error checking node %s" % node)
10520       if vgname not in res.payload:
10521         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10522                                  (vgname, node))
10523
10524   def _CheckDisksExistence(self, nodes):
10525     # Check disk existence
10526     for idx, dev in enumerate(self.instance.disks):
10527       if idx not in self.disks:
10528         continue
10529
10530       for node in nodes:
10531         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10532         self.cfg.SetDiskID(dev, node)
10533
10534         result = self.rpc.call_blockdev_find(node, dev)
10535
10536         msg = result.fail_msg
10537         if msg or not result.payload:
10538           if not msg:
10539             msg = "disk not found"
10540           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10541                                    (idx, node, msg))
10542
10543   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10544     for idx, dev in enumerate(self.instance.disks):
10545       if idx not in self.disks:
10546         continue
10547
10548       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10549                       (idx, node_name))
10550
10551       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10552                                    ldisk=ldisk):
10553         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10554                                  " replace disks for instance %s" %
10555                                  (node_name, self.instance.name))
10556
10557   def _CreateNewStorage(self, node_name):
10558     """Create new storage on the primary or secondary node.
10559
10560     This is only used for same-node replaces, not for changing the
10561     secondary node, hence we don't want to modify the existing disk.
10562
10563     """
10564     iv_names = {}
10565
10566     for idx, dev in enumerate(self.instance.disks):
10567       if idx not in self.disks:
10568         continue
10569
10570       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10571
10572       self.cfg.SetDiskID(dev, node_name)
10573
10574       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10575       names = _GenerateUniqueNames(self.lu, lv_names)
10576
10577       _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10578
10579       vg_data = dev.children[0].logical_id[0]
10580       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10581                              logical_id=(vg_data, names[0]), params=data_p)
10582       vg_meta = dev.children[1].logical_id[0]
10583       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10584                              logical_id=(vg_meta, names[1]), params=meta_p)
10585
10586       new_lvs = [lv_data, lv_meta]
10587       old_lvs = [child.Copy() for child in dev.children]
10588       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10589
10590       # we pass force_create=True to force the LVM creation
10591       for new_lv in new_lvs:
10592         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10593                         _GetInstanceInfoText(self.instance), False)
10594
10595     return iv_names
10596
10597   def _CheckDevices(self, node_name, iv_names):
10598     for name, (dev, _, _) in iv_names.iteritems():
10599       self.cfg.SetDiskID(dev, node_name)
10600
10601       result = self.rpc.call_blockdev_find(node_name, dev)
10602
10603       msg = result.fail_msg
10604       if msg or not result.payload:
10605         if not msg:
10606           msg = "disk not found"
10607         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10608                                  (name, msg))
10609
10610       if result.payload.is_degraded:
10611         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10612
10613   def _RemoveOldStorage(self, node_name, iv_names):
10614     for name, (_, old_lvs, _) in iv_names.iteritems():
10615       self.lu.LogInfo("Remove logical volumes for %s" % name)
10616
10617       for lv in old_lvs:
10618         self.cfg.SetDiskID(lv, node_name)
10619
10620         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10621         if msg:
10622           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10623                              hint="remove unused LVs manually")
10624
10625   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10626     """Replace a disk on the primary or secondary for DRBD 8.
10627
10628     The algorithm for replace is quite complicated:
10629
10630       1. for each disk to be replaced:
10631
10632         1. create new LVs on the target node with unique names
10633         1. detach old LVs from the drbd device
10634         1. rename old LVs to name_replaced.<time_t>
10635         1. rename new LVs to old LVs
10636         1. attach the new LVs (with the old names now) to the drbd device
10637
10638       1. wait for sync across all devices
10639
10640       1. for each modified disk:
10641
10642         1. remove old LVs (which have the name name_replaces.<time_t>)
10643
10644     Failures are not very well handled.
10645
10646     """
10647     steps_total = 6
10648
10649     # Step: check device activation
10650     self.lu.LogStep(1, steps_total, "Check device existence")
10651     self._CheckDisksExistence([self.other_node, self.target_node])
10652     self._CheckVolumeGroup([self.target_node, self.other_node])
10653
10654     # Step: check other node consistency
10655     self.lu.LogStep(2, steps_total, "Check peer consistency")
10656     self._CheckDisksConsistency(self.other_node,
10657                                 self.other_node == self.instance.primary_node,
10658                                 False)
10659
10660     # Step: create new storage
10661     self.lu.LogStep(3, steps_total, "Allocate new storage")
10662     iv_names = self._CreateNewStorage(self.target_node)
10663
10664     # Step: for each lv, detach+rename*2+attach
10665     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10666     for dev, old_lvs, new_lvs in iv_names.itervalues():
10667       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10668
10669       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10670                                                      old_lvs)
10671       result.Raise("Can't detach drbd from local storage on node"
10672                    " %s for device %s" % (self.target_node, dev.iv_name))
10673       #dev.children = []
10674       #cfg.Update(instance)
10675
10676       # ok, we created the new LVs, so now we know we have the needed
10677       # storage; as such, we proceed on the target node to rename
10678       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10679       # using the assumption that logical_id == physical_id (which in
10680       # turn is the unique_id on that node)
10681
10682       # FIXME(iustin): use a better name for the replaced LVs
10683       temp_suffix = int(time.time())
10684       ren_fn = lambda d, suff: (d.physical_id[0],
10685                                 d.physical_id[1] + "_replaced-%s" % suff)
10686
10687       # Build the rename list based on what LVs exist on the node
10688       rename_old_to_new = []
10689       for to_ren in old_lvs:
10690         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10691         if not result.fail_msg and result.payload:
10692           # device exists
10693           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10694
10695       self.lu.LogInfo("Renaming the old LVs on the target node")
10696       result = self.rpc.call_blockdev_rename(self.target_node,
10697                                              rename_old_to_new)
10698       result.Raise("Can't rename old LVs on node %s" % self.target_node)
10699
10700       # Now we rename the new LVs to the old LVs
10701       self.lu.LogInfo("Renaming the new LVs on the target node")
10702       rename_new_to_old = [(new, old.physical_id)
10703                            for old, new in zip(old_lvs, new_lvs)]
10704       result = self.rpc.call_blockdev_rename(self.target_node,
10705                                              rename_new_to_old)
10706       result.Raise("Can't rename new LVs on node %s" % self.target_node)
10707
10708       # Intermediate steps of in memory modifications
10709       for old, new in zip(old_lvs, new_lvs):
10710         new.logical_id = old.logical_id
10711         self.cfg.SetDiskID(new, self.target_node)
10712
10713       # We need to modify old_lvs so that removal later removes the
10714       # right LVs, not the newly added ones; note that old_lvs is a
10715       # copy here
10716       for disk in old_lvs:
10717         disk.logical_id = ren_fn(disk, temp_suffix)
10718         self.cfg.SetDiskID(disk, self.target_node)
10719
10720       # Now that the new lvs have the old name, we can add them to the device
10721       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10722       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10723                                                   new_lvs)
10724       msg = result.fail_msg
10725       if msg:
10726         for new_lv in new_lvs:
10727           msg2 = self.rpc.call_blockdev_remove(self.target_node,
10728                                                new_lv).fail_msg
10729           if msg2:
10730             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10731                                hint=("cleanup manually the unused logical"
10732                                      "volumes"))
10733         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10734
10735     cstep = itertools.count(5)
10736
10737     if self.early_release:
10738       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10739       self._RemoveOldStorage(self.target_node, iv_names)
10740       # TODO: Check if releasing locks early still makes sense
10741       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10742     else:
10743       # Release all resource locks except those used by the instance
10744       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10745                     keep=self.node_secondary_ip.keys())
10746
10747     # Release all node locks while waiting for sync
10748     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10749
10750     # TODO: Can the instance lock be downgraded here? Take the optional disk
10751     # shutdown in the caller into consideration.
10752
10753     # Wait for sync
10754     # This can fail as the old devices are degraded and _WaitForSync
10755     # does a combined result over all disks, so we don't check its return value
10756     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10757     _WaitForSync(self.lu, self.instance)
10758
10759     # Check all devices manually
10760     self._CheckDevices(self.instance.primary_node, iv_names)
10761
10762     # Step: remove old storage
10763     if not self.early_release:
10764       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10765       self._RemoveOldStorage(self.target_node, iv_names)
10766
10767   def _ExecDrbd8Secondary(self, feedback_fn):
10768     """Replace the secondary node for DRBD 8.
10769
10770     The algorithm for replace is quite complicated:
10771       - for all disks of the instance:
10772         - create new LVs on the new node with same names
10773         - shutdown the drbd device on the old secondary
10774         - disconnect the drbd network on the primary
10775         - create the drbd device on the new secondary
10776         - network attach the drbd on the primary, using an artifice:
10777           the drbd code for Attach() will connect to the network if it
10778           finds a device which is connected to the good local disks but
10779           not network enabled
10780       - wait for sync across all devices
10781       - remove all disks from the old secondary
10782
10783     Failures are not very well handled.
10784
10785     """
10786     steps_total = 6
10787
10788     pnode = self.instance.primary_node
10789
10790     # Step: check device activation
10791     self.lu.LogStep(1, steps_total, "Check device existence")
10792     self._CheckDisksExistence([self.instance.primary_node])
10793     self._CheckVolumeGroup([self.instance.primary_node])
10794
10795     # Step: check other node consistency
10796     self.lu.LogStep(2, steps_total, "Check peer consistency")
10797     self._CheckDisksConsistency(self.instance.primary_node, True, True)
10798
10799     # Step: create new storage
10800     self.lu.LogStep(3, steps_total, "Allocate new storage")
10801     for idx, dev in enumerate(self.instance.disks):
10802       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10803                       (self.new_node, idx))
10804       # we pass force_create=True to force LVM creation
10805       for new_lv in dev.children:
10806         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10807                         _GetInstanceInfoText(self.instance), False)
10808
10809     # Step 4: dbrd minors and drbd setups changes
10810     # after this, we must manually remove the drbd minors on both the
10811     # error and the success paths
10812     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10813     minors = self.cfg.AllocateDRBDMinor([self.new_node
10814                                          for dev in self.instance.disks],
10815                                         self.instance.name)
10816     logging.debug("Allocated minors %r", minors)
10817
10818     iv_names = {}
10819     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10820       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10821                       (self.new_node, idx))
10822       # create new devices on new_node; note that we create two IDs:
10823       # one without port, so the drbd will be activated without
10824       # networking information on the new node at this stage, and one
10825       # with network, for the latter activation in step 4
10826       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10827       if self.instance.primary_node == o_node1:
10828         p_minor = o_minor1
10829       else:
10830         assert self.instance.primary_node == o_node2, "Three-node instance?"
10831         p_minor = o_minor2
10832
10833       new_alone_id = (self.instance.primary_node, self.new_node, None,
10834                       p_minor, new_minor, o_secret)
10835       new_net_id = (self.instance.primary_node, self.new_node, o_port,
10836                     p_minor, new_minor, o_secret)
10837
10838       iv_names[idx] = (dev, dev.children, new_net_id)
10839       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10840                     new_net_id)
10841       drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10842       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10843                               logical_id=new_alone_id,
10844                               children=dev.children,
10845                               size=dev.size,
10846                               params=drbd_params)
10847       try:
10848         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10849                               _GetInstanceInfoText(self.instance), False)
10850       except errors.GenericError:
10851         self.cfg.ReleaseDRBDMinors(self.instance.name)
10852         raise
10853
10854     # We have new devices, shutdown the drbd on the old secondary
10855     for idx, dev in enumerate(self.instance.disks):
10856       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10857       self.cfg.SetDiskID(dev, self.target_node)
10858       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10859       if msg:
10860         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10861                            "node: %s" % (idx, msg),
10862                            hint=("Please cleanup this device manually as"
10863                                  " soon as possible"))
10864
10865     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10866     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10867                                                self.instance.disks)[pnode]
10868
10869     msg = result.fail_msg
10870     if msg:
10871       # detaches didn't succeed (unlikely)
10872       self.cfg.ReleaseDRBDMinors(self.instance.name)
10873       raise errors.OpExecError("Can't detach the disks from the network on"
10874                                " old node: %s" % (msg,))
10875
10876     # if we managed to detach at least one, we update all the disks of
10877     # the instance to point to the new secondary
10878     self.lu.LogInfo("Updating instance configuration")
10879     for dev, _, new_logical_id in iv_names.itervalues():
10880       dev.logical_id = new_logical_id
10881       self.cfg.SetDiskID(dev, self.instance.primary_node)
10882
10883     self.cfg.Update(self.instance, feedback_fn)
10884
10885     # Release all node locks (the configuration has been updated)
10886     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10887
10888     # and now perform the drbd attach
10889     self.lu.LogInfo("Attaching primary drbds to new secondary"
10890                     " (standalone => connected)")
10891     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10892                                             self.new_node],
10893                                            self.node_secondary_ip,
10894                                            self.instance.disks,
10895                                            self.instance.name,
10896                                            False)
10897     for to_node, to_result in result.items():
10898       msg = to_result.fail_msg
10899       if msg:
10900         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10901                            to_node, msg,
10902                            hint=("please do a gnt-instance info to see the"
10903                                  " status of disks"))
10904
10905     cstep = itertools.count(5)
10906
10907     if self.early_release:
10908       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10909       self._RemoveOldStorage(self.target_node, iv_names)
10910       # TODO: Check if releasing locks early still makes sense
10911       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10912     else:
10913       # Release all resource locks except those used by the instance
10914       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10915                     keep=self.node_secondary_ip.keys())
10916
10917     # TODO: Can the instance lock be downgraded here? Take the optional disk
10918     # shutdown in the caller into consideration.
10919
10920     # Wait for sync
10921     # This can fail as the old devices are degraded and _WaitForSync
10922     # does a combined result over all disks, so we don't check its return value
10923     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10924     _WaitForSync(self.lu, self.instance)
10925
10926     # Check all devices manually
10927     self._CheckDevices(self.instance.primary_node, iv_names)
10928
10929     # Step: remove old storage
10930     if not self.early_release:
10931       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10932       self._RemoveOldStorage(self.target_node, iv_names)
10933
10934
10935 class LURepairNodeStorage(NoHooksLU):
10936   """Repairs the volume group on a node.
10937
10938   """
10939   REQ_BGL = False
10940
10941   def CheckArguments(self):
10942     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10943
10944     storage_type = self.op.storage_type
10945
10946     if (constants.SO_FIX_CONSISTENCY not in
10947         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10948       raise errors.OpPrereqError("Storage units of type '%s' can not be"
10949                                  " repaired" % storage_type,
10950                                  errors.ECODE_INVAL)
10951
10952   def ExpandNames(self):
10953     self.needed_locks = {
10954       locking.LEVEL_NODE: [self.op.node_name],
10955       }
10956
10957   def _CheckFaultyDisks(self, instance, node_name):
10958     """Ensure faulty disks abort the opcode or at least warn."""
10959     try:
10960       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10961                                   node_name, True):
10962         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10963                                    " node '%s'" % (instance.name, node_name),
10964                                    errors.ECODE_STATE)
10965     except errors.OpPrereqError, err:
10966       if self.op.ignore_consistency:
10967         self.proc.LogWarning(str(err.args[0]))
10968       else:
10969         raise
10970
10971   def CheckPrereq(self):
10972     """Check prerequisites.
10973
10974     """
10975     # Check whether any instance on this node has faulty disks
10976     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10977       if inst.admin_state != constants.ADMINST_UP:
10978         continue
10979       check_nodes = set(inst.all_nodes)
10980       check_nodes.discard(self.op.node_name)
10981       for inst_node_name in check_nodes:
10982         self._CheckFaultyDisks(inst, inst_node_name)
10983
10984   def Exec(self, feedback_fn):
10985     feedback_fn("Repairing storage unit '%s' on %s ..." %
10986                 (self.op.name, self.op.node_name))
10987
10988     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10989     result = self.rpc.call_storage_execute(self.op.node_name,
10990                                            self.op.storage_type, st_args,
10991                                            self.op.name,
10992                                            constants.SO_FIX_CONSISTENCY)
10993     result.Raise("Failed to repair storage unit '%s' on %s" %
10994                  (self.op.name, self.op.node_name))
10995
10996
10997 class LUNodeEvacuate(NoHooksLU):
10998   """Evacuates instances off a list of nodes.
10999
11000   """
11001   REQ_BGL = False
11002
11003   _MODE2IALLOCATOR = {
11004     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11005     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11006     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11007     }
11008   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11009   assert (frozenset(_MODE2IALLOCATOR.values()) ==
11010           constants.IALLOCATOR_NEVAC_MODES)
11011
11012   def CheckArguments(self):
11013     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11014
11015   def ExpandNames(self):
11016     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11017
11018     if self.op.remote_node is not None:
11019       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11020       assert self.op.remote_node
11021
11022       if self.op.remote_node == self.op.node_name:
11023         raise errors.OpPrereqError("Can not use evacuated node as a new"
11024                                    " secondary node", errors.ECODE_INVAL)
11025
11026       if self.op.mode != constants.NODE_EVAC_SEC:
11027         raise errors.OpPrereqError("Without the use of an iallocator only"
11028                                    " secondary instances can be evacuated",
11029                                    errors.ECODE_INVAL)
11030
11031     # Declare locks
11032     self.share_locks = _ShareAll()
11033     self.needed_locks = {
11034       locking.LEVEL_INSTANCE: [],
11035       locking.LEVEL_NODEGROUP: [],
11036       locking.LEVEL_NODE: [],
11037       }
11038
11039     # Determine nodes (via group) optimistically, needs verification once locks
11040     # have been acquired
11041     self.lock_nodes = self._DetermineNodes()
11042
11043   def _DetermineNodes(self):
11044     """Gets the list of nodes to operate on.
11045
11046     """
11047     if self.op.remote_node is None:
11048       # Iallocator will choose any node(s) in the same group
11049       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11050     else:
11051       group_nodes = frozenset([self.op.remote_node])
11052
11053     # Determine nodes to be locked
11054     return set([self.op.node_name]) | group_nodes
11055
11056   def _DetermineInstances(self):
11057     """Builds list of instances to operate on.
11058
11059     """
11060     assert self.op.mode in constants.NODE_EVAC_MODES
11061
11062     if self.op.mode == constants.NODE_EVAC_PRI:
11063       # Primary instances only
11064       inst_fn = _GetNodePrimaryInstances
11065       assert self.op.remote_node is None, \
11066         "Evacuating primary instances requires iallocator"
11067     elif self.op.mode == constants.NODE_EVAC_SEC:
11068       # Secondary instances only
11069       inst_fn = _GetNodeSecondaryInstances
11070     else:
11071       # All instances
11072       assert self.op.mode == constants.NODE_EVAC_ALL
11073       inst_fn = _GetNodeInstances
11074       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11075       # per instance
11076       raise errors.OpPrereqError("Due to an issue with the iallocator"
11077                                  " interface it is not possible to evacuate"
11078                                  " all instances at once; specify explicitly"
11079                                  " whether to evacuate primary or secondary"
11080                                  " instances",
11081                                  errors.ECODE_INVAL)
11082
11083     return inst_fn(self.cfg, self.op.node_name)
11084
11085   def DeclareLocks(self, level):
11086     if level == locking.LEVEL_INSTANCE:
11087       # Lock instances optimistically, needs verification once node and group
11088       # locks have been acquired
11089       self.needed_locks[locking.LEVEL_INSTANCE] = \
11090         set(i.name for i in self._DetermineInstances())
11091
11092     elif level == locking.LEVEL_NODEGROUP:
11093       # Lock node groups for all potential target nodes optimistically, needs
11094       # verification once nodes have been acquired
11095       self.needed_locks[locking.LEVEL_NODEGROUP] = \
11096         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11097
11098     elif level == locking.LEVEL_NODE:
11099       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11100
11101   def CheckPrereq(self):
11102     # Verify locks
11103     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11104     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11105     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11106
11107     need_nodes = self._DetermineNodes()
11108
11109     if not owned_nodes.issuperset(need_nodes):
11110       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11111                                  " locks were acquired, current nodes are"
11112                                  " are '%s', used to be '%s'; retry the"
11113                                  " operation" %
11114                                  (self.op.node_name,
11115                                   utils.CommaJoin(need_nodes),
11116                                   utils.CommaJoin(owned_nodes)),
11117                                  errors.ECODE_STATE)
11118
11119     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11120     if owned_groups != wanted_groups:
11121       raise errors.OpExecError("Node groups changed since locks were acquired,"
11122                                " current groups are '%s', used to be '%s';"
11123                                " retry the operation" %
11124                                (utils.CommaJoin(wanted_groups),
11125                                 utils.CommaJoin(owned_groups)))
11126
11127     # Determine affected instances
11128     self.instances = self._DetermineInstances()
11129     self.instance_names = [i.name for i in self.instances]
11130
11131     if set(self.instance_names) != owned_instances:
11132       raise errors.OpExecError("Instances on node '%s' changed since locks"
11133                                " were acquired, current instances are '%s',"
11134                                " used to be '%s'; retry the operation" %
11135                                (self.op.node_name,
11136                                 utils.CommaJoin(self.instance_names),
11137                                 utils.CommaJoin(owned_instances)))
11138
11139     if self.instance_names:
11140       self.LogInfo("Evacuating instances from node '%s': %s",
11141                    self.op.node_name,
11142                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
11143     else:
11144       self.LogInfo("No instances to evacuate from node '%s'",
11145                    self.op.node_name)
11146
11147     if self.op.remote_node is not None:
11148       for i in self.instances:
11149         if i.primary_node == self.op.remote_node:
11150           raise errors.OpPrereqError("Node %s is the primary node of"
11151                                      " instance %s, cannot use it as"
11152                                      " secondary" %
11153                                      (self.op.remote_node, i.name),
11154                                      errors.ECODE_INVAL)
11155
11156   def Exec(self, feedback_fn):
11157     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11158
11159     if not self.instance_names:
11160       # No instances to evacuate
11161       jobs = []
11162
11163     elif self.op.iallocator is not None:
11164       # TODO: Implement relocation to other group
11165       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11166                        evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11167                        instances=list(self.instance_names))
11168
11169       ial.Run(self.op.iallocator)
11170
11171       if not ial.success:
11172         raise errors.OpPrereqError("Can't compute node evacuation using"
11173                                    " iallocator '%s': %s" %
11174                                    (self.op.iallocator, ial.info),
11175                                    errors.ECODE_NORES)
11176
11177       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11178
11179     elif self.op.remote_node is not None:
11180       assert self.op.mode == constants.NODE_EVAC_SEC
11181       jobs = [
11182         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11183                                         remote_node=self.op.remote_node,
11184                                         disks=[],
11185                                         mode=constants.REPLACE_DISK_CHG,
11186                                         early_release=self.op.early_release)]
11187         for instance_name in self.instance_names
11188         ]
11189
11190     else:
11191       raise errors.ProgrammerError("No iallocator or remote node")
11192
11193     return ResultWithJobs(jobs)
11194
11195
11196 def _SetOpEarlyRelease(early_release, op):
11197   """Sets C{early_release} flag on opcodes if available.
11198
11199   """
11200   try:
11201     op.early_release = early_release
11202   except AttributeError:
11203     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11204
11205   return op
11206
11207
11208 def _NodeEvacDest(use_nodes, group, nodes):
11209   """Returns group or nodes depending on caller's choice.
11210
11211   """
11212   if use_nodes:
11213     return utils.CommaJoin(nodes)
11214   else:
11215     return group
11216
11217
11218 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11219   """Unpacks the result of change-group and node-evacuate iallocator requests.
11220
11221   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11222   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11223
11224   @type lu: L{LogicalUnit}
11225   @param lu: Logical unit instance
11226   @type alloc_result: tuple/list
11227   @param alloc_result: Result from iallocator
11228   @type early_release: bool
11229   @param early_release: Whether to release locks early if possible
11230   @type use_nodes: bool
11231   @param use_nodes: Whether to display node names instead of groups
11232
11233   """
11234   (moved, failed, jobs) = alloc_result
11235
11236   if failed:
11237     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11238                                  for (name, reason) in failed)
11239     lu.LogWarning("Unable to evacuate instances %s", failreason)
11240     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11241
11242   if moved:
11243     lu.LogInfo("Instances to be moved: %s",
11244                utils.CommaJoin("%s (to %s)" %
11245                                (name, _NodeEvacDest(use_nodes, group, nodes))
11246                                for (name, group, nodes) in moved))
11247
11248   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11249               map(opcodes.OpCode.LoadOpCode, ops))
11250           for ops in jobs]
11251
11252
11253 class LUInstanceGrowDisk(LogicalUnit):
11254   """Grow a disk of an instance.
11255
11256   """
11257   HPATH = "disk-grow"
11258   HTYPE = constants.HTYPE_INSTANCE
11259   REQ_BGL = False
11260
11261   def ExpandNames(self):
11262     self._ExpandAndLockInstance()
11263     self.needed_locks[locking.LEVEL_NODE] = []
11264     self.needed_locks[locking.LEVEL_NODE_RES] = []
11265     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11266
11267   def DeclareLocks(self, level):
11268     if level == locking.LEVEL_NODE:
11269       self._LockInstancesNodes()
11270     elif level == locking.LEVEL_NODE_RES:
11271       # Copy node locks
11272       self.needed_locks[locking.LEVEL_NODE_RES] = \
11273         self.needed_locks[locking.LEVEL_NODE][:]
11274
11275   def BuildHooksEnv(self):
11276     """Build hooks env.
11277
11278     This runs on the master, the primary and all the secondaries.
11279
11280     """
11281     env = {
11282       "DISK": self.op.disk,
11283       "AMOUNT": self.op.amount,
11284       }
11285     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11286     return env
11287
11288   def BuildHooksNodes(self):
11289     """Build hooks nodes.
11290
11291     """
11292     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11293     return (nl, nl)
11294
11295   def CheckPrereq(self):
11296     """Check prerequisites.
11297
11298     This checks that the instance is in the cluster.
11299
11300     """
11301     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11302     assert instance is not None, \
11303       "Cannot retrieve locked instance %s" % self.op.instance_name
11304     nodenames = list(instance.all_nodes)
11305     for node in nodenames:
11306       _CheckNodeOnline(self, node)
11307
11308     self.instance = instance
11309
11310     if instance.disk_template not in constants.DTS_GROWABLE:
11311       raise errors.OpPrereqError("Instance's disk layout does not support"
11312                                  " growing", errors.ECODE_INVAL)
11313
11314     self.disk = instance.FindDisk(self.op.disk)
11315
11316     if instance.disk_template not in (constants.DT_FILE,
11317                                       constants.DT_SHARED_FILE):
11318       # TODO: check the free disk space for file, when that feature will be
11319       # supported
11320       _CheckNodesFreeDiskPerVG(self, nodenames,
11321                                self.disk.ComputeGrowth(self.op.amount))
11322
11323   def Exec(self, feedback_fn):
11324     """Execute disk grow.
11325
11326     """
11327     instance = self.instance
11328     disk = self.disk
11329
11330     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11331     assert (self.owned_locks(locking.LEVEL_NODE) ==
11332             self.owned_locks(locking.LEVEL_NODE_RES))
11333
11334     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11335     if not disks_ok:
11336       raise errors.OpExecError("Cannot activate block device to grow")
11337
11338     feedback_fn("Growing disk %s of instance '%s' by %s" %
11339                 (self.op.disk, instance.name,
11340                  utils.FormatUnit(self.op.amount, "h")))
11341
11342     # First run all grow ops in dry-run mode
11343     for node in instance.all_nodes:
11344       self.cfg.SetDiskID(disk, node)
11345       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11346       result.Raise("Grow request failed to node %s" % node)
11347
11348     # We know that (as far as we can test) operations across different
11349     # nodes will succeed, time to run it for real
11350     for node in instance.all_nodes:
11351       self.cfg.SetDiskID(disk, node)
11352       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11353       result.Raise("Grow request failed to node %s" % node)
11354
11355       # TODO: Rewrite code to work properly
11356       # DRBD goes into sync mode for a short amount of time after executing the
11357       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11358       # calling "resize" in sync mode fails. Sleeping for a short amount of
11359       # time is a work-around.
11360       time.sleep(5)
11361
11362     disk.RecordGrow(self.op.amount)
11363     self.cfg.Update(instance, feedback_fn)
11364
11365     # Changes have been recorded, release node lock
11366     _ReleaseLocks(self, locking.LEVEL_NODE)
11367
11368     # Downgrade lock while waiting for sync
11369     self.glm.downgrade(locking.LEVEL_INSTANCE)
11370
11371     if self.op.wait_for_sync:
11372       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11373       if disk_abort:
11374         self.proc.LogWarning("Disk sync-ing has not returned a good"
11375                              " status; please check the instance")
11376       if instance.admin_state != constants.ADMINST_UP:
11377         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11378     elif instance.admin_state != constants.ADMINST_UP:
11379       self.proc.LogWarning("Not shutting down the disk even if the instance is"
11380                            " not supposed to be running because no wait for"
11381                            " sync mode was requested")
11382
11383     assert self.owned_locks(locking.LEVEL_NODE_RES)
11384     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11385
11386
11387 class LUInstanceQueryData(NoHooksLU):
11388   """Query runtime instance data.
11389
11390   """
11391   REQ_BGL = False
11392
11393   def ExpandNames(self):
11394     self.needed_locks = {}
11395
11396     # Use locking if requested or when non-static information is wanted
11397     if not (self.op.static or self.op.use_locking):
11398       self.LogWarning("Non-static data requested, locks need to be acquired")
11399       self.op.use_locking = True
11400
11401     if self.op.instances or not self.op.use_locking:
11402       # Expand instance names right here
11403       self.wanted_names = _GetWantedInstances(self, self.op.instances)
11404     else:
11405       # Will use acquired locks
11406       self.wanted_names = None
11407
11408     if self.op.use_locking:
11409       self.share_locks = _ShareAll()
11410
11411       if self.wanted_names is None:
11412         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11413       else:
11414         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11415
11416       self.needed_locks[locking.LEVEL_NODE] = []
11417       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11418
11419   def DeclareLocks(self, level):
11420     if self.op.use_locking and level == locking.LEVEL_NODE:
11421       self._LockInstancesNodes()
11422
11423   def CheckPrereq(self):
11424     """Check prerequisites.
11425
11426     This only checks the optional instance list against the existing names.
11427
11428     """
11429     if self.wanted_names is None:
11430       assert self.op.use_locking, "Locking was not used"
11431       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11432
11433     self.wanted_instances = \
11434         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11435
11436   def _ComputeBlockdevStatus(self, node, instance_name, dev):
11437     """Returns the status of a block device
11438
11439     """
11440     if self.op.static or not node:
11441       return None
11442
11443     self.cfg.SetDiskID(dev, node)
11444
11445     result = self.rpc.call_blockdev_find(node, dev)
11446     if result.offline:
11447       return None
11448
11449     result.Raise("Can't compute disk status for %s" % instance_name)
11450
11451     status = result.payload
11452     if status is None:
11453       return None
11454
11455     return (status.dev_path, status.major, status.minor,
11456             status.sync_percent, status.estimated_time,
11457             status.is_degraded, status.ldisk_status)
11458
11459   def _ComputeDiskStatus(self, instance, snode, dev):
11460     """Compute block device status.
11461
11462     """
11463     if dev.dev_type in constants.LDS_DRBD:
11464       # we change the snode then (otherwise we use the one passed in)
11465       if dev.logical_id[0] == instance.primary_node:
11466         snode = dev.logical_id[1]
11467       else:
11468         snode = dev.logical_id[0]
11469
11470     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11471                                               instance.name, dev)
11472     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11473
11474     if dev.children:
11475       dev_children = map(compat.partial(self._ComputeDiskStatus,
11476                                         instance, snode),
11477                          dev.children)
11478     else:
11479       dev_children = []
11480
11481     return {
11482       "iv_name": dev.iv_name,
11483       "dev_type": dev.dev_type,
11484       "logical_id": dev.logical_id,
11485       "physical_id": dev.physical_id,
11486       "pstatus": dev_pstatus,
11487       "sstatus": dev_sstatus,
11488       "children": dev_children,
11489       "mode": dev.mode,
11490       "size": dev.size,
11491       }
11492
11493   def Exec(self, feedback_fn):
11494     """Gather and return data"""
11495     result = {}
11496
11497     cluster = self.cfg.GetClusterInfo()
11498
11499     pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11500                                           for i in self.wanted_instances)
11501     for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11502       if self.op.static or pnode.offline:
11503         remote_state = None
11504         if pnode.offline:
11505           self.LogWarning("Primary node %s is marked offline, returning static"
11506                           " information only for instance %s" %
11507                           (pnode.name, instance.name))
11508       else:
11509         remote_info = self.rpc.call_instance_info(instance.primary_node,
11510                                                   instance.name,
11511                                                   instance.hypervisor)
11512         remote_info.Raise("Error checking node %s" % instance.primary_node)
11513         remote_info = remote_info.payload
11514         if remote_info and "state" in remote_info:
11515           remote_state = "up"
11516         else:
11517           if instance.admin_state == constants.ADMINST_UP:
11518             remote_state = "down"
11519           else:
11520             remote_state = instance.admin_state
11521
11522       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11523                   instance.disks)
11524
11525       result[instance.name] = {
11526         "name": instance.name,
11527         "config_state": instance.admin_state,
11528         "run_state": remote_state,
11529         "pnode": instance.primary_node,
11530         "snodes": instance.secondary_nodes,
11531         "os": instance.os,
11532         # this happens to be the same format used for hooks
11533         "nics": _NICListToTuple(self, instance.nics),
11534         "disk_template": instance.disk_template,
11535         "disks": disks,
11536         "hypervisor": instance.hypervisor,
11537         "network_port": instance.network_port,
11538         "hv_instance": instance.hvparams,
11539         "hv_actual": cluster.FillHV(instance, skip_globals=True),
11540         "be_instance": instance.beparams,
11541         "be_actual": cluster.FillBE(instance),
11542         "os_instance": instance.osparams,
11543         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11544         "serial_no": instance.serial_no,
11545         "mtime": instance.mtime,
11546         "ctime": instance.ctime,
11547         "uuid": instance.uuid,
11548         }
11549
11550     return result
11551
11552
11553 class LUInstanceSetParams(LogicalUnit):
11554   """Modifies an instances's parameters.
11555
11556   """
11557   HPATH = "instance-modify"
11558   HTYPE = constants.HTYPE_INSTANCE
11559   REQ_BGL = False
11560
11561   def CheckArguments(self):
11562     if not (self.op.nics or self.op.disks or self.op.disk_template or
11563             self.op.hvparams or self.op.beparams or self.op.os_name or
11564             self.op.online_inst or self.op.offline_inst):
11565       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11566
11567     if self.op.hvparams:
11568       _CheckGlobalHvParams(self.op.hvparams)
11569
11570     # Disk validation
11571     disk_addremove = 0
11572     for disk_op, disk_dict in self.op.disks:
11573       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11574       if disk_op == constants.DDM_REMOVE:
11575         disk_addremove += 1
11576         continue
11577       elif disk_op == constants.DDM_ADD:
11578         disk_addremove += 1
11579       else:
11580         if not isinstance(disk_op, int):
11581           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11582         if not isinstance(disk_dict, dict):
11583           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11584           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11585
11586       if disk_op == constants.DDM_ADD:
11587         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11588         if mode not in constants.DISK_ACCESS_SET:
11589           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11590                                      errors.ECODE_INVAL)
11591         size = disk_dict.get(constants.IDISK_SIZE, None)
11592         if size is None:
11593           raise errors.OpPrereqError("Required disk parameter size missing",
11594                                      errors.ECODE_INVAL)
11595         try:
11596           size = int(size)
11597         except (TypeError, ValueError), err:
11598           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11599                                      str(err), errors.ECODE_INVAL)
11600         disk_dict[constants.IDISK_SIZE] = size
11601       else:
11602         # modification of disk
11603         if constants.IDISK_SIZE in disk_dict:
11604           raise errors.OpPrereqError("Disk size change not possible, use"
11605                                      " grow-disk", errors.ECODE_INVAL)
11606
11607     if disk_addremove > 1:
11608       raise errors.OpPrereqError("Only one disk add or remove operation"
11609                                  " supported at a time", errors.ECODE_INVAL)
11610
11611     if self.op.disks and self.op.disk_template is not None:
11612       raise errors.OpPrereqError("Disk template conversion and other disk"
11613                                  " changes not supported at the same time",
11614                                  errors.ECODE_INVAL)
11615
11616     if (self.op.disk_template and
11617         self.op.disk_template in constants.DTS_INT_MIRROR and
11618         self.op.remote_node is None):
11619       raise errors.OpPrereqError("Changing the disk template to a mirrored"
11620                                  " one requires specifying a secondary node",
11621                                  errors.ECODE_INVAL)
11622
11623     # NIC validation
11624     nic_addremove = 0
11625     for nic_op, nic_dict in self.op.nics:
11626       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11627       if nic_op == constants.DDM_REMOVE:
11628         nic_addremove += 1
11629         continue
11630       elif nic_op == constants.DDM_ADD:
11631         nic_addremove += 1
11632       else:
11633         if not isinstance(nic_op, int):
11634           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11635         if not isinstance(nic_dict, dict):
11636           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11637           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11638
11639       # nic_dict should be a dict
11640       nic_ip = nic_dict.get(constants.INIC_IP, None)
11641       if nic_ip is not None:
11642         if nic_ip.lower() == constants.VALUE_NONE:
11643           nic_dict[constants.INIC_IP] = None
11644         else:
11645           if not netutils.IPAddress.IsValid(nic_ip):
11646             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11647                                        errors.ECODE_INVAL)
11648
11649       nic_bridge = nic_dict.get("bridge", None)
11650       nic_link = nic_dict.get(constants.INIC_LINK, None)
11651       if nic_bridge and nic_link:
11652         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11653                                    " at the same time", errors.ECODE_INVAL)
11654       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11655         nic_dict["bridge"] = None
11656       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11657         nic_dict[constants.INIC_LINK] = None
11658
11659       if nic_op == constants.DDM_ADD:
11660         nic_mac = nic_dict.get(constants.INIC_MAC, None)
11661         if nic_mac is None:
11662           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11663
11664       if constants.INIC_MAC in nic_dict:
11665         nic_mac = nic_dict[constants.INIC_MAC]
11666         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11667           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11668
11669         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11670           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11671                                      " modifying an existing nic",
11672                                      errors.ECODE_INVAL)
11673
11674     if nic_addremove > 1:
11675       raise errors.OpPrereqError("Only one NIC add or remove operation"
11676                                  " supported at a time", errors.ECODE_INVAL)
11677
11678   def ExpandNames(self):
11679     self._ExpandAndLockInstance()
11680     # Can't even acquire node locks in shared mode as upcoming changes in
11681     # Ganeti 2.6 will start to modify the node object on disk conversion
11682     self.needed_locks[locking.LEVEL_NODE] = []
11683     self.needed_locks[locking.LEVEL_NODE_RES] = []
11684     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11685
11686   def DeclareLocks(self, level):
11687     if level == locking.LEVEL_NODE:
11688       self._LockInstancesNodes()
11689       if self.op.disk_template and self.op.remote_node:
11690         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11691         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11692     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11693       # Copy node locks
11694       self.needed_locks[locking.LEVEL_NODE_RES] = \
11695         self.needed_locks[locking.LEVEL_NODE][:]
11696
11697   def BuildHooksEnv(self):
11698     """Build hooks env.
11699
11700     This runs on the master, primary and secondaries.
11701
11702     """
11703     args = dict()
11704     if constants.BE_MINMEM in self.be_new:
11705       args["minmem"] = self.be_new[constants.BE_MINMEM]
11706     if constants.BE_MAXMEM in self.be_new:
11707       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11708     if constants.BE_VCPUS in self.be_new:
11709       args["vcpus"] = self.be_new[constants.BE_VCPUS]
11710     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11711     # information at all.
11712     if self.op.nics:
11713       args["nics"] = []
11714       nic_override = dict(self.op.nics)
11715       for idx, nic in enumerate(self.instance.nics):
11716         if idx in nic_override:
11717           this_nic_override = nic_override[idx]
11718         else:
11719           this_nic_override = {}
11720         if constants.INIC_IP in this_nic_override:
11721           ip = this_nic_override[constants.INIC_IP]
11722         else:
11723           ip = nic.ip
11724         if constants.INIC_MAC in this_nic_override:
11725           mac = this_nic_override[constants.INIC_MAC]
11726         else:
11727           mac = nic.mac
11728         if idx in self.nic_pnew:
11729           nicparams = self.nic_pnew[idx]
11730         else:
11731           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11732         mode = nicparams[constants.NIC_MODE]
11733         link = nicparams[constants.NIC_LINK]
11734         args["nics"].append((ip, mac, mode, link))
11735       if constants.DDM_ADD in nic_override:
11736         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11737         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11738         nicparams = self.nic_pnew[constants.DDM_ADD]
11739         mode = nicparams[constants.NIC_MODE]
11740         link = nicparams[constants.NIC_LINK]
11741         args["nics"].append((ip, mac, mode, link))
11742       elif constants.DDM_REMOVE in nic_override:
11743         del args["nics"][-1]
11744
11745     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11746     if self.op.disk_template:
11747       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11748
11749     return env
11750
11751   def BuildHooksNodes(self):
11752     """Build hooks nodes.
11753
11754     """
11755     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11756     return (nl, nl)
11757
11758   def CheckPrereq(self):
11759     """Check prerequisites.
11760
11761     This only checks the instance list against the existing names.
11762
11763     """
11764     # checking the new params on the primary/secondary nodes
11765
11766     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11767     cluster = self.cluster = self.cfg.GetClusterInfo()
11768     assert self.instance is not None, \
11769       "Cannot retrieve locked instance %s" % self.op.instance_name
11770     pnode = instance.primary_node
11771     nodelist = list(instance.all_nodes)
11772     pnode_info = self.cfg.GetNodeInfo(pnode)
11773     self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11774
11775     # OS change
11776     if self.op.os_name and not self.op.force:
11777       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11778                       self.op.force_variant)
11779       instance_os = self.op.os_name
11780     else:
11781       instance_os = instance.os
11782
11783     if self.op.disk_template:
11784       if instance.disk_template == self.op.disk_template:
11785         raise errors.OpPrereqError("Instance already has disk template %s" %
11786                                    instance.disk_template, errors.ECODE_INVAL)
11787
11788       if (instance.disk_template,
11789           self.op.disk_template) not in self._DISK_CONVERSIONS:
11790         raise errors.OpPrereqError("Unsupported disk template conversion from"
11791                                    " %s to %s" % (instance.disk_template,
11792                                                   self.op.disk_template),
11793                                    errors.ECODE_INVAL)
11794       _CheckInstanceState(self, instance, INSTANCE_DOWN,
11795                           msg="cannot change disk template")
11796       if self.op.disk_template in constants.DTS_INT_MIRROR:
11797         if self.op.remote_node == pnode:
11798           raise errors.OpPrereqError("Given new secondary node %s is the same"
11799                                      " as the primary node of the instance" %
11800                                      self.op.remote_node, errors.ECODE_STATE)
11801         _CheckNodeOnline(self, self.op.remote_node)
11802         _CheckNodeNotDrained(self, self.op.remote_node)
11803         # FIXME: here we assume that the old instance type is DT_PLAIN
11804         assert instance.disk_template == constants.DT_PLAIN
11805         disks = [{constants.IDISK_SIZE: d.size,
11806                   constants.IDISK_VG: d.logical_id[0]}
11807                  for d in instance.disks]
11808         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11809         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11810
11811         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11812         ipolicy = _CalculateGroupIPolicy(cluster, snode_info.group)
11813         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
11814                                 ignore=self.op.ignore_ipolicy)
11815         if pnode_info.group != snode_info.group:
11816           self.LogWarning("The primary and secondary nodes are in two"
11817                           " different node groups; the disk parameters"
11818                           " from the first disk's node group will be"
11819                           " used")
11820
11821     # hvparams processing
11822     if self.op.hvparams:
11823       hv_type = instance.hypervisor
11824       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11825       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11826       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11827
11828       # local check
11829       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11830       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11831       self.hv_proposed = self.hv_new = hv_new # the new actual values
11832       self.hv_inst = i_hvdict # the new dict (without defaults)
11833     else:
11834       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11835                                               instance.hvparams)
11836       self.hv_new = self.hv_inst = {}
11837
11838     # beparams processing
11839     if self.op.beparams:
11840       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11841                                    use_none=True)
11842       objects.UpgradeBeParams(i_bedict)
11843       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11844       be_new = cluster.SimpleFillBE(i_bedict)
11845       self.be_proposed = self.be_new = be_new # the new actual values
11846       self.be_inst = i_bedict # the new dict (without defaults)
11847     else:
11848       self.be_new = self.be_inst = {}
11849       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11850     be_old = cluster.FillBE(instance)
11851
11852     # CPU param validation -- checking every time a paramtere is
11853     # changed to cover all cases where either CPU mask or vcpus have
11854     # changed
11855     if (constants.BE_VCPUS in self.be_proposed and
11856         constants.HV_CPU_MASK in self.hv_proposed):
11857       cpu_list = \
11858         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11859       # Verify mask is consistent with number of vCPUs. Can skip this
11860       # test if only 1 entry in the CPU mask, which means same mask
11861       # is applied to all vCPUs.
11862       if (len(cpu_list) > 1 and
11863           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11864         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11865                                    " CPU mask [%s]" %
11866                                    (self.be_proposed[constants.BE_VCPUS],
11867                                     self.hv_proposed[constants.HV_CPU_MASK]),
11868                                    errors.ECODE_INVAL)
11869
11870       # Only perform this test if a new CPU mask is given
11871       if constants.HV_CPU_MASK in self.hv_new:
11872         # Calculate the largest CPU number requested
11873         max_requested_cpu = max(map(max, cpu_list))
11874         # Check that all of the instance's nodes have enough physical CPUs to
11875         # satisfy the requested CPU mask
11876         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11877                                 max_requested_cpu + 1, instance.hypervisor)
11878
11879     # osparams processing
11880     if self.op.osparams:
11881       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11882       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11883       self.os_inst = i_osdict # the new dict (without defaults)
11884     else:
11885       self.os_inst = {}
11886
11887     self.warn = []
11888
11889     #TODO(dynmem): do the appropriate check involving MINMEM
11890     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11891         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11892       mem_check_list = [pnode]
11893       if be_new[constants.BE_AUTO_BALANCE]:
11894         # either we changed auto_balance to yes or it was from before
11895         mem_check_list.extend(instance.secondary_nodes)
11896       instance_info = self.rpc.call_instance_info(pnode, instance.name,
11897                                                   instance.hypervisor)
11898       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11899                                          [instance.hypervisor])
11900       pninfo = nodeinfo[pnode]
11901       msg = pninfo.fail_msg
11902       if msg:
11903         # Assume the primary node is unreachable and go ahead
11904         self.warn.append("Can't get info from primary node %s: %s" %
11905                          (pnode, msg))
11906       else:
11907         (_, _, (pnhvinfo, )) = pninfo.payload
11908         if not isinstance(pnhvinfo.get("memory_free", None), int):
11909           self.warn.append("Node data from primary node %s doesn't contain"
11910                            " free memory information" % pnode)
11911         elif instance_info.fail_msg:
11912           self.warn.append("Can't get instance runtime information: %s" %
11913                           instance_info.fail_msg)
11914         else:
11915           if instance_info.payload:
11916             current_mem = int(instance_info.payload["memory"])
11917           else:
11918             # Assume instance not running
11919             # (there is a slight race condition here, but it's not very
11920             # probable, and we have no other way to check)
11921             # TODO: Describe race condition
11922             current_mem = 0
11923           #TODO(dynmem): do the appropriate check involving MINMEM
11924           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11925                       pnhvinfo["memory_free"])
11926           if miss_mem > 0:
11927             raise errors.OpPrereqError("This change will prevent the instance"
11928                                        " from starting, due to %d MB of memory"
11929                                        " missing on its primary node" %
11930                                        miss_mem,
11931                                        errors.ECODE_NORES)
11932
11933       if be_new[constants.BE_AUTO_BALANCE]:
11934         for node, nres in nodeinfo.items():
11935           if node not in instance.secondary_nodes:
11936             continue
11937           nres.Raise("Can't get info from secondary node %s" % node,
11938                      prereq=True, ecode=errors.ECODE_STATE)
11939           (_, _, (nhvinfo, )) = nres.payload
11940           if not isinstance(nhvinfo.get("memory_free", None), int):
11941             raise errors.OpPrereqError("Secondary node %s didn't return free"
11942                                        " memory information" % node,
11943                                        errors.ECODE_STATE)
11944           #TODO(dynmem): do the appropriate check involving MINMEM
11945           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11946             raise errors.OpPrereqError("This change will prevent the instance"
11947                                        " from failover to its secondary node"
11948                                        " %s, due to not enough memory" % node,
11949                                        errors.ECODE_STATE)
11950
11951     # NIC processing
11952     self.nic_pnew = {}
11953     self.nic_pinst = {}
11954     for nic_op, nic_dict in self.op.nics:
11955       if nic_op == constants.DDM_REMOVE:
11956         if not instance.nics:
11957           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11958                                      errors.ECODE_INVAL)
11959         continue
11960       if nic_op != constants.DDM_ADD:
11961         # an existing nic
11962         if not instance.nics:
11963           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11964                                      " no NICs" % nic_op,
11965                                      errors.ECODE_INVAL)
11966         if nic_op < 0 or nic_op >= len(instance.nics):
11967           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11968                                      " are 0 to %d" %
11969                                      (nic_op, len(instance.nics) - 1),
11970                                      errors.ECODE_INVAL)
11971         old_nic_params = instance.nics[nic_op].nicparams
11972         old_nic_ip = instance.nics[nic_op].ip
11973       else:
11974         old_nic_params = {}
11975         old_nic_ip = None
11976
11977       update_params_dict = dict([(key, nic_dict[key])
11978                                  for key in constants.NICS_PARAMETERS
11979                                  if key in nic_dict])
11980
11981       if "bridge" in nic_dict:
11982         update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11983
11984       new_nic_params = _GetUpdatedParams(old_nic_params,
11985                                          update_params_dict)
11986       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11987       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11988       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11989       self.nic_pinst[nic_op] = new_nic_params
11990       self.nic_pnew[nic_op] = new_filled_nic_params
11991       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11992
11993       if new_nic_mode == constants.NIC_MODE_BRIDGED:
11994         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11995         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11996         if msg:
11997           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11998           if self.op.force:
11999             self.warn.append(msg)
12000           else:
12001             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12002       if new_nic_mode == constants.NIC_MODE_ROUTED:
12003         if constants.INIC_IP in nic_dict:
12004           nic_ip = nic_dict[constants.INIC_IP]
12005         else:
12006           nic_ip = old_nic_ip
12007         if nic_ip is None:
12008           raise errors.OpPrereqError("Cannot set the nic ip to None"
12009                                      " on a routed nic", errors.ECODE_INVAL)
12010       if constants.INIC_MAC in nic_dict:
12011         nic_mac = nic_dict[constants.INIC_MAC]
12012         if nic_mac is None:
12013           raise errors.OpPrereqError("Cannot set the nic mac to None",
12014                                      errors.ECODE_INVAL)
12015         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12016           # otherwise generate the mac
12017           nic_dict[constants.INIC_MAC] = \
12018             self.cfg.GenerateMAC(self.proc.GetECId())
12019         else:
12020           # or validate/reserve the current one
12021           try:
12022             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
12023           except errors.ReservationError:
12024             raise errors.OpPrereqError("MAC address %s already in use"
12025                                        " in cluster" % nic_mac,
12026                                        errors.ECODE_NOTUNIQUE)
12027
12028     # DISK processing
12029     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12030       raise errors.OpPrereqError("Disk operations not supported for"
12031                                  " diskless instances",
12032                                  errors.ECODE_INVAL)
12033     for disk_op, _ in self.op.disks:
12034       if disk_op == constants.DDM_REMOVE:
12035         if len(instance.disks) == 1:
12036           raise errors.OpPrereqError("Cannot remove the last disk of"
12037                                      " an instance", errors.ECODE_INVAL)
12038         _CheckInstanceState(self, instance, INSTANCE_DOWN,
12039                             msg="cannot remove disks")
12040
12041       if (disk_op == constants.DDM_ADD and
12042           len(instance.disks) >= constants.MAX_DISKS):
12043         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12044                                    " add more" % constants.MAX_DISKS,
12045                                    errors.ECODE_STATE)
12046       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12047         # an existing disk
12048         if disk_op < 0 or disk_op >= len(instance.disks):
12049           raise errors.OpPrereqError("Invalid disk index %s, valid values"
12050                                      " are 0 to %d" %
12051                                      (disk_op, len(instance.disks)),
12052                                      errors.ECODE_INVAL)
12053
12054     # disabling the instance
12055     if self.op.offline_inst:
12056       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12057                           msg="cannot change instance state to offline")
12058
12059     # enabling the instance
12060     if self.op.online_inst:
12061       _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12062                           msg="cannot make instance go online")
12063
12064   def _ConvertPlainToDrbd(self, feedback_fn):
12065     """Converts an instance from plain to drbd.
12066
12067     """
12068     feedback_fn("Converting template to drbd")
12069     instance = self.instance
12070     pnode = instance.primary_node
12071     snode = self.op.remote_node
12072
12073     assert instance.disk_template == constants.DT_PLAIN
12074
12075     # create a fake disk info for _GenerateDiskTemplate
12076     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12077                   constants.IDISK_VG: d.logical_id[0]}
12078                  for d in instance.disks]
12079     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12080                                       instance.name, pnode, [snode],
12081                                       disk_info, None, None, 0, feedback_fn,
12082                                       self.diskparams)
12083     info = _GetInstanceInfoText(instance)
12084     feedback_fn("Creating aditional volumes...")
12085     # first, create the missing data and meta devices
12086     for disk in new_disks:
12087       # unfortunately this is... not too nice
12088       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12089                             info, True)
12090       for child in disk.children:
12091         _CreateSingleBlockDev(self, snode, instance, child, info, True)
12092     # at this stage, all new LVs have been created, we can rename the
12093     # old ones
12094     feedback_fn("Renaming original volumes...")
12095     rename_list = [(o, n.children[0].logical_id)
12096                    for (o, n) in zip(instance.disks, new_disks)]
12097     result = self.rpc.call_blockdev_rename(pnode, rename_list)
12098     result.Raise("Failed to rename original LVs")
12099
12100     feedback_fn("Initializing DRBD devices...")
12101     # all child devices are in place, we can now create the DRBD devices
12102     for disk in new_disks:
12103       for node in [pnode, snode]:
12104         f_create = node == pnode
12105         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12106
12107     # at this point, the instance has been modified
12108     instance.disk_template = constants.DT_DRBD8
12109     instance.disks = new_disks
12110     self.cfg.Update(instance, feedback_fn)
12111
12112     # Release node locks while waiting for sync
12113     _ReleaseLocks(self, locking.LEVEL_NODE)
12114
12115     # disks are created, waiting for sync
12116     disk_abort = not _WaitForSync(self, instance,
12117                                   oneshot=not self.op.wait_for_sync)
12118     if disk_abort:
12119       raise errors.OpExecError("There are some degraded disks for"
12120                                " this instance, please cleanup manually")
12121
12122     # Node resource locks will be released by caller
12123
12124   def _ConvertDrbdToPlain(self, feedback_fn):
12125     """Converts an instance from drbd to plain.
12126
12127     """
12128     instance = self.instance
12129
12130     assert len(instance.secondary_nodes) == 1
12131     assert instance.disk_template == constants.DT_DRBD8
12132
12133     pnode = instance.primary_node
12134     snode = instance.secondary_nodes[0]
12135     feedback_fn("Converting template to plain")
12136
12137     old_disks = instance.disks
12138     new_disks = [d.children[0] for d in old_disks]
12139
12140     # copy over size and mode
12141     for parent, child in zip(old_disks, new_disks):
12142       child.size = parent.size
12143       child.mode = parent.mode
12144
12145     # update instance structure
12146     instance.disks = new_disks
12147     instance.disk_template = constants.DT_PLAIN
12148     self.cfg.Update(instance, feedback_fn)
12149
12150     # Release locks in case removing disks takes a while
12151     _ReleaseLocks(self, locking.LEVEL_NODE)
12152
12153     feedback_fn("Removing volumes on the secondary node...")
12154     for disk in old_disks:
12155       self.cfg.SetDiskID(disk, snode)
12156       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12157       if msg:
12158         self.LogWarning("Could not remove block device %s on node %s,"
12159                         " continuing anyway: %s", disk.iv_name, snode, msg)
12160
12161     feedback_fn("Removing unneeded volumes on the primary node...")
12162     for idx, disk in enumerate(old_disks):
12163       meta = disk.children[1]
12164       self.cfg.SetDiskID(meta, pnode)
12165       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12166       if msg:
12167         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12168                         " continuing anyway: %s", idx, pnode, msg)
12169
12170     # this is a DRBD disk, return its port to the pool
12171     for disk in old_disks:
12172       tcp_port = disk.logical_id[2]
12173       self.cfg.AddTcpUdpPort(tcp_port)
12174
12175     # Node resource locks will be released by caller
12176
12177   def Exec(self, feedback_fn):
12178     """Modifies an instance.
12179
12180     All parameters take effect only at the next restart of the instance.
12181
12182     """
12183     # Process here the warnings from CheckPrereq, as we don't have a
12184     # feedback_fn there.
12185     for warn in self.warn:
12186       feedback_fn("WARNING: %s" % warn)
12187
12188     assert ((self.op.disk_template is None) ^
12189             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12190       "Not owning any node resource locks"
12191
12192     result = []
12193     instance = self.instance
12194     # disk changes
12195     for disk_op, disk_dict in self.op.disks:
12196       if disk_op == constants.DDM_REMOVE:
12197         # remove the last disk
12198         device = instance.disks.pop()
12199         device_idx = len(instance.disks)
12200         for node, disk in device.ComputeNodeTree(instance.primary_node):
12201           self.cfg.SetDiskID(disk, node)
12202           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12203           if msg:
12204             self.LogWarning("Could not remove disk/%d on node %s: %s,"
12205                             " continuing anyway", device_idx, node, msg)
12206         result.append(("disk/%d" % device_idx, "remove"))
12207
12208         # if this is a DRBD disk, return its port to the pool
12209         if device.dev_type in constants.LDS_DRBD:
12210           tcp_port = device.logical_id[2]
12211           self.cfg.AddTcpUdpPort(tcp_port)
12212       elif disk_op == constants.DDM_ADD:
12213         # add a new disk
12214         if instance.disk_template in (constants.DT_FILE,
12215                                         constants.DT_SHARED_FILE):
12216           file_driver, file_path = instance.disks[0].logical_id
12217           file_path = os.path.dirname(file_path)
12218         else:
12219           file_driver = file_path = None
12220         disk_idx_base = len(instance.disks)
12221         new_disk = _GenerateDiskTemplate(self,
12222                                          instance.disk_template,
12223                                          instance.name, instance.primary_node,
12224                                          instance.secondary_nodes,
12225                                          [disk_dict],
12226                                          file_path,
12227                                          file_driver,
12228                                          disk_idx_base,
12229                                          feedback_fn,
12230                                          self.diskparams)[0]
12231         instance.disks.append(new_disk)
12232         info = _GetInstanceInfoText(instance)
12233
12234         logging.info("Creating volume %s for instance %s",
12235                      new_disk.iv_name, instance.name)
12236         # Note: this needs to be kept in sync with _CreateDisks
12237         #HARDCODE
12238         for node in instance.all_nodes:
12239           f_create = node == instance.primary_node
12240           try:
12241             _CreateBlockDev(self, node, instance, new_disk,
12242                             f_create, info, f_create)
12243           except errors.OpExecError, err:
12244             self.LogWarning("Failed to create volume %s (%s) on"
12245                             " node %s: %s",
12246                             new_disk.iv_name, new_disk, node, err)
12247         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12248                        (new_disk.size, new_disk.mode)))
12249       else:
12250         # change a given disk
12251         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12252         result.append(("disk.mode/%d" % disk_op,
12253                        disk_dict[constants.IDISK_MODE]))
12254
12255     if self.op.disk_template:
12256       if __debug__:
12257         check_nodes = set(instance.all_nodes)
12258         if self.op.remote_node:
12259           check_nodes.add(self.op.remote_node)
12260         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12261           owned = self.owned_locks(level)
12262           assert not (check_nodes - owned), \
12263             ("Not owning the correct locks, owning %r, expected at least %r" %
12264              (owned, check_nodes))
12265
12266       r_shut = _ShutdownInstanceDisks(self, instance)
12267       if not r_shut:
12268         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12269                                  " proceed with disk template conversion")
12270       mode = (instance.disk_template, self.op.disk_template)
12271       try:
12272         self._DISK_CONVERSIONS[mode](self, feedback_fn)
12273       except:
12274         self.cfg.ReleaseDRBDMinors(instance.name)
12275         raise
12276       result.append(("disk_template", self.op.disk_template))
12277
12278       assert instance.disk_template == self.op.disk_template, \
12279         ("Expected disk template '%s', found '%s'" %
12280          (self.op.disk_template, instance.disk_template))
12281
12282     # Release node and resource locks if there are any (they might already have
12283     # been released during disk conversion)
12284     _ReleaseLocks(self, locking.LEVEL_NODE)
12285     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12286
12287     # NIC changes
12288     for nic_op, nic_dict in self.op.nics:
12289       if nic_op == constants.DDM_REMOVE:
12290         # remove the last nic
12291         del instance.nics[-1]
12292         result.append(("nic.%d" % len(instance.nics), "remove"))
12293       elif nic_op == constants.DDM_ADD:
12294         # mac and bridge should be set, by now
12295         mac = nic_dict[constants.INIC_MAC]
12296         ip = nic_dict.get(constants.INIC_IP, None)
12297         nicparams = self.nic_pinst[constants.DDM_ADD]
12298         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12299         instance.nics.append(new_nic)
12300         result.append(("nic.%d" % (len(instance.nics) - 1),
12301                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12302                        (new_nic.mac, new_nic.ip,
12303                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12304                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12305                        )))
12306       else:
12307         for key in (constants.INIC_MAC, constants.INIC_IP):
12308           if key in nic_dict:
12309             setattr(instance.nics[nic_op], key, nic_dict[key])
12310         if nic_op in self.nic_pinst:
12311           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12312         for key, val in nic_dict.iteritems():
12313           result.append(("nic.%s/%d" % (key, nic_op), val))
12314
12315     # hvparams changes
12316     if self.op.hvparams:
12317       instance.hvparams = self.hv_inst
12318       for key, val in self.op.hvparams.iteritems():
12319         result.append(("hv/%s" % key, val))
12320
12321     # beparams changes
12322     if self.op.beparams:
12323       instance.beparams = self.be_inst
12324       for key, val in self.op.beparams.iteritems():
12325         result.append(("be/%s" % key, val))
12326
12327     # OS change
12328     if self.op.os_name:
12329       instance.os = self.op.os_name
12330
12331     # osparams changes
12332     if self.op.osparams:
12333       instance.osparams = self.os_inst
12334       for key, val in self.op.osparams.iteritems():
12335         result.append(("os/%s" % key, val))
12336
12337     # online/offline instance
12338     if self.op.online_inst:
12339       self.cfg.MarkInstanceDown(instance.name)
12340       result.append(("admin_state", constants.ADMINST_DOWN))
12341     if self.op.offline_inst:
12342       self.cfg.MarkInstanceOffline(instance.name)
12343       result.append(("admin_state", constants.ADMINST_OFFLINE))
12344
12345     self.cfg.Update(instance, feedback_fn)
12346
12347     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12348                 self.owned_locks(locking.LEVEL_NODE)), \
12349       "All node locks should have been released by now"
12350
12351     return result
12352
12353   _DISK_CONVERSIONS = {
12354     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12355     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12356     }
12357
12358
12359 class LUInstanceChangeGroup(LogicalUnit):
12360   HPATH = "instance-change-group"
12361   HTYPE = constants.HTYPE_INSTANCE
12362   REQ_BGL = False
12363
12364   def ExpandNames(self):
12365     self.share_locks = _ShareAll()
12366     self.needed_locks = {
12367       locking.LEVEL_NODEGROUP: [],
12368       locking.LEVEL_NODE: [],
12369       }
12370
12371     self._ExpandAndLockInstance()
12372
12373     if self.op.target_groups:
12374       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12375                                   self.op.target_groups)
12376     else:
12377       self.req_target_uuids = None
12378
12379     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12380
12381   def DeclareLocks(self, level):
12382     if level == locking.LEVEL_NODEGROUP:
12383       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12384
12385       if self.req_target_uuids:
12386         lock_groups = set(self.req_target_uuids)
12387
12388         # Lock all groups used by instance optimistically; this requires going
12389         # via the node before it's locked, requiring verification later on
12390         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12391         lock_groups.update(instance_groups)
12392       else:
12393         # No target groups, need to lock all of them
12394         lock_groups = locking.ALL_SET
12395
12396       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12397
12398     elif level == locking.LEVEL_NODE:
12399       if self.req_target_uuids:
12400         # Lock all nodes used by instances
12401         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12402         self._LockInstancesNodes()
12403
12404         # Lock all nodes in all potential target groups
12405         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12406                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12407         member_nodes = [node_name
12408                         for group in lock_groups
12409                         for node_name in self.cfg.GetNodeGroup(group).members]
12410         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12411       else:
12412         # Lock all nodes as all groups are potential targets
12413         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12414
12415   def CheckPrereq(self):
12416     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12417     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12418     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12419
12420     assert (self.req_target_uuids is None or
12421             owned_groups.issuperset(self.req_target_uuids))
12422     assert owned_instances == set([self.op.instance_name])
12423
12424     # Get instance information
12425     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12426
12427     # Check if node groups for locked instance are still correct
12428     assert owned_nodes.issuperset(self.instance.all_nodes), \
12429       ("Instance %s's nodes changed while we kept the lock" %
12430        self.op.instance_name)
12431
12432     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12433                                            owned_groups)
12434
12435     if self.req_target_uuids:
12436       # User requested specific target groups
12437       self.target_uuids = self.req_target_uuids
12438     else:
12439       # All groups except those used by the instance are potential targets
12440       self.target_uuids = owned_groups - inst_groups
12441
12442     conflicting_groups = self.target_uuids & inst_groups
12443     if conflicting_groups:
12444       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12445                                  " used by the instance '%s'" %
12446                                  (utils.CommaJoin(conflicting_groups),
12447                                   self.op.instance_name),
12448                                  errors.ECODE_INVAL)
12449
12450     if not self.target_uuids:
12451       raise errors.OpPrereqError("There are no possible target groups",
12452                                  errors.ECODE_INVAL)
12453
12454   def BuildHooksEnv(self):
12455     """Build hooks env.
12456
12457     """
12458     assert self.target_uuids
12459
12460     env = {
12461       "TARGET_GROUPS": " ".join(self.target_uuids),
12462       }
12463
12464     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12465
12466     return env
12467
12468   def BuildHooksNodes(self):
12469     """Build hooks nodes.
12470
12471     """
12472     mn = self.cfg.GetMasterNode()
12473     return ([mn], [mn])
12474
12475   def Exec(self, feedback_fn):
12476     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12477
12478     assert instances == [self.op.instance_name], "Instance not locked"
12479
12480     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12481                      instances=instances, target_groups=list(self.target_uuids))
12482
12483     ial.Run(self.op.iallocator)
12484
12485     if not ial.success:
12486       raise errors.OpPrereqError("Can't compute solution for changing group of"
12487                                  " instance '%s' using iallocator '%s': %s" %
12488                                  (self.op.instance_name, self.op.iallocator,
12489                                   ial.info),
12490                                  errors.ECODE_NORES)
12491
12492     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12493
12494     self.LogInfo("Iallocator returned %s job(s) for changing group of"
12495                  " instance '%s'", len(jobs), self.op.instance_name)
12496
12497     return ResultWithJobs(jobs)
12498
12499
12500 class LUBackupQuery(NoHooksLU):
12501   """Query the exports list
12502
12503   """
12504   REQ_BGL = False
12505
12506   def ExpandNames(self):
12507     self.needed_locks = {}
12508     self.share_locks[locking.LEVEL_NODE] = 1
12509     if not self.op.nodes:
12510       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12511     else:
12512       self.needed_locks[locking.LEVEL_NODE] = \
12513         _GetWantedNodes(self, self.op.nodes)
12514
12515   def Exec(self, feedback_fn):
12516     """Compute the list of all the exported system images.
12517
12518     @rtype: dict
12519     @return: a dictionary with the structure node->(export-list)
12520         where export-list is a list of the instances exported on
12521         that node.
12522
12523     """
12524     self.nodes = self.owned_locks(locking.LEVEL_NODE)
12525     rpcresult = self.rpc.call_export_list(self.nodes)
12526     result = {}
12527     for node in rpcresult:
12528       if rpcresult[node].fail_msg:
12529         result[node] = False
12530       else:
12531         result[node] = rpcresult[node].payload
12532
12533     return result
12534
12535
12536 class LUBackupPrepare(NoHooksLU):
12537   """Prepares an instance for an export and returns useful information.
12538
12539   """
12540   REQ_BGL = False
12541
12542   def ExpandNames(self):
12543     self._ExpandAndLockInstance()
12544
12545   def CheckPrereq(self):
12546     """Check prerequisites.
12547
12548     """
12549     instance_name = self.op.instance_name
12550
12551     self.instance = self.cfg.GetInstanceInfo(instance_name)
12552     assert self.instance is not None, \
12553           "Cannot retrieve locked instance %s" % self.op.instance_name
12554     _CheckNodeOnline(self, self.instance.primary_node)
12555
12556     self._cds = _GetClusterDomainSecret()
12557
12558   def Exec(self, feedback_fn):
12559     """Prepares an instance for an export.
12560
12561     """
12562     instance = self.instance
12563
12564     if self.op.mode == constants.EXPORT_MODE_REMOTE:
12565       salt = utils.GenerateSecret(8)
12566
12567       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12568       result = self.rpc.call_x509_cert_create(instance.primary_node,
12569                                               constants.RIE_CERT_VALIDITY)
12570       result.Raise("Can't create X509 key and certificate on %s" % result.node)
12571
12572       (name, cert_pem) = result.payload
12573
12574       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12575                                              cert_pem)
12576
12577       return {
12578         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12579         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12580                           salt),
12581         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12582         }
12583
12584     return None
12585
12586
12587 class LUBackupExport(LogicalUnit):
12588   """Export an instance to an image in the cluster.
12589
12590   """
12591   HPATH = "instance-export"
12592   HTYPE = constants.HTYPE_INSTANCE
12593   REQ_BGL = False
12594
12595   def CheckArguments(self):
12596     """Check the arguments.
12597
12598     """
12599     self.x509_key_name = self.op.x509_key_name
12600     self.dest_x509_ca_pem = self.op.destination_x509_ca
12601
12602     if self.op.mode == constants.EXPORT_MODE_REMOTE:
12603       if not self.x509_key_name:
12604         raise errors.OpPrereqError("Missing X509 key name for encryption",
12605                                    errors.ECODE_INVAL)
12606
12607       if not self.dest_x509_ca_pem:
12608         raise errors.OpPrereqError("Missing destination X509 CA",
12609                                    errors.ECODE_INVAL)
12610
12611   def ExpandNames(self):
12612     self._ExpandAndLockInstance()
12613
12614     # Lock all nodes for local exports
12615     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12616       # FIXME: lock only instance primary and destination node
12617       #
12618       # Sad but true, for now we have do lock all nodes, as we don't know where
12619       # the previous export might be, and in this LU we search for it and
12620       # remove it from its current node. In the future we could fix this by:
12621       #  - making a tasklet to search (share-lock all), then create the
12622       #    new one, then one to remove, after
12623       #  - removing the removal operation altogether
12624       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12625
12626   def DeclareLocks(self, level):
12627     """Last minute lock declaration."""
12628     # All nodes are locked anyway, so nothing to do here.
12629
12630   def BuildHooksEnv(self):
12631     """Build hooks env.
12632
12633     This will run on the master, primary node and target node.
12634
12635     """
12636     env = {
12637       "EXPORT_MODE": self.op.mode,
12638       "EXPORT_NODE": self.op.target_node,
12639       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12640       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12641       # TODO: Generic function for boolean env variables
12642       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12643       }
12644
12645     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12646
12647     return env
12648
12649   def BuildHooksNodes(self):
12650     """Build hooks nodes.
12651
12652     """
12653     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12654
12655     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12656       nl.append(self.op.target_node)
12657
12658     return (nl, nl)
12659
12660   def CheckPrereq(self):
12661     """Check prerequisites.
12662
12663     This checks that the instance and node names are valid.
12664
12665     """
12666     instance_name = self.op.instance_name
12667
12668     self.instance = self.cfg.GetInstanceInfo(instance_name)
12669     assert self.instance is not None, \
12670           "Cannot retrieve locked instance %s" % self.op.instance_name
12671     _CheckNodeOnline(self, self.instance.primary_node)
12672
12673     if (self.op.remove_instance and
12674         self.instance.admin_state == constants.ADMINST_UP and
12675         not self.op.shutdown):
12676       raise errors.OpPrereqError("Can not remove instance without shutting it"
12677                                  " down before")
12678
12679     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12680       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12681       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12682       assert self.dst_node is not None
12683
12684       _CheckNodeOnline(self, self.dst_node.name)
12685       _CheckNodeNotDrained(self, self.dst_node.name)
12686
12687       self._cds = None
12688       self.dest_disk_info = None
12689       self.dest_x509_ca = None
12690
12691     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12692       self.dst_node = None
12693
12694       if len(self.op.target_node) != len(self.instance.disks):
12695         raise errors.OpPrereqError(("Received destination information for %s"
12696                                     " disks, but instance %s has %s disks") %
12697                                    (len(self.op.target_node), instance_name,
12698                                     len(self.instance.disks)),
12699                                    errors.ECODE_INVAL)
12700
12701       cds = _GetClusterDomainSecret()
12702
12703       # Check X509 key name
12704       try:
12705         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12706       except (TypeError, ValueError), err:
12707         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12708
12709       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12710         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12711                                    errors.ECODE_INVAL)
12712
12713       # Load and verify CA
12714       try:
12715         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12716       except OpenSSL.crypto.Error, err:
12717         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12718                                    (err, ), errors.ECODE_INVAL)
12719
12720       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12721       if errcode is not None:
12722         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12723                                    (msg, ), errors.ECODE_INVAL)
12724
12725       self.dest_x509_ca = cert
12726
12727       # Verify target information
12728       disk_info = []
12729       for idx, disk_data in enumerate(self.op.target_node):
12730         try:
12731           (host, port, magic) = \
12732             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12733         except errors.GenericError, err:
12734           raise errors.OpPrereqError("Target info for disk %s: %s" %
12735                                      (idx, err), errors.ECODE_INVAL)
12736
12737         disk_info.append((host, port, magic))
12738
12739       assert len(disk_info) == len(self.op.target_node)
12740       self.dest_disk_info = disk_info
12741
12742     else:
12743       raise errors.ProgrammerError("Unhandled export mode %r" %
12744                                    self.op.mode)
12745
12746     # instance disk type verification
12747     # TODO: Implement export support for file-based disks
12748     for disk in self.instance.disks:
12749       if disk.dev_type == constants.LD_FILE:
12750         raise errors.OpPrereqError("Export not supported for instances with"
12751                                    " file-based disks", errors.ECODE_INVAL)
12752
12753   def _CleanupExports(self, feedback_fn):
12754     """Removes exports of current instance from all other nodes.
12755
12756     If an instance in a cluster with nodes A..D was exported to node C, its
12757     exports will be removed from the nodes A, B and D.
12758
12759     """
12760     assert self.op.mode != constants.EXPORT_MODE_REMOTE
12761
12762     nodelist = self.cfg.GetNodeList()
12763     nodelist.remove(self.dst_node.name)
12764
12765     # on one-node clusters nodelist will be empty after the removal
12766     # if we proceed the backup would be removed because OpBackupQuery
12767     # substitutes an empty list with the full cluster node list.
12768     iname = self.instance.name
12769     if nodelist:
12770       feedback_fn("Removing old exports for instance %s" % iname)
12771       exportlist = self.rpc.call_export_list(nodelist)
12772       for node in exportlist:
12773         if exportlist[node].fail_msg:
12774           continue
12775         if iname in exportlist[node].payload:
12776           msg = self.rpc.call_export_remove(node, iname).fail_msg
12777           if msg:
12778             self.LogWarning("Could not remove older export for instance %s"
12779                             " on node %s: %s", iname, node, msg)
12780
12781   def Exec(self, feedback_fn):
12782     """Export an instance to an image in the cluster.
12783
12784     """
12785     assert self.op.mode in constants.EXPORT_MODES
12786
12787     instance = self.instance
12788     src_node = instance.primary_node
12789
12790     if self.op.shutdown:
12791       # shutdown the instance, but not the disks
12792       feedback_fn("Shutting down instance %s" % instance.name)
12793       result = self.rpc.call_instance_shutdown(src_node, instance,
12794                                                self.op.shutdown_timeout)
12795       # TODO: Maybe ignore failures if ignore_remove_failures is set
12796       result.Raise("Could not shutdown instance %s on"
12797                    " node %s" % (instance.name, src_node))
12798
12799     # set the disks ID correctly since call_instance_start needs the
12800     # correct drbd minor to create the symlinks
12801     for disk in instance.disks:
12802       self.cfg.SetDiskID(disk, src_node)
12803
12804     activate_disks = (instance.admin_state != constants.ADMINST_UP)
12805
12806     if activate_disks:
12807       # Activate the instance disks if we'exporting a stopped instance
12808       feedback_fn("Activating disks for %s" % instance.name)
12809       _StartInstanceDisks(self, instance, None)
12810
12811     try:
12812       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12813                                                      instance)
12814
12815       helper.CreateSnapshots()
12816       try:
12817         if (self.op.shutdown and
12818             instance.admin_state == constants.ADMINST_UP and
12819             not self.op.remove_instance):
12820           assert not activate_disks
12821           feedback_fn("Starting instance %s" % instance.name)
12822           result = self.rpc.call_instance_start(src_node,
12823                                                 (instance, None, None), False)
12824           msg = result.fail_msg
12825           if msg:
12826             feedback_fn("Failed to start instance: %s" % msg)
12827             _ShutdownInstanceDisks(self, instance)
12828             raise errors.OpExecError("Could not start instance: %s" % msg)
12829
12830         if self.op.mode == constants.EXPORT_MODE_LOCAL:
12831           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12832         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12833           connect_timeout = constants.RIE_CONNECT_TIMEOUT
12834           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12835
12836           (key_name, _, _) = self.x509_key_name
12837
12838           dest_ca_pem = \
12839             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12840                                             self.dest_x509_ca)
12841
12842           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12843                                                      key_name, dest_ca_pem,
12844                                                      timeouts)
12845       finally:
12846         helper.Cleanup()
12847
12848       # Check for backwards compatibility
12849       assert len(dresults) == len(instance.disks)
12850       assert compat.all(isinstance(i, bool) for i in dresults), \
12851              "Not all results are boolean: %r" % dresults
12852
12853     finally:
12854       if activate_disks:
12855         feedback_fn("Deactivating disks for %s" % instance.name)
12856         _ShutdownInstanceDisks(self, instance)
12857
12858     if not (compat.all(dresults) and fin_resu):
12859       failures = []
12860       if not fin_resu:
12861         failures.append("export finalization")
12862       if not compat.all(dresults):
12863         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12864                                if not dsk)
12865         failures.append("disk export: disk(s) %s" % fdsk)
12866
12867       raise errors.OpExecError("Export failed, errors in %s" %
12868                                utils.CommaJoin(failures))
12869
12870     # At this point, the export was successful, we can cleanup/finish
12871
12872     # Remove instance if requested
12873     if self.op.remove_instance:
12874       feedback_fn("Removing instance %s" % instance.name)
12875       _RemoveInstance(self, feedback_fn, instance,
12876                       self.op.ignore_remove_failures)
12877
12878     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12879       self._CleanupExports(feedback_fn)
12880
12881     return fin_resu, dresults
12882
12883
12884 class LUBackupRemove(NoHooksLU):
12885   """Remove exports related to the named instance.
12886
12887   """
12888   REQ_BGL = False
12889
12890   def ExpandNames(self):
12891     self.needed_locks = {}
12892     # We need all nodes to be locked in order for RemoveExport to work, but we
12893     # don't need to lock the instance itself, as nothing will happen to it (and
12894     # we can remove exports also for a removed instance)
12895     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12896
12897   def Exec(self, feedback_fn):
12898     """Remove any export.
12899
12900     """
12901     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12902     # If the instance was not found we'll try with the name that was passed in.
12903     # This will only work if it was an FQDN, though.
12904     fqdn_warn = False
12905     if not instance_name:
12906       fqdn_warn = True
12907       instance_name = self.op.instance_name
12908
12909     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12910     exportlist = self.rpc.call_export_list(locked_nodes)
12911     found = False
12912     for node in exportlist:
12913       msg = exportlist[node].fail_msg
12914       if msg:
12915         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12916         continue
12917       if instance_name in exportlist[node].payload:
12918         found = True
12919         result = self.rpc.call_export_remove(node, instance_name)
12920         msg = result.fail_msg
12921         if msg:
12922           logging.error("Could not remove export for instance %s"
12923                         " on node %s: %s", instance_name, node, msg)
12924
12925     if fqdn_warn and not found:
12926       feedback_fn("Export not found. If trying to remove an export belonging"
12927                   " to a deleted instance please use its Fully Qualified"
12928                   " Domain Name.")
12929
12930
12931 class LUGroupAdd(LogicalUnit):
12932   """Logical unit for creating node groups.
12933
12934   """
12935   HPATH = "group-add"
12936   HTYPE = constants.HTYPE_GROUP
12937   REQ_BGL = False
12938
12939   def ExpandNames(self):
12940     # We need the new group's UUID here so that we can create and acquire the
12941     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12942     # that it should not check whether the UUID exists in the configuration.
12943     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12944     self.needed_locks = {}
12945     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12946
12947   def CheckPrereq(self):
12948     """Check prerequisites.
12949
12950     This checks that the given group name is not an existing node group
12951     already.
12952
12953     """
12954     try:
12955       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12956     except errors.OpPrereqError:
12957       pass
12958     else:
12959       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12960                                  " node group (UUID: %s)" %
12961                                  (self.op.group_name, existing_uuid),
12962                                  errors.ECODE_EXISTS)
12963
12964     if self.op.ndparams:
12965       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12966
12967     if self.op.hv_state:
12968       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
12969     else:
12970       self.new_hv_state = None
12971
12972     if self.op.disk_state:
12973       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
12974     else:
12975       self.new_disk_state = None
12976
12977     if self.op.diskparams:
12978       for templ in constants.DISK_TEMPLATES:
12979         if templ not in self.op.diskparams:
12980           self.op.diskparams[templ] = {}
12981         utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12982     else:
12983       self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12984
12985     if self.op.ipolicy:
12986       cluster = self.cfg.GetClusterInfo()
12987       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
12988       objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
12989
12990   def BuildHooksEnv(self):
12991     """Build hooks env.
12992
12993     """
12994     return {
12995       "GROUP_NAME": self.op.group_name,
12996       }
12997
12998   def BuildHooksNodes(self):
12999     """Build hooks nodes.
13000
13001     """
13002     mn = self.cfg.GetMasterNode()
13003     return ([mn], [mn])
13004
13005   def Exec(self, feedback_fn):
13006     """Add the node group to the cluster.
13007
13008     """
13009     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13010                                   uuid=self.group_uuid,
13011                                   alloc_policy=self.op.alloc_policy,
13012                                   ndparams=self.op.ndparams,
13013                                   diskparams=self.op.diskparams,
13014                                   ipolicy=self.op.ipolicy,
13015                                   hv_state_static=self.new_hv_state,
13016                                   disk_state_static=self.new_disk_state)
13017
13018     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13019     del self.remove_locks[locking.LEVEL_NODEGROUP]
13020
13021
13022 class LUGroupAssignNodes(NoHooksLU):
13023   """Logical unit for assigning nodes to groups.
13024
13025   """
13026   REQ_BGL = False
13027
13028   def ExpandNames(self):
13029     # These raise errors.OpPrereqError on their own:
13030     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13031     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13032
13033     # We want to lock all the affected nodes and groups. We have readily
13034     # available the list of nodes, and the *destination* group. To gather the
13035     # list of "source" groups, we need to fetch node information later on.
13036     self.needed_locks = {
13037       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13038       locking.LEVEL_NODE: self.op.nodes,
13039       }
13040
13041   def DeclareLocks(self, level):
13042     if level == locking.LEVEL_NODEGROUP:
13043       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13044
13045       # Try to get all affected nodes' groups without having the group or node
13046       # lock yet. Needs verification later in the code flow.
13047       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13048
13049       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13050
13051   def CheckPrereq(self):
13052     """Check prerequisites.
13053
13054     """
13055     assert self.needed_locks[locking.LEVEL_NODEGROUP]
13056     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13057             frozenset(self.op.nodes))
13058
13059     expected_locks = (set([self.group_uuid]) |
13060                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13061     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13062     if actual_locks != expected_locks:
13063       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13064                                " current groups are '%s', used to be '%s'" %
13065                                (utils.CommaJoin(expected_locks),
13066                                 utils.CommaJoin(actual_locks)))
13067
13068     self.node_data = self.cfg.GetAllNodesInfo()
13069     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13070     instance_data = self.cfg.GetAllInstancesInfo()
13071
13072     if self.group is None:
13073       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13074                                (self.op.group_name, self.group_uuid))
13075
13076     (new_splits, previous_splits) = \
13077       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13078                                              for node in self.op.nodes],
13079                                             self.node_data, instance_data)
13080
13081     if new_splits:
13082       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13083
13084       if not self.op.force:
13085         raise errors.OpExecError("The following instances get split by this"
13086                                  " change and --force was not given: %s" %
13087                                  fmt_new_splits)
13088       else:
13089         self.LogWarning("This operation will split the following instances: %s",
13090                         fmt_new_splits)
13091
13092         if previous_splits:
13093           self.LogWarning("In addition, these already-split instances continue"
13094                           " to be split across groups: %s",
13095                           utils.CommaJoin(utils.NiceSort(previous_splits)))
13096
13097   def Exec(self, feedback_fn):
13098     """Assign nodes to a new group.
13099
13100     """
13101     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13102
13103     self.cfg.AssignGroupNodes(mods)
13104
13105   @staticmethod
13106   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13107     """Check for split instances after a node assignment.
13108
13109     This method considers a series of node assignments as an atomic operation,
13110     and returns information about split instances after applying the set of
13111     changes.
13112
13113     In particular, it returns information about newly split instances, and
13114     instances that were already split, and remain so after the change.
13115
13116     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13117     considered.
13118
13119     @type changes: list of (node_name, new_group_uuid) pairs.
13120     @param changes: list of node assignments to consider.
13121     @param node_data: a dict with data for all nodes
13122     @param instance_data: a dict with all instances to consider
13123     @rtype: a two-tuple
13124     @return: a list of instances that were previously okay and result split as a
13125       consequence of this change, and a list of instances that were previously
13126       split and this change does not fix.
13127
13128     """
13129     changed_nodes = dict((node, group) for node, group in changes
13130                          if node_data[node].group != group)
13131
13132     all_split_instances = set()
13133     previously_split_instances = set()
13134
13135     def InstanceNodes(instance):
13136       return [instance.primary_node] + list(instance.secondary_nodes)
13137
13138     for inst in instance_data.values():
13139       if inst.disk_template not in constants.DTS_INT_MIRROR:
13140         continue
13141
13142       instance_nodes = InstanceNodes(inst)
13143
13144       if len(set(node_data[node].group for node in instance_nodes)) > 1:
13145         previously_split_instances.add(inst.name)
13146
13147       if len(set(changed_nodes.get(node, node_data[node].group)
13148                  for node in instance_nodes)) > 1:
13149         all_split_instances.add(inst.name)
13150
13151     return (list(all_split_instances - previously_split_instances),
13152             list(previously_split_instances & all_split_instances))
13153
13154
13155 class _GroupQuery(_QueryBase):
13156   FIELDS = query.GROUP_FIELDS
13157
13158   def ExpandNames(self, lu):
13159     lu.needed_locks = {}
13160
13161     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13162     self._cluster = lu.cfg.GetClusterInfo()
13163     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13164
13165     if not self.names:
13166       self.wanted = [name_to_uuid[name]
13167                      for name in utils.NiceSort(name_to_uuid.keys())]
13168     else:
13169       # Accept names to be either names or UUIDs.
13170       missing = []
13171       self.wanted = []
13172       all_uuid = frozenset(self._all_groups.keys())
13173
13174       for name in self.names:
13175         if name in all_uuid:
13176           self.wanted.append(name)
13177         elif name in name_to_uuid:
13178           self.wanted.append(name_to_uuid[name])
13179         else:
13180           missing.append(name)
13181
13182       if missing:
13183         raise errors.OpPrereqError("Some groups do not exist: %s" %
13184                                    utils.CommaJoin(missing),
13185                                    errors.ECODE_NOENT)
13186
13187   def DeclareLocks(self, lu, level):
13188     pass
13189
13190   def _GetQueryData(self, lu):
13191     """Computes the list of node groups and their attributes.
13192
13193     """
13194     do_nodes = query.GQ_NODE in self.requested_data
13195     do_instances = query.GQ_INST in self.requested_data
13196
13197     group_to_nodes = None
13198     group_to_instances = None
13199
13200     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13201     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13202     # latter GetAllInstancesInfo() is not enough, for we have to go through
13203     # instance->node. Hence, we will need to process nodes even if we only need
13204     # instance information.
13205     if do_nodes or do_instances:
13206       all_nodes = lu.cfg.GetAllNodesInfo()
13207       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13208       node_to_group = {}
13209
13210       for node in all_nodes.values():
13211         if node.group in group_to_nodes:
13212           group_to_nodes[node.group].append(node.name)
13213           node_to_group[node.name] = node.group
13214
13215       if do_instances:
13216         all_instances = lu.cfg.GetAllInstancesInfo()
13217         group_to_instances = dict((uuid, []) for uuid in self.wanted)
13218
13219         for instance in all_instances.values():
13220           node = instance.primary_node
13221           if node in node_to_group:
13222             group_to_instances[node_to_group[node]].append(instance.name)
13223
13224         if not do_nodes:
13225           # Do not pass on node information if it was not requested.
13226           group_to_nodes = None
13227
13228     return query.GroupQueryData(self._cluster,
13229                                 [self._all_groups[uuid]
13230                                  for uuid in self.wanted],
13231                                 group_to_nodes, group_to_instances)
13232
13233
13234 class LUGroupQuery(NoHooksLU):
13235   """Logical unit for querying node groups.
13236
13237   """
13238   REQ_BGL = False
13239
13240   def CheckArguments(self):
13241     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13242                           self.op.output_fields, False)
13243
13244   def ExpandNames(self):
13245     self.gq.ExpandNames(self)
13246
13247   def DeclareLocks(self, level):
13248     self.gq.DeclareLocks(self, level)
13249
13250   def Exec(self, feedback_fn):
13251     return self.gq.OldStyleQuery(self)
13252
13253
13254 class LUGroupSetParams(LogicalUnit):
13255   """Modifies the parameters of a node group.
13256
13257   """
13258   HPATH = "group-modify"
13259   HTYPE = constants.HTYPE_GROUP
13260   REQ_BGL = False
13261
13262   def CheckArguments(self):
13263     all_changes = [
13264       self.op.ndparams,
13265       self.op.diskparams,
13266       self.op.alloc_policy,
13267       self.op.hv_state,
13268       self.op.disk_state,
13269       self.op.ipolicy,
13270       ]
13271
13272     if all_changes.count(None) == len(all_changes):
13273       raise errors.OpPrereqError("Please pass at least one modification",
13274                                  errors.ECODE_INVAL)
13275
13276   def ExpandNames(self):
13277     # This raises errors.OpPrereqError on its own:
13278     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13279
13280     self.needed_locks = {
13281       locking.LEVEL_NODEGROUP: [self.group_uuid],
13282       }
13283
13284   def CheckPrereq(self):
13285     """Check prerequisites.
13286
13287     """
13288     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13289
13290     if self.group is None:
13291       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13292                                (self.op.group_name, self.group_uuid))
13293
13294     if self.op.ndparams:
13295       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13296       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13297       self.new_ndparams = new_ndparams
13298
13299     if self.op.diskparams:
13300       self.new_diskparams = dict()
13301       for templ in constants.DISK_TEMPLATES:
13302         if templ not in self.op.diskparams:
13303           self.op.diskparams[templ] = {}
13304         new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13305                                              self.op.diskparams[templ])
13306         utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13307         self.new_diskparams[templ] = new_templ_params
13308
13309     if self.op.hv_state:
13310       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13311                                                  self.group.hv_state_static)
13312
13313     if self.op.disk_state:
13314       self.new_disk_state = \
13315         _MergeAndVerifyDiskState(self.op.disk_state,
13316                                  self.group.disk_state_static)
13317
13318     if self.op.ipolicy:
13319       g_ipolicy = {}
13320       for key, value in self.op.ipolicy.iteritems():
13321         g_ipolicy[key] = _GetUpdatedParams(self.group.ipolicy.get(key, {}),
13322                                            value,
13323                                            use_none=True)
13324         utils.ForceDictType(g_ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
13325       self.new_ipolicy = g_ipolicy
13326       objects.InstancePolicy.CheckParameterSyntax(self.new_ipolicy)
13327
13328   def BuildHooksEnv(self):
13329     """Build hooks env.
13330
13331     """
13332     return {
13333       "GROUP_NAME": self.op.group_name,
13334       "NEW_ALLOC_POLICY": self.op.alloc_policy,
13335       }
13336
13337   def BuildHooksNodes(self):
13338     """Build hooks nodes.
13339
13340     """
13341     mn = self.cfg.GetMasterNode()
13342     return ([mn], [mn])
13343
13344   def Exec(self, feedback_fn):
13345     """Modifies the node group.
13346
13347     """
13348     result = []
13349
13350     if self.op.ndparams:
13351       self.group.ndparams = self.new_ndparams
13352       result.append(("ndparams", str(self.group.ndparams)))
13353
13354     if self.op.diskparams:
13355       self.group.diskparams = self.new_diskparams
13356       result.append(("diskparams", str(self.group.diskparams)))
13357
13358     if self.op.alloc_policy:
13359       self.group.alloc_policy = self.op.alloc_policy
13360
13361     if self.op.hv_state:
13362       self.group.hv_state_static = self.new_hv_state
13363
13364     if self.op.disk_state:
13365       self.group.disk_state_static = self.new_disk_state
13366
13367     if self.op.ipolicy:
13368       self.group.ipolicy = self.new_ipolicy
13369
13370     self.cfg.Update(self.group, feedback_fn)
13371     return result
13372
13373
13374 class LUGroupRemove(LogicalUnit):
13375   HPATH = "group-remove"
13376   HTYPE = constants.HTYPE_GROUP
13377   REQ_BGL = False
13378
13379   def ExpandNames(self):
13380     # This will raises errors.OpPrereqError on its own:
13381     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13382     self.needed_locks = {
13383       locking.LEVEL_NODEGROUP: [self.group_uuid],
13384       }
13385
13386   def CheckPrereq(self):
13387     """Check prerequisites.
13388
13389     This checks that the given group name exists as a node group, that is
13390     empty (i.e., contains no nodes), and that is not the last group of the
13391     cluster.
13392
13393     """
13394     # Verify that the group is empty.
13395     group_nodes = [node.name
13396                    for node in self.cfg.GetAllNodesInfo().values()
13397                    if node.group == self.group_uuid]
13398
13399     if group_nodes:
13400       raise errors.OpPrereqError("Group '%s' not empty, has the following"
13401                                  " nodes: %s" %
13402                                  (self.op.group_name,
13403                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
13404                                  errors.ECODE_STATE)
13405
13406     # Verify the cluster would not be left group-less.
13407     if len(self.cfg.GetNodeGroupList()) == 1:
13408       raise errors.OpPrereqError("Group '%s' is the only group,"
13409                                  " cannot be removed" %
13410                                  self.op.group_name,
13411                                  errors.ECODE_STATE)
13412
13413   def BuildHooksEnv(self):
13414     """Build hooks env.
13415
13416     """
13417     return {
13418       "GROUP_NAME": self.op.group_name,
13419       }
13420
13421   def BuildHooksNodes(self):
13422     """Build hooks nodes.
13423
13424     """
13425     mn = self.cfg.GetMasterNode()
13426     return ([mn], [mn])
13427
13428   def Exec(self, feedback_fn):
13429     """Remove the node group.
13430
13431     """
13432     try:
13433       self.cfg.RemoveNodeGroup(self.group_uuid)
13434     except errors.ConfigurationError:
13435       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13436                                (self.op.group_name, self.group_uuid))
13437
13438     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13439
13440
13441 class LUGroupRename(LogicalUnit):
13442   HPATH = "group-rename"
13443   HTYPE = constants.HTYPE_GROUP
13444   REQ_BGL = False
13445
13446   def ExpandNames(self):
13447     # This raises errors.OpPrereqError on its own:
13448     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13449
13450     self.needed_locks = {
13451       locking.LEVEL_NODEGROUP: [self.group_uuid],
13452       }
13453
13454   def CheckPrereq(self):
13455     """Check prerequisites.
13456
13457     Ensures requested new name is not yet used.
13458
13459     """
13460     try:
13461       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13462     except errors.OpPrereqError:
13463       pass
13464     else:
13465       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13466                                  " node group (UUID: %s)" %
13467                                  (self.op.new_name, new_name_uuid),
13468                                  errors.ECODE_EXISTS)
13469
13470   def BuildHooksEnv(self):
13471     """Build hooks env.
13472
13473     """
13474     return {
13475       "OLD_NAME": self.op.group_name,
13476       "NEW_NAME": self.op.new_name,
13477       }
13478
13479   def BuildHooksNodes(self):
13480     """Build hooks nodes.
13481
13482     """
13483     mn = self.cfg.GetMasterNode()
13484
13485     all_nodes = self.cfg.GetAllNodesInfo()
13486     all_nodes.pop(mn, None)
13487
13488     run_nodes = [mn]
13489     run_nodes.extend(node.name for node in all_nodes.values()
13490                      if node.group == self.group_uuid)
13491
13492     return (run_nodes, run_nodes)
13493
13494   def Exec(self, feedback_fn):
13495     """Rename the node group.
13496
13497     """
13498     group = self.cfg.GetNodeGroup(self.group_uuid)
13499
13500     if group is None:
13501       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13502                                (self.op.group_name, self.group_uuid))
13503
13504     group.name = self.op.new_name
13505     self.cfg.Update(group, feedback_fn)
13506
13507     return self.op.new_name
13508
13509
13510 class LUGroupEvacuate(LogicalUnit):
13511   HPATH = "group-evacuate"
13512   HTYPE = constants.HTYPE_GROUP
13513   REQ_BGL = False
13514
13515   def ExpandNames(self):
13516     # This raises errors.OpPrereqError on its own:
13517     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13518
13519     if self.op.target_groups:
13520       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13521                                   self.op.target_groups)
13522     else:
13523       self.req_target_uuids = []
13524
13525     if self.group_uuid in self.req_target_uuids:
13526       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13527                                  " as a target group (targets are %s)" %
13528                                  (self.group_uuid,
13529                                   utils.CommaJoin(self.req_target_uuids)),
13530                                  errors.ECODE_INVAL)
13531
13532     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13533
13534     self.share_locks = _ShareAll()
13535     self.needed_locks = {
13536       locking.LEVEL_INSTANCE: [],
13537       locking.LEVEL_NODEGROUP: [],
13538       locking.LEVEL_NODE: [],
13539       }
13540
13541   def DeclareLocks(self, level):
13542     if level == locking.LEVEL_INSTANCE:
13543       assert not self.needed_locks[locking.LEVEL_INSTANCE]
13544
13545       # Lock instances optimistically, needs verification once node and group
13546       # locks have been acquired
13547       self.needed_locks[locking.LEVEL_INSTANCE] = \
13548         self.cfg.GetNodeGroupInstances(self.group_uuid)
13549
13550     elif level == locking.LEVEL_NODEGROUP:
13551       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13552
13553       if self.req_target_uuids:
13554         lock_groups = set([self.group_uuid] + self.req_target_uuids)
13555
13556         # Lock all groups used by instances optimistically; this requires going
13557         # via the node before it's locked, requiring verification later on
13558         lock_groups.update(group_uuid
13559                            for instance_name in
13560                              self.owned_locks(locking.LEVEL_INSTANCE)
13561                            for group_uuid in
13562                              self.cfg.GetInstanceNodeGroups(instance_name))
13563       else:
13564         # No target groups, need to lock all of them
13565         lock_groups = locking.ALL_SET
13566
13567       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13568
13569     elif level == locking.LEVEL_NODE:
13570       # This will only lock the nodes in the group to be evacuated which
13571       # contain actual instances
13572       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13573       self._LockInstancesNodes()
13574
13575       # Lock all nodes in group to be evacuated and target groups
13576       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13577       assert self.group_uuid in owned_groups
13578       member_nodes = [node_name
13579                       for group in owned_groups
13580                       for node_name in self.cfg.GetNodeGroup(group).members]
13581       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13582
13583   def CheckPrereq(self):
13584     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13585     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13586     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13587
13588     assert owned_groups.issuperset(self.req_target_uuids)
13589     assert self.group_uuid in owned_groups
13590
13591     # Check if locked instances are still correct
13592     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13593
13594     # Get instance information
13595     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13596
13597     # Check if node groups for locked instances are still correct
13598     for instance_name in owned_instances:
13599       inst = self.instances[instance_name]
13600       assert owned_nodes.issuperset(inst.all_nodes), \
13601         "Instance %s's nodes changed while we kept the lock" % instance_name
13602
13603       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13604                                              owned_groups)
13605
13606       assert self.group_uuid in inst_groups, \
13607         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13608
13609     if self.req_target_uuids:
13610       # User requested specific target groups
13611       self.target_uuids = self.req_target_uuids
13612     else:
13613       # All groups except the one to be evacuated are potential targets
13614       self.target_uuids = [group_uuid for group_uuid in owned_groups
13615                            if group_uuid != self.group_uuid]
13616
13617       if not self.target_uuids:
13618         raise errors.OpPrereqError("There are no possible target groups",
13619                                    errors.ECODE_INVAL)
13620
13621   def BuildHooksEnv(self):
13622     """Build hooks env.
13623
13624     """
13625     return {
13626       "GROUP_NAME": self.op.group_name,
13627       "TARGET_GROUPS": " ".join(self.target_uuids),
13628       }
13629
13630   def BuildHooksNodes(self):
13631     """Build hooks nodes.
13632
13633     """
13634     mn = self.cfg.GetMasterNode()
13635
13636     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13637
13638     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13639
13640     return (run_nodes, run_nodes)
13641
13642   def Exec(self, feedback_fn):
13643     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13644
13645     assert self.group_uuid not in self.target_uuids
13646
13647     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13648                      instances=instances, target_groups=self.target_uuids)
13649
13650     ial.Run(self.op.iallocator)
13651
13652     if not ial.success:
13653       raise errors.OpPrereqError("Can't compute group evacuation using"
13654                                  " iallocator '%s': %s" %
13655                                  (self.op.iallocator, ial.info),
13656                                  errors.ECODE_NORES)
13657
13658     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13659
13660     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13661                  len(jobs), self.op.group_name)
13662
13663     return ResultWithJobs(jobs)
13664
13665
13666 class TagsLU(NoHooksLU): # pylint: disable=W0223
13667   """Generic tags LU.
13668
13669   This is an abstract class which is the parent of all the other tags LUs.
13670
13671   """
13672   def ExpandNames(self):
13673     self.group_uuid = None
13674     self.needed_locks = {}
13675     if self.op.kind == constants.TAG_NODE:
13676       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13677       self.needed_locks[locking.LEVEL_NODE] = self.op.name
13678     elif self.op.kind == constants.TAG_INSTANCE:
13679       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13680       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13681     elif self.op.kind == constants.TAG_NODEGROUP:
13682       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13683
13684     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13685     # not possible to acquire the BGL based on opcode parameters)
13686
13687   def CheckPrereq(self):
13688     """Check prerequisites.
13689
13690     """
13691     if self.op.kind == constants.TAG_CLUSTER:
13692       self.target = self.cfg.GetClusterInfo()
13693     elif self.op.kind == constants.TAG_NODE:
13694       self.target = self.cfg.GetNodeInfo(self.op.name)
13695     elif self.op.kind == constants.TAG_INSTANCE:
13696       self.target = self.cfg.GetInstanceInfo(self.op.name)
13697     elif self.op.kind == constants.TAG_NODEGROUP:
13698       self.target = self.cfg.GetNodeGroup(self.group_uuid)
13699     else:
13700       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13701                                  str(self.op.kind), errors.ECODE_INVAL)
13702
13703
13704 class LUTagsGet(TagsLU):
13705   """Returns the tags of a given object.
13706
13707   """
13708   REQ_BGL = False
13709
13710   def ExpandNames(self):
13711     TagsLU.ExpandNames(self)
13712
13713     # Share locks as this is only a read operation
13714     self.share_locks = _ShareAll()
13715
13716   def Exec(self, feedback_fn):
13717     """Returns the tag list.
13718
13719     """
13720     return list(self.target.GetTags())
13721
13722
13723 class LUTagsSearch(NoHooksLU):
13724   """Searches the tags for a given pattern.
13725
13726   """
13727   REQ_BGL = False
13728
13729   def ExpandNames(self):
13730     self.needed_locks = {}
13731
13732   def CheckPrereq(self):
13733     """Check prerequisites.
13734
13735     This checks the pattern passed for validity by compiling it.
13736
13737     """
13738     try:
13739       self.re = re.compile(self.op.pattern)
13740     except re.error, err:
13741       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13742                                  (self.op.pattern, err), errors.ECODE_INVAL)
13743
13744   def Exec(self, feedback_fn):
13745     """Returns the tag list.
13746
13747     """
13748     cfg = self.cfg
13749     tgts = [("/cluster", cfg.GetClusterInfo())]
13750     ilist = cfg.GetAllInstancesInfo().values()
13751     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13752     nlist = cfg.GetAllNodesInfo().values()
13753     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13754     tgts.extend(("/nodegroup/%s" % n.name, n)
13755                 for n in cfg.GetAllNodeGroupsInfo().values())
13756     results = []
13757     for path, target in tgts:
13758       for tag in target.GetTags():
13759         if self.re.search(tag):
13760           results.append((path, tag))
13761     return results
13762
13763
13764 class LUTagsSet(TagsLU):
13765   """Sets a tag on a given object.
13766
13767   """
13768   REQ_BGL = False
13769
13770   def CheckPrereq(self):
13771     """Check prerequisites.
13772
13773     This checks the type and length of the tag name and value.
13774
13775     """
13776     TagsLU.CheckPrereq(self)
13777     for tag in self.op.tags:
13778       objects.TaggableObject.ValidateTag(tag)
13779
13780   def Exec(self, feedback_fn):
13781     """Sets the tag.
13782
13783     """
13784     try:
13785       for tag in self.op.tags:
13786         self.target.AddTag(tag)
13787     except errors.TagError, err:
13788       raise errors.OpExecError("Error while setting tag: %s" % str(err))
13789     self.cfg.Update(self.target, feedback_fn)
13790
13791
13792 class LUTagsDel(TagsLU):
13793   """Delete a list of tags from a given object.
13794
13795   """
13796   REQ_BGL = False
13797
13798   def CheckPrereq(self):
13799     """Check prerequisites.
13800
13801     This checks that we have the given tag.
13802
13803     """
13804     TagsLU.CheckPrereq(self)
13805     for tag in self.op.tags:
13806       objects.TaggableObject.ValidateTag(tag)
13807     del_tags = frozenset(self.op.tags)
13808     cur_tags = self.target.GetTags()
13809
13810     diff_tags = del_tags - cur_tags
13811     if diff_tags:
13812       diff_names = ("'%s'" % i for i in sorted(diff_tags))
13813       raise errors.OpPrereqError("Tag(s) %s not found" %
13814                                  (utils.CommaJoin(diff_names), ),
13815                                  errors.ECODE_NOENT)
13816
13817   def Exec(self, feedback_fn):
13818     """Remove the tag from the object.
13819
13820     """
13821     for tag in self.op.tags:
13822       self.target.RemoveTag(tag)
13823     self.cfg.Update(self.target, feedback_fn)
13824
13825
13826 class LUTestDelay(NoHooksLU):
13827   """Sleep for a specified amount of time.
13828
13829   This LU sleeps on the master and/or nodes for a specified amount of
13830   time.
13831
13832   """
13833   REQ_BGL = False
13834
13835   def ExpandNames(self):
13836     """Expand names and set required locks.
13837
13838     This expands the node list, if any.
13839
13840     """
13841     self.needed_locks = {}
13842     if self.op.on_nodes:
13843       # _GetWantedNodes can be used here, but is not always appropriate to use
13844       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13845       # more information.
13846       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13847       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13848
13849   def _TestDelay(self):
13850     """Do the actual sleep.
13851
13852     """
13853     if self.op.on_master:
13854       if not utils.TestDelay(self.op.duration):
13855         raise errors.OpExecError("Error during master delay test")
13856     if self.op.on_nodes:
13857       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13858       for node, node_result in result.items():
13859         node_result.Raise("Failure during rpc call to node %s" % node)
13860
13861   def Exec(self, feedback_fn):
13862     """Execute the test delay opcode, with the wanted repetitions.
13863
13864     """
13865     if self.op.repeat == 0:
13866       self._TestDelay()
13867     else:
13868       top_value = self.op.repeat - 1
13869       for i in range(self.op.repeat):
13870         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13871         self._TestDelay()
13872
13873
13874 class LUTestJqueue(NoHooksLU):
13875   """Utility LU to test some aspects of the job queue.
13876
13877   """
13878   REQ_BGL = False
13879
13880   # Must be lower than default timeout for WaitForJobChange to see whether it
13881   # notices changed jobs
13882   _CLIENT_CONNECT_TIMEOUT = 20.0
13883   _CLIENT_CONFIRM_TIMEOUT = 60.0
13884
13885   @classmethod
13886   def _NotifyUsingSocket(cls, cb, errcls):
13887     """Opens a Unix socket and waits for another program to connect.
13888
13889     @type cb: callable
13890     @param cb: Callback to send socket name to client
13891     @type errcls: class
13892     @param errcls: Exception class to use for errors
13893
13894     """
13895     # Using a temporary directory as there's no easy way to create temporary
13896     # sockets without writing a custom loop around tempfile.mktemp and
13897     # socket.bind
13898     tmpdir = tempfile.mkdtemp()
13899     try:
13900       tmpsock = utils.PathJoin(tmpdir, "sock")
13901
13902       logging.debug("Creating temporary socket at %s", tmpsock)
13903       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13904       try:
13905         sock.bind(tmpsock)
13906         sock.listen(1)
13907
13908         # Send details to client
13909         cb(tmpsock)
13910
13911         # Wait for client to connect before continuing
13912         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13913         try:
13914           (conn, _) = sock.accept()
13915         except socket.error, err:
13916           raise errcls("Client didn't connect in time (%s)" % err)
13917       finally:
13918         sock.close()
13919     finally:
13920       # Remove as soon as client is connected
13921       shutil.rmtree(tmpdir)
13922
13923     # Wait for client to close
13924     try:
13925       try:
13926         # pylint: disable=E1101
13927         # Instance of '_socketobject' has no ... member
13928         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13929         conn.recv(1)
13930       except socket.error, err:
13931         raise errcls("Client failed to confirm notification (%s)" % err)
13932     finally:
13933       conn.close()
13934
13935   def _SendNotification(self, test, arg, sockname):
13936     """Sends a notification to the client.
13937
13938     @type test: string
13939     @param test: Test name
13940     @param arg: Test argument (depends on test)
13941     @type sockname: string
13942     @param sockname: Socket path
13943
13944     """
13945     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13946
13947   def _Notify(self, prereq, test, arg):
13948     """Notifies the client of a test.
13949
13950     @type prereq: bool
13951     @param prereq: Whether this is a prereq-phase test
13952     @type test: string
13953     @param test: Test name
13954     @param arg: Test argument (depends on test)
13955
13956     """
13957     if prereq:
13958       errcls = errors.OpPrereqError
13959     else:
13960       errcls = errors.OpExecError
13961
13962     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13963                                                   test, arg),
13964                                    errcls)
13965
13966   def CheckArguments(self):
13967     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13968     self.expandnames_calls = 0
13969
13970   def ExpandNames(self):
13971     checkargs_calls = getattr(self, "checkargs_calls", 0)
13972     if checkargs_calls < 1:
13973       raise errors.ProgrammerError("CheckArguments was not called")
13974
13975     self.expandnames_calls += 1
13976
13977     if self.op.notify_waitlock:
13978       self._Notify(True, constants.JQT_EXPANDNAMES, None)
13979
13980     self.LogInfo("Expanding names")
13981
13982     # Get lock on master node (just to get a lock, not for a particular reason)
13983     self.needed_locks = {
13984       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13985       }
13986
13987   def Exec(self, feedback_fn):
13988     if self.expandnames_calls < 1:
13989       raise errors.ProgrammerError("ExpandNames was not called")
13990
13991     if self.op.notify_exec:
13992       self._Notify(False, constants.JQT_EXEC, None)
13993
13994     self.LogInfo("Executing")
13995
13996     if self.op.log_messages:
13997       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13998       for idx, msg in enumerate(self.op.log_messages):
13999         self.LogInfo("Sending log message %s", idx + 1)
14000         feedback_fn(constants.JQT_MSGPREFIX + msg)
14001         # Report how many test messages have been sent
14002         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14003
14004     if self.op.fail:
14005       raise errors.OpExecError("Opcode failure was requested")
14006
14007     return True
14008
14009
14010 class IAllocator(object):
14011   """IAllocator framework.
14012
14013   An IAllocator instance has three sets of attributes:
14014     - cfg that is needed to query the cluster
14015     - input data (all members of the _KEYS class attribute are required)
14016     - four buffer attributes (in|out_data|text), that represent the
14017       input (to the external script) in text and data structure format,
14018       and the output from it, again in two formats
14019     - the result variables from the script (success, info, nodes) for
14020       easy usage
14021
14022   """
14023   # pylint: disable=R0902
14024   # lots of instance attributes
14025
14026   def __init__(self, cfg, rpc_runner, mode, **kwargs):
14027     self.cfg = cfg
14028     self.rpc = rpc_runner
14029     # init buffer variables
14030     self.in_text = self.out_text = self.in_data = self.out_data = None
14031     # init all input fields so that pylint is happy
14032     self.mode = mode
14033     self.memory = self.disks = self.disk_template = None
14034     self.os = self.tags = self.nics = self.vcpus = None
14035     self.hypervisor = None
14036     self.relocate_from = None
14037     self.name = None
14038     self.instances = None
14039     self.evac_mode = None
14040     self.target_groups = []
14041     # computed fields
14042     self.required_nodes = None
14043     # init result fields
14044     self.success = self.info = self.result = None
14045
14046     try:
14047       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14048     except KeyError:
14049       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14050                                    " IAllocator" % self.mode)
14051
14052     keyset = [n for (n, _) in keydata]
14053
14054     for key in kwargs:
14055       if key not in keyset:
14056         raise errors.ProgrammerError("Invalid input parameter '%s' to"
14057                                      " IAllocator" % key)
14058       setattr(self, key, kwargs[key])
14059
14060     for key in keyset:
14061       if key not in kwargs:
14062         raise errors.ProgrammerError("Missing input parameter '%s' to"
14063                                      " IAllocator" % key)
14064     self._BuildInputData(compat.partial(fn, self), keydata)
14065
14066   def _ComputeClusterData(self):
14067     """Compute the generic allocator input data.
14068
14069     This is the data that is independent of the actual operation.
14070
14071     """
14072     cfg = self.cfg
14073     cluster_info = cfg.GetClusterInfo()
14074     # cluster data
14075     data = {
14076       "version": constants.IALLOCATOR_VERSION,
14077       "cluster_name": cfg.GetClusterName(),
14078       "cluster_tags": list(cluster_info.GetTags()),
14079       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14080       # we don't have job IDs
14081       }
14082     ninfo = cfg.GetAllNodesInfo()
14083     iinfo = cfg.GetAllInstancesInfo().values()
14084     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14085
14086     # node data
14087     node_list = [n.name for n in ninfo.values() if n.vm_capable]
14088
14089     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14090       hypervisor_name = self.hypervisor
14091     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14092       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14093     else:
14094       hypervisor_name = cluster_info.primary_hypervisor
14095
14096     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14097                                         [hypervisor_name])
14098     node_iinfo = \
14099       self.rpc.call_all_instances_info(node_list,
14100                                        cluster_info.enabled_hypervisors)
14101
14102     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14103
14104     config_ndata = self._ComputeBasicNodeData(ninfo)
14105     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14106                                                  i_list, config_ndata)
14107     assert len(data["nodes"]) == len(ninfo), \
14108         "Incomplete node data computed"
14109
14110     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14111
14112     self.in_data = data
14113
14114   @staticmethod
14115   def _ComputeNodeGroupData(cfg):
14116     """Compute node groups data.
14117
14118     """
14119     ng = dict((guuid, {
14120       "name": gdata.name,
14121       "alloc_policy": gdata.alloc_policy,
14122       })
14123       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14124
14125     return ng
14126
14127   @staticmethod
14128   def _ComputeBasicNodeData(node_cfg):
14129     """Compute global node data.
14130
14131     @rtype: dict
14132     @returns: a dict of name: (node dict, node config)
14133
14134     """
14135     # fill in static (config-based) values
14136     node_results = dict((ninfo.name, {
14137       "tags": list(ninfo.GetTags()),
14138       "primary_ip": ninfo.primary_ip,
14139       "secondary_ip": ninfo.secondary_ip,
14140       "offline": ninfo.offline,
14141       "drained": ninfo.drained,
14142       "master_candidate": ninfo.master_candidate,
14143       "group": ninfo.group,
14144       "master_capable": ninfo.master_capable,
14145       "vm_capable": ninfo.vm_capable,
14146       })
14147       for ninfo in node_cfg.values())
14148
14149     return node_results
14150
14151   @staticmethod
14152   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14153                               node_results):
14154     """Compute global node data.
14155
14156     @param node_results: the basic node structures as filled from the config
14157
14158     """
14159     #TODO(dynmem): compute the right data on MAX and MIN memory
14160     # make a copy of the current dict
14161     node_results = dict(node_results)
14162     for nname, nresult in node_data.items():
14163       assert nname in node_results, "Missing basic data for node %s" % nname
14164       ninfo = node_cfg[nname]
14165
14166       if not (ninfo.offline or ninfo.drained):
14167         nresult.Raise("Can't get data for node %s" % nname)
14168         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14169                                 nname)
14170         remote_info = _MakeLegacyNodeInfo(nresult.payload)
14171
14172         for attr in ["memory_total", "memory_free", "memory_dom0",
14173                      "vg_size", "vg_free", "cpu_total"]:
14174           if attr not in remote_info:
14175             raise errors.OpExecError("Node '%s' didn't return attribute"
14176                                      " '%s'" % (nname, attr))
14177           if not isinstance(remote_info[attr], int):
14178             raise errors.OpExecError("Node '%s' returned invalid value"
14179                                      " for '%s': %s" %
14180                                      (nname, attr, remote_info[attr]))
14181         # compute memory used by primary instances
14182         i_p_mem = i_p_up_mem = 0
14183         for iinfo, beinfo in i_list:
14184           if iinfo.primary_node == nname:
14185             i_p_mem += beinfo[constants.BE_MAXMEM]
14186             if iinfo.name not in node_iinfo[nname].payload:
14187               i_used_mem = 0
14188             else:
14189               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14190             i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14191             remote_info["memory_free"] -= max(0, i_mem_diff)
14192
14193             if iinfo.admin_state == constants.ADMINST_UP:
14194               i_p_up_mem += beinfo[constants.BE_MAXMEM]
14195
14196         # compute memory used by instances
14197         pnr_dyn = {
14198           "total_memory": remote_info["memory_total"],
14199           "reserved_memory": remote_info["memory_dom0"],
14200           "free_memory": remote_info["memory_free"],
14201           "total_disk": remote_info["vg_size"],
14202           "free_disk": remote_info["vg_free"],
14203           "total_cpus": remote_info["cpu_total"],
14204           "i_pri_memory": i_p_mem,
14205           "i_pri_up_memory": i_p_up_mem,
14206           }
14207         pnr_dyn.update(node_results[nname])
14208         node_results[nname] = pnr_dyn
14209
14210     return node_results
14211
14212   @staticmethod
14213   def _ComputeInstanceData(cluster_info, i_list):
14214     """Compute global instance data.
14215
14216     """
14217     instance_data = {}
14218     for iinfo, beinfo in i_list:
14219       nic_data = []
14220       for nic in iinfo.nics:
14221         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14222         nic_dict = {
14223           "mac": nic.mac,
14224           "ip": nic.ip,
14225           "mode": filled_params[constants.NIC_MODE],
14226           "link": filled_params[constants.NIC_LINK],
14227           }
14228         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14229           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14230         nic_data.append(nic_dict)
14231       pir = {
14232         "tags": list(iinfo.GetTags()),
14233         "admin_state": iinfo.admin_state,
14234         "vcpus": beinfo[constants.BE_VCPUS],
14235         "memory": beinfo[constants.BE_MAXMEM],
14236         "os": iinfo.os,
14237         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14238         "nics": nic_data,
14239         "disks": [{constants.IDISK_SIZE: dsk.size,
14240                    constants.IDISK_MODE: dsk.mode}
14241                   for dsk in iinfo.disks],
14242         "disk_template": iinfo.disk_template,
14243         "hypervisor": iinfo.hypervisor,
14244         }
14245       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14246                                                  pir["disks"])
14247       instance_data[iinfo.name] = pir
14248
14249     return instance_data
14250
14251   def _AddNewInstance(self):
14252     """Add new instance data to allocator structure.
14253
14254     This in combination with _AllocatorGetClusterData will create the
14255     correct structure needed as input for the allocator.
14256
14257     The checks for the completeness of the opcode must have already been
14258     done.
14259
14260     """
14261     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14262
14263     if self.disk_template in constants.DTS_INT_MIRROR:
14264       self.required_nodes = 2
14265     else:
14266       self.required_nodes = 1
14267
14268     request = {
14269       "name": self.name,
14270       "disk_template": self.disk_template,
14271       "tags": self.tags,
14272       "os": self.os,
14273       "vcpus": self.vcpus,
14274       "memory": self.memory,
14275       "disks": self.disks,
14276       "disk_space_total": disk_space,
14277       "nics": self.nics,
14278       "required_nodes": self.required_nodes,
14279       "hypervisor": self.hypervisor,
14280       }
14281
14282     return request
14283
14284   def _AddRelocateInstance(self):
14285     """Add relocate instance data to allocator structure.
14286
14287     This in combination with _IAllocatorGetClusterData will create the
14288     correct structure needed as input for the allocator.
14289
14290     The checks for the completeness of the opcode must have already been
14291     done.
14292
14293     """
14294     instance = self.cfg.GetInstanceInfo(self.name)
14295     if instance is None:
14296       raise errors.ProgrammerError("Unknown instance '%s' passed to"
14297                                    " IAllocator" % self.name)
14298
14299     if instance.disk_template not in constants.DTS_MIRRORED:
14300       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14301                                  errors.ECODE_INVAL)
14302
14303     if instance.disk_template in constants.DTS_INT_MIRROR and \
14304         len(instance.secondary_nodes) != 1:
14305       raise errors.OpPrereqError("Instance has not exactly one secondary node",
14306                                  errors.ECODE_STATE)
14307
14308     self.required_nodes = 1
14309     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14310     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14311
14312     request = {
14313       "name": self.name,
14314       "disk_space_total": disk_space,
14315       "required_nodes": self.required_nodes,
14316       "relocate_from": self.relocate_from,
14317       }
14318     return request
14319
14320   def _AddNodeEvacuate(self):
14321     """Get data for node-evacuate requests.
14322
14323     """
14324     return {
14325       "instances": self.instances,
14326       "evac_mode": self.evac_mode,
14327       }
14328
14329   def _AddChangeGroup(self):
14330     """Get data for node-evacuate requests.
14331
14332     """
14333     return {
14334       "instances": self.instances,
14335       "target_groups": self.target_groups,
14336       }
14337
14338   def _BuildInputData(self, fn, keydata):
14339     """Build input data structures.
14340
14341     """
14342     self._ComputeClusterData()
14343
14344     request = fn()
14345     request["type"] = self.mode
14346     for keyname, keytype in keydata:
14347       if keyname not in request:
14348         raise errors.ProgrammerError("Request parameter %s is missing" %
14349                                      keyname)
14350       val = request[keyname]
14351       if not keytype(val):
14352         raise errors.ProgrammerError("Request parameter %s doesn't pass"
14353                                      " validation, value %s, expected"
14354                                      " type %s" % (keyname, val, keytype))
14355     self.in_data["request"] = request
14356
14357     self.in_text = serializer.Dump(self.in_data)
14358
14359   _STRING_LIST = ht.TListOf(ht.TString)
14360   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14361      # pylint: disable=E1101
14362      # Class '...' has no 'OP_ID' member
14363      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14364                           opcodes.OpInstanceMigrate.OP_ID,
14365                           opcodes.OpInstanceReplaceDisks.OP_ID])
14366      })))
14367
14368   _NEVAC_MOVED = \
14369     ht.TListOf(ht.TAnd(ht.TIsLength(3),
14370                        ht.TItems([ht.TNonEmptyString,
14371                                   ht.TNonEmptyString,
14372                                   ht.TListOf(ht.TNonEmptyString),
14373                                  ])))
14374   _NEVAC_FAILED = \
14375     ht.TListOf(ht.TAnd(ht.TIsLength(2),
14376                        ht.TItems([ht.TNonEmptyString,
14377                                   ht.TMaybeString,
14378                                  ])))
14379   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14380                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14381
14382   _MODE_DATA = {
14383     constants.IALLOCATOR_MODE_ALLOC:
14384       (_AddNewInstance,
14385        [
14386         ("name", ht.TString),
14387         ("memory", ht.TInt),
14388         ("disks", ht.TListOf(ht.TDict)),
14389         ("disk_template", ht.TString),
14390         ("os", ht.TString),
14391         ("tags", _STRING_LIST),
14392         ("nics", ht.TListOf(ht.TDict)),
14393         ("vcpus", ht.TInt),
14394         ("hypervisor", ht.TString),
14395         ], ht.TList),
14396     constants.IALLOCATOR_MODE_RELOC:
14397       (_AddRelocateInstance,
14398        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14399        ht.TList),
14400      constants.IALLOCATOR_MODE_NODE_EVAC:
14401       (_AddNodeEvacuate, [
14402         ("instances", _STRING_LIST),
14403         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14404         ], _NEVAC_RESULT),
14405      constants.IALLOCATOR_MODE_CHG_GROUP:
14406       (_AddChangeGroup, [
14407         ("instances", _STRING_LIST),
14408         ("target_groups", _STRING_LIST),
14409         ], _NEVAC_RESULT),
14410     }
14411
14412   def Run(self, name, validate=True, call_fn=None):
14413     """Run an instance allocator and return the results.
14414
14415     """
14416     if call_fn is None:
14417       call_fn = self.rpc.call_iallocator_runner
14418
14419     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14420     result.Raise("Failure while running the iallocator script")
14421
14422     self.out_text = result.payload
14423     if validate:
14424       self._ValidateResult()
14425
14426   def _ValidateResult(self):
14427     """Process the allocator results.
14428
14429     This will process and if successful save the result in
14430     self.out_data and the other parameters.
14431
14432     """
14433     try:
14434       rdict = serializer.Load(self.out_text)
14435     except Exception, err:
14436       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14437
14438     if not isinstance(rdict, dict):
14439       raise errors.OpExecError("Can't parse iallocator results: not a dict")
14440
14441     # TODO: remove backwards compatiblity in later versions
14442     if "nodes" in rdict and "result" not in rdict:
14443       rdict["result"] = rdict["nodes"]
14444       del rdict["nodes"]
14445
14446     for key in "success", "info", "result":
14447       if key not in rdict:
14448         raise errors.OpExecError("Can't parse iallocator results:"
14449                                  " missing key '%s'" % key)
14450       setattr(self, key, rdict[key])
14451
14452     if not self._result_check(self.result):
14453       raise errors.OpExecError("Iallocator returned invalid result,"
14454                                " expected %s, got %s" %
14455                                (self._result_check, self.result),
14456                                errors.ECODE_INVAL)
14457
14458     if self.mode == constants.IALLOCATOR_MODE_RELOC:
14459       assert self.relocate_from is not None
14460       assert self.required_nodes == 1
14461
14462       node2group = dict((name, ndata["group"])
14463                         for (name, ndata) in self.in_data["nodes"].items())
14464
14465       fn = compat.partial(self._NodesToGroups, node2group,
14466                           self.in_data["nodegroups"])
14467
14468       instance = self.cfg.GetInstanceInfo(self.name)
14469       request_groups = fn(self.relocate_from + [instance.primary_node])
14470       result_groups = fn(rdict["result"] + [instance.primary_node])
14471
14472       if self.success and not set(result_groups).issubset(request_groups):
14473         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14474                                  " differ from original groups (%s)" %
14475                                  (utils.CommaJoin(result_groups),
14476                                   utils.CommaJoin(request_groups)))
14477
14478     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14479       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14480
14481     self.out_data = rdict
14482
14483   @staticmethod
14484   def _NodesToGroups(node2group, groups, nodes):
14485     """Returns a list of unique group names for a list of nodes.
14486
14487     @type node2group: dict
14488     @param node2group: Map from node name to group UUID
14489     @type groups: dict
14490     @param groups: Group information
14491     @type nodes: list
14492     @param nodes: Node names
14493
14494     """
14495     result = set()
14496
14497     for node in nodes:
14498       try:
14499         group_uuid = node2group[node]
14500       except KeyError:
14501         # Ignore unknown node
14502         pass
14503       else:
14504         try:
14505           group = groups[group_uuid]
14506         except KeyError:
14507           # Can't find group, let's use UUID
14508           group_name = group_uuid
14509         else:
14510           group_name = group["name"]
14511
14512         result.add(group_name)
14513
14514     return sorted(result)
14515
14516
14517 class LUTestAllocator(NoHooksLU):
14518   """Run allocator tests.
14519
14520   This LU runs the allocator tests
14521
14522   """
14523   def CheckPrereq(self):
14524     """Check prerequisites.
14525
14526     This checks the opcode parameters depending on the director and mode test.
14527
14528     """
14529     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14530       for attr in ["memory", "disks", "disk_template",
14531                    "os", "tags", "nics", "vcpus"]:
14532         if not hasattr(self.op, attr):
14533           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14534                                      attr, errors.ECODE_INVAL)
14535       iname = self.cfg.ExpandInstanceName(self.op.name)
14536       if iname is not None:
14537         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14538                                    iname, errors.ECODE_EXISTS)
14539       if not isinstance(self.op.nics, list):
14540         raise errors.OpPrereqError("Invalid parameter 'nics'",
14541                                    errors.ECODE_INVAL)
14542       if not isinstance(self.op.disks, list):
14543         raise errors.OpPrereqError("Invalid parameter 'disks'",
14544                                    errors.ECODE_INVAL)
14545       for row in self.op.disks:
14546         if (not isinstance(row, dict) or
14547             constants.IDISK_SIZE not in row or
14548             not isinstance(row[constants.IDISK_SIZE], int) or
14549             constants.IDISK_MODE not in row or
14550             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14551           raise errors.OpPrereqError("Invalid contents of the 'disks'"
14552                                      " parameter", errors.ECODE_INVAL)
14553       if self.op.hypervisor is None:
14554         self.op.hypervisor = self.cfg.GetHypervisorType()
14555     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14556       fname = _ExpandInstanceName(self.cfg, self.op.name)
14557       self.op.name = fname
14558       self.relocate_from = \
14559           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14560     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14561                           constants.IALLOCATOR_MODE_NODE_EVAC):
14562       if not self.op.instances:
14563         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14564       self.op.instances = _GetWantedInstances(self, self.op.instances)
14565     else:
14566       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14567                                  self.op.mode, errors.ECODE_INVAL)
14568
14569     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14570       if self.op.allocator is None:
14571         raise errors.OpPrereqError("Missing allocator name",
14572                                    errors.ECODE_INVAL)
14573     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14574       raise errors.OpPrereqError("Wrong allocator test '%s'" %
14575                                  self.op.direction, errors.ECODE_INVAL)
14576
14577   def Exec(self, feedback_fn):
14578     """Run the allocator test.
14579
14580     """
14581     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14582       ial = IAllocator(self.cfg, self.rpc,
14583                        mode=self.op.mode,
14584                        name=self.op.name,
14585                        memory=self.op.memory,
14586                        disks=self.op.disks,
14587                        disk_template=self.op.disk_template,
14588                        os=self.op.os,
14589                        tags=self.op.tags,
14590                        nics=self.op.nics,
14591                        vcpus=self.op.vcpus,
14592                        hypervisor=self.op.hypervisor,
14593                        )
14594     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14595       ial = IAllocator(self.cfg, self.rpc,
14596                        mode=self.op.mode,
14597                        name=self.op.name,
14598                        relocate_from=list(self.relocate_from),
14599                        )
14600     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14601       ial = IAllocator(self.cfg, self.rpc,
14602                        mode=self.op.mode,
14603                        instances=self.op.instances,
14604                        target_groups=self.op.target_groups)
14605     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14606       ial = IAllocator(self.cfg, self.rpc,
14607                        mode=self.op.mode,
14608                        instances=self.op.instances,
14609                        evac_mode=self.op.evac_mode)
14610     else:
14611       raise errors.ProgrammerError("Uncatched mode %s in"
14612                                    " LUTestAllocator.Exec", self.op.mode)
14613
14614     if self.op.direction == constants.IALLOCATOR_DIR_IN:
14615       result = ial.in_text
14616     else:
14617       ial.Run(self.op.allocator, validate=False)
14618       result = ial.out_text
14619     return result
14620
14621
14622 #: Query type implementations
14623 _QUERY_IMPL = {
14624   constants.QR_INSTANCE: _InstanceQuery,
14625   constants.QR_NODE: _NodeQuery,
14626   constants.QR_GROUP: _GroupQuery,
14627   constants.QR_OS: _OsQuery,
14628   }
14629
14630 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14631
14632
14633 def _GetQueryImplementation(name):
14634   """Returns the implemtnation for a query type.
14635
14636   @param name: Query type, must be one of L{constants.QR_VIA_OP}
14637
14638   """
14639   try:
14640     return _QUERY_IMPL[name]
14641   except KeyError:
14642     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14643                                errors.ECODE_INVAL)