4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
import copy
import itertools
import logging
import re

import OpenSSL

44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti.masterd import iallocator
66 import ganeti.masterd.instance # pylint: disable=W0611
70 INSTANCE_DOWN = [constants.ADMINST_DOWN]
71 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
72 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
74 #: Instance status in which an instance can be marked as offline/online
75 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
76 constants.ADMINST_OFFLINE,
81 """Data container for LU results with jobs.
83 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
84 by L{mcpu._ProcessResult}. The latter will then submit the jobs
85 contained in the C{jobs} attribute and include the job IDs in the opcode
89 def __init__(self, jobs, **kwargs):
90 """Initializes this class.
92 Additional return values can be specified as keyword arguments.
94 @type jobs: list of lists of L{opcodes.OpCode}
95 @param jobs: A list of lists of opcode objects
102 class LogicalUnit(object):
103 """Logical Unit base class.
105 Subclasses must follow these rules:
106 - implement ExpandNames
107 - implement CheckPrereq (except when tasklets are used)
108 - implement Exec (except when tasklets are used)
109 - implement BuildHooksEnv
110 - implement BuildHooksNodes
111 - redefine HPATH and HTYPE
112 - optionally redefine their run requirements:
113 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
115 Note that all commands require root permissions.
117 @ivar dry_run_result: the value (if any) that will be returned to the caller
118 in dry-run mode (signalled by opcode dry_run parameter)
125 def __init__(self, processor, op, context, rpc_runner):
126 """Constructor for LogicalUnit.
128 This needs to be overridden in derived classes in order to check op
132 self.proc = processor
134 self.cfg = context.cfg
135 self.glm = context.glm
137 self.owned_locks = context.glm.list_owned
138 self.context = context
139 self.rpc = rpc_runner
140 # Dicts used to declare locking needs to mcpu
141 self.needed_locks = None
142 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
144 self.remove_locks = {}
145 # Used to force good behavior when calling helper functions
146 self.recalculate_locks = {}
148 self.Log = processor.Log # pylint: disable=C0103
149 self.LogWarning = processor.LogWarning # pylint: disable=C0103
150 self.LogInfo = processor.LogInfo # pylint: disable=C0103
151 self.LogStep = processor.LogStep # pylint: disable=C0103
152 # support for dry-run
153 self.dry_run_result = None
154 # support for generic debug attribute
155 if (not hasattr(self.op, "debug_level") or
156 not isinstance(self.op.debug_level, int)):
157 self.op.debug_level = 0
162 # Validate opcode parameters and set defaults
163 self.op.Validate(True)
165 self.CheckArguments()
167 def CheckArguments(self):
168 """Check syntactic validity for the opcode arguments.
170 This method is for doing a simple syntactic check and ensuring the
171 validity of opcode parameters, without any cluster-related
172 checks. While the same can be accomplished in ExpandNames and/or
173 CheckPrereq, doing these separately is better because:
175 - ExpandNames is left purely as a lock-related function
176 - CheckPrereq is run after we have acquired locks (and possible
179 The function is allowed to change the self.op attribute so that
180 later methods need no longer worry about missing parameters.
185 def ExpandNames(self):
186 """Expand names for this LU.
188 This method is called before starting to execute the opcode, and it should
189 update all the parameters of the opcode to their canonical form (e.g. a
190 short node name must be fully expanded after this method has successfully
191 completed). This way locking, hooks, logging, etc. can work correctly.
193 LUs which implement this method must also populate the self.needed_locks
194 member, as a dict with lock levels as keys, and a list of needed lock names
197 - use an empty dict if you don't need any lock
198 - if you don't need any lock at a particular level omit that
199 level (note that in this case C{DeclareLocks} won't be called
200 at all for that level)
201 - if you need locks at a level, but you can't calculate it in
202 this function, initialise that level with an empty list and do
203 further processing in L{LogicalUnit.DeclareLocks} (see that
204 function's docstring)
205 - don't put anything for the BGL level
206 - if you want all locks at a level use L{locking.ALL_SET} as a value
208 If you need to share locks (rather than acquire them exclusively) at one
209 level you can modify self.share_locks, setting a true value (usually 1) for
210 that level. By default locks are not shared.
212 This function can also define a list of tasklets, which then will be
213 executed in order instead of the usual LU-level CheckPrereq and Exec
214 functions, if those are not defined by the LU.
218 # Acquire all nodes and one instance
219 self.needed_locks = {
220 locking.LEVEL_NODE: locking.ALL_SET,
221 locking.LEVEL_INSTANCE: ['instance1.example.com'],
223 # Acquire just two nodes
224 self.needed_locks = {
225 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
228 self.needed_locks = {} # No, you can't leave it to the default value None
231 # The implementation of this method is mandatory only if the new LU is
232 # concurrent, so that old LUs don't need to be changed all at the same
235 self.needed_locks = {} # Exclusive LUs don't need locks.
237 raise NotImplementedError
239 def DeclareLocks(self, level):
240 """Declare LU locking needs for a level
242 While most LUs can just declare their locking needs at ExpandNames time,
243 sometimes there's the need to calculate some locks after having acquired
244 the ones before. This function is called just before acquiring locks at a
245 particular level, but after acquiring the ones at lower levels, and permits
246 such calculations. It can be used to modify self.needed_locks, and by
247 default it does nothing.
249 This function is only called if you have something already set in
250 self.needed_locks for the level.
252 @param level: Locking level which is going to be locked
253 @type level: member of L{ganeti.locking.LEVELS}
257 def CheckPrereq(self):
258 """Check prerequisites for this LU.
260 This method should check that the prerequisites for the execution
261 of this LU are fulfilled. It can do internode communication, but
262 it should be idempotent - no cluster or system changes are
265 The method should raise errors.OpPrereqError in case something is
266 not fulfilled. Its return value is ignored.
268 This method should also update all the parameters of the opcode to
269 their canonical form if it hasn't been done by ExpandNames before.
272 if self.tasklets is not None:
273 for (idx, tl) in enumerate(self.tasklets):
274 logging.debug("Checking prerequisites for tasklet %s/%s",
275 idx + 1, len(self.tasklets))
280 def Exec(self, feedback_fn):
283 This method should implement the actual work. It should raise
284 errors.OpExecError for failures that are somewhat dealt with in
288 if self.tasklets is not None:
289 for (idx, tl) in enumerate(self.tasklets):
290 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
293 raise NotImplementedError
295 def BuildHooksEnv(self):
296 """Build hooks environment for this LU.
299 @return: Dictionary containing the environment that will be used for
300 running the hooks for this LU. The keys of the dict must not be prefixed
301 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
302 will extend the environment with additional variables. If no environment
303 should be defined, an empty dictionary should be returned (not C{None}).
304 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
308 raise NotImplementedError
310 def BuildHooksNodes(self):
311 """Build list of nodes to run LU's hooks.
313 @rtype: tuple; (list, list)
314 @return: Tuple containing a list of node names on which the hook
315 should run before the execution and a list of node names on which the
316 hook should run after the execution. If no nodes are needed, return an
317 empty list (and not None).
318 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
322 raise NotImplementedError
324 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
325 """Notify the LU about the results of its hooks.
327 This method is called every time a hooks phase is executed, and notifies
328 the Logical Unit about the hooks' result. The LU can then use it to alter
329 its result based on the hooks. By default the method does nothing and the
330 previous result is passed back unchanged but any LU can define it if it
331 wants to use the local cluster hook-scripts somehow.
333 @param phase: one of L{constants.HOOKS_PHASE_POST} or
334 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
335 @param hook_results: the results of the multi-node hooks rpc call
336 @param feedback_fn: function used to send feedback back to the caller
337 @param lu_result: the previous Exec result this LU had, or None
339 @return: the new Exec result, based on the previous result
343 # API must be kept, thus we ignore the unused-argument and
344 # method-could-be-a-function warnings
345 # pylint: disable=W0613,R0201
348 def _ExpandAndLockInstance(self):
349 """Helper function to expand and lock an instance.
351 Many LUs that work on an instance take its name in self.op.instance_name
352 and need to expand it and then declare the expanded name for locking. This
353 function does it, and then updates self.op.instance_name to the expanded
354 name. It also initializes needed_locks as a dict, if this hasn't been done
358 if self.needed_locks is None:
359 self.needed_locks = {}
361 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
362 "_ExpandAndLockInstance called with instance-level locks set"
363 self.op.instance_name = _ExpandInstanceName(self.cfg,
364 self.op.instance_name)
365 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
367 def _LockInstancesNodes(self, primary_only=False,
368 level=locking.LEVEL_NODE):
369 """Helper function to declare instances' nodes for locking.
371 This function should be called after locking one or more instances to lock
372 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
373 with all primary or secondary nodes for instances already locked and
374 present in self.needed_locks[locking.LEVEL_INSTANCE].
376 It should be called from DeclareLocks, and for safety only works if
377 self.recalculate_locks[locking.LEVEL_NODE] is set.
379 In the future it may grow parameters to lock just some instances' nodes, or
380 to lock just primary or secondary nodes, if needed.
382 It should be called in DeclareLocks in a way similar to::
384 if level == locking.LEVEL_NODE:
385 self._LockInstancesNodes()
387 @type primary_only: boolean
388 @param primary_only: only lock primary nodes of locked instances
389 @param level: Which lock level to use for locking nodes
392 assert level in self.recalculate_locks, \
393 "_LockInstancesNodes helper function called with no nodes to recalculate"
395 # TODO: check if we've really been called with the instance locks held
397 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
398 # future we might want to have different behaviors depending on the value
399 # of self.recalculate_locks[locking.LEVEL_NODE]
401 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
402 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
403 wanted_nodes.append(instance.primary_node)
405 wanted_nodes.extend(instance.secondary_nodes)
407 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
408 self.needed_locks[level] = wanted_nodes
409 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
410 self.needed_locks[level].extend(wanted_nodes)
412 raise errors.ProgrammerError("Unknown recalculation mode")
414 del self.recalculate_locks[level]
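# A hedged illustration (hypothetical LU, not from this module) of how the
# helpers above are typically combined: an instance LU expands and locks its
# instance in ExpandNames, then picks up the node locks in DeclareLocks:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()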
417 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
418 """Simple LU which runs no hooks.
420 This LU is intended as a parent for other LogicalUnits which will
421 run no hooks, in order to reduce duplicate code.
427 def BuildHooksEnv(self):
428 """Empty BuildHooksEnv for NoHooksLU.
430 This just raises an error.
433 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
435 def BuildHooksNodes(self):
436 """Empty BuildHooksNodes for NoHooksLU.
439 raise AssertionError("BuildHooksNodes called for NoHooksLU")
443 """Tasklet base class.
445 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
446 they can mix legacy code with tasklets. Locking needs to be done in the LU,
447 tasklets know nothing about locks.
449 Subclasses must follow these rules:
450 - Implement CheckPrereq
454 def __init__(self, lu):
461 def CheckPrereq(self):
462 """Check prerequisites for this tasklet.
464 This method should check whether the prerequisites for the execution of
465 this tasklet are fulfilled. It can do internode communication, but it
466 should be idempotent - no cluster or system changes are allowed.
468 The method should raise errors.OpPrereqError in case something is not
469 fulfilled. Its return value is ignored.
471 This method should also update all parameters to their canonical form if it
472 hasn't been done before.
477 def Exec(self, feedback_fn):
478 """Execute the tasklet.
480 This method should implement the actual work. It should raise
481 errors.OpExecError for failures that are somewhat dealt with in code, or
485 raise NotImplementedError
489 """Base for query utility classes.
492 #: Attribute holding field definitions
498 def __init__(self, qfilter, fields, use_locking):
499 """Initializes this class.
502 self.use_locking = use_locking
504 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
505 namefield=self.SORT_FIELD)
506 self.requested_data = self.query.RequestedData()
507 self.names = self.query.RequestedNames()
509 # Sort only if no names were requested
510 self.sort_by_name = not self.names
512 self.do_locking = None
515 def _GetNames(self, lu, all_names, lock_level):
516 """Helper function to determine names asked for in the query.
520 names = lu.owned_locks(lock_level)
524 if self.wanted == locking.ALL_SET:
525 assert not self.names
526 # caller didn't specify names, so ordering is not important
527 return utils.NiceSort(names)
529 # caller specified names and we must keep the same order
531 assert not self.do_locking or lu.glm.is_owned(lock_level)
533 missing = set(self.wanted).difference(names)
535 raise errors.OpExecError("Some items were removed before retrieving"
536 " their data: %s" % missing)
538 # Return expanded names
541 def ExpandNames(self, lu):
542 """Expand names for this query.
544 See L{LogicalUnit.ExpandNames}.
547 raise NotImplementedError()
549 def DeclareLocks(self, lu, level):
550 """Declare locks for this query.
552 See L{LogicalUnit.DeclareLocks}.
555 raise NotImplementedError()
557 def _GetQueryData(self, lu):
558 """Collects all data for this query.
560 @return: Query data object
563 raise NotImplementedError()
565 def NewStyleQuery(self, lu):
566 """Collect data and execute query.
569 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
570 sort_by_name=self.sort_by_name)
572 def OldStyleQuery(self, lu):
573 """Collect data and execute query.
576 return self.query.OldStyleQuery(self._GetQueryData(lu),
577 sort_by_name=self.sort_by_name)
581 """Returns a dict declaring all lock levels shared.
584 return dict.fromkeys(locking.LEVELS, 1)
587 def _AnnotateDiskParams(instance, devs, cfg):
588 """Little helper wrapper to the rpc annotation method.
590 @param instance: The instance object
591 @type devs: List of L{objects.Disk}
592 @param devs: The root devices (not any of its children!)
593 @param cfg: The config object
594 @return: The annotated disk copies
595 @see L{rpc.AnnotateDiskParams}
598 return rpc.AnnotateDiskParams(instance.disk_template, devs,
599 cfg.GetInstanceDiskParams(instance))
602 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
604 """Checks if node groups for locked instances are still correct.
606 @type cfg: L{config.ConfigWriter}
607 @param cfg: Cluster configuration
608 @type instances: dict; string as key, L{objects.Instance} as value
609 @param instances: Dictionary, instance name as key, instance object as value
610 @type owned_groups: iterable of string
611 @param owned_groups: List of owned groups
612 @type owned_nodes: iterable of string
613 @param owned_nodes: List of owned nodes
614 @type cur_group_uuid: string or None
615 @param cur_group_uuid: Optional group UUID to check against instance's groups
618 for (name, inst) in instances.items():
619 assert owned_nodes.issuperset(inst.all_nodes), \
620 "Instance %s's nodes changed while we kept the lock" % name
622 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
624 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
625 "Instance %s has no node in group %s" % (name, cur_group_uuid)
628 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
630 """Checks if the owned node groups are still correct for an instance.
632 @type cfg: L{config.ConfigWriter}
633 @param cfg: The cluster configuration
634 @type instance_name: string
635 @param instance_name: Instance name
636 @type owned_groups: set or frozenset
637 @param owned_groups: List of currently owned node groups
638 @type primary_only: boolean
639 @param primary_only: Whether to check node groups for only the primary node
642 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
644 if not owned_groups.issuperset(inst_groups):
645 raise errors.OpPrereqError("Instance %s's node groups changed since"
646 " locks were acquired, current groups"
647 " are '%s', owning groups '%s'; retry the"
650 utils.CommaJoin(inst_groups),
651 utils.CommaJoin(owned_groups)),
657 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
658 """Checks if the instances in a node group are still correct.
660 @type cfg: L{config.ConfigWriter}
661 @param cfg: The cluster configuration
662 @type group_uuid: string
663 @param group_uuid: Node group UUID
664 @type owned_instances: set or frozenset
665 @param owned_instances: List of currently owned instances
668 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
669 if owned_instances != wanted_instances:
670 raise errors.OpPrereqError("Instances in node group '%s' changed since"
671 " locks were acquired, wanted '%s', have '%s';"
672 " retry the operation" %
674 utils.CommaJoin(wanted_instances),
675 utils.CommaJoin(owned_instances)),
678 return wanted_instances
681 def _SupportsOob(cfg, node):
682 """Tells whether a node supports OOB.
684 @type cfg: L{config.ConfigWriter}
685 @param cfg: The cluster configuration
686 @type node: L{objects.Node}
687 @param node: The node
688 @return: The OOB script if supported or an empty string otherwise
691 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
694 def _CopyLockList(names):
695 """Makes a copy of a list of lock names.
697 Handles L{locking.ALL_SET} correctly.
700 if names == locking.ALL_SET:
701 return locking.ALL_SET
706 def _GetWantedNodes(lu, nodes):
707 """Returns list of checked and expanded node names.
709 @type lu: L{LogicalUnit}
710 @param lu: the logical unit on whose behalf we execute
712 @param nodes: list of node names or None for all nodes
714 @return: the list of nodes, sorted
715 @raise errors.ProgrammerError: if the nodes parameter is wrong type
719 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
721 return utils.NiceSort(lu.cfg.GetNodeList())
724 def _GetWantedInstances(lu, instances):
725 """Returns list of checked and expanded instance names.
727 @type lu: L{LogicalUnit}
728 @param lu: the logical unit on whose behalf we execute
729 @type instances: list
730 @param instances: list of instance names or None for all instances
732 @return: the list of instances, sorted
733 @raise errors.OpPrereqError: if the instances parameter is wrong type
734 @raise errors.OpPrereqError: if any of the passed instances is not found
738 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
740 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
744 def _GetUpdatedParams(old_params, update_dict,
745 use_default=True, use_none=False):
746 """Return the new version of a parameter dictionary.
748 @type old_params: dict
749 @param old_params: old parameters
750 @type update_dict: dict
751 @param update_dict: dict containing new parameter values, or
752 constants.VALUE_DEFAULT to reset the parameter to its default
754 @type use_default: boolean
755 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
756 values as 'to be deleted' values
757 @type use_none: boolean
758 @param use_none: whether to recognise C{None} values as 'to be
761 @return: the new parameter dictionary
764 params_copy = copy.deepcopy(old_params)
765 for key, val in update_dict.iteritems():
766 if ((use_default and val == constants.VALUE_DEFAULT) or
767 (use_none and val is None)):
773 params_copy[key] = val
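# A usage sketch for _GetUpdatedParams (values illustrative only): with the
# default use_default=True, VALUE_DEFAULT entries are dropped so the parameter
# reverts to its inherited default, while other keys are added or overwritten:
#
#   old = {"kernel_path": "/vmlinuz", "root_path": "/dev/xvda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
#   _GetUpdatedParams(old, upd)
#   # -> {"root_path": "/dev/xvda1", "serial_console": True}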
777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
778 """Return the new version of an instance policy.
780 @param group_policy: whether this policy applies to a group and thus
781 we should support removal of policy entries
784 use_none = use_default = group_policy
785 ipolicy = copy.deepcopy(old_ipolicy)
786 for key, value in new_ipolicy.items():
787 if key not in constants.IPOLICY_ALL_KEYS:
788 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
790 if key in constants.IPOLICY_ISPECS:
791 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
792 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
794 use_default=use_default)
796 if (not value or value == [constants.VALUE_DEFAULT] or
797 value == constants.VALUE_DEFAULT):
801 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
802 " on the cluster" % key,
805 if key in constants.IPOLICY_PARAMETERS:
806 # FIXME: we assume all such values are float
808 ipolicy[key] = float(value)
809 except (TypeError, ValueError), err:
810 raise errors.OpPrereqError("Invalid value for attribute"
811 " '%s': '%s', error: %s" %
812 (key, value, err), errors.ECODE_INVAL)
814 # FIXME: we assume all others are lists; this should be redone
816 ipolicy[key] = list(value)
818 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
819 except errors.ConfigurationError, err:
820 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
825 def _UpdateAndVerifySubDict(base, updates, type_check):
826 """Updates and verifies a dict with sub dicts of the same type.
828 @param base: The dict with the old data
829 @param updates: The dict with the new data
830 @param type_check: Dict suitable to ForceDictType to verify correct types
831 @returns: A new dict with updated and verified values
835 new = _GetUpdatedParams(old, value)
836 utils.ForceDictType(new, type_check)
839 ret = copy.deepcopy(base)
840 ret.update(dict((key, fn(base.get(key, {}), value))
841 for key, value in updates.items()))
845 def _MergeAndVerifyHvState(op_input, obj_input):
846 """Combines the hv state from an opcode with that of the object.
848 @param op_input: The input dict from the opcode
849 @param obj_input: The input dict from the objects
850 @return: The verified and updated dict
854 invalid_hvs = set(op_input) - constants.HYPER_TYPES
856 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
857 " %s" % utils.CommaJoin(invalid_hvs),
859 if obj_input is None:
861 type_check = constants.HVSTS_PARAMETER_TYPES
862 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
867 def _MergeAndVerifyDiskState(op_input, obj_input):
868 """Combines the disk state from an opcode with that of the object.
870 @param op_input: The input dict from the opcode
871 @param obj_input: The input dict from the objects
872 @return: The verified and updated dict
875 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
877 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
878 utils.CommaJoin(invalid_dst),
880 type_check = constants.DSS_PARAMETER_TYPES
881 if obj_input is None:
883 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
885 for key, value in op_input.items())
890 def _ReleaseLocks(lu, level, names=None, keep=None):
891 """Releases locks owned by an LU.
893 @type lu: L{LogicalUnit}
894 @param level: Lock level
895 @type names: list or None
896 @param names: Names of locks to release
897 @type keep: list or None
898 @param keep: Names of locks to retain
901 assert not (keep is not None and names is not None), \
902 "Only one of the 'names' and the 'keep' parameters can be given"
904 if names is not None:
905 should_release = names.__contains__
907 should_release = lambda name: name not in keep
909 should_release = None
911 owned = lu.owned_locks(level)
913 # Not owning any lock at this level, do nothing
920 # Determine which locks to release
922 if should_release(name):
927 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
929 # Release just some locks
930 lu.glm.release(level, names=release)
932 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
935 lu.glm.release(level)
937 assert not lu.glm.is_owned(level), "No locks should be owned"
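# A hedged usage sketch: once an LU has settled on a single target node, it
# can drop every other node lock it still holds (the attribute name
# self.op.node_name is illustrative):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])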
940 def _MapInstanceDisksToNodes(instances):
941 """Creates a map from (node, volume) to instance name.
943 @type instances: list of L{objects.Instance}
944 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
947 return dict(((node, vol), inst.name)
948 for inst in instances
949 for (node, vols) in inst.MapLVsByNode().items()
953 def _RunPostHook(lu, node_name):
954 """Runs the post-hook for an opcode on a single node.
957 hm = lu.proc.BuildHooksManager(lu)
959 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
960 except Exception, err: # pylint: disable=W0703
961 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
964 def _CheckOutputFields(static, dynamic, selected):
965 """Checks whether all selected fields are valid.
967 @type static: L{utils.FieldSet}
968 @param static: static fields set
969 @type dynamic: L{utils.FieldSet}
970 @param dynamic: dynamic fields set
977 delta = f.NonMatching(selected)
979 raise errors.OpPrereqError("Unknown output fields selected: %s"
980 % ",".join(delta), errors.ECODE_INVAL)
983 def _CheckGlobalHvParams(params):
984 """Validates that given hypervisor params are not global ones.
986 This will ensure that instances don't get customised versions of
990 used_globals = constants.HVC_GLOBALS.intersection(params)
992 msg = ("The following hypervisor parameters are global and cannot"
993 " be customized at instance level, please modify them at"
994 " cluster level: %s" % utils.CommaJoin(used_globals))
995 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
998 def _CheckNodeOnline(lu, node, msg=None):
999 """Ensure that a given node is online.
1001 @param lu: the LU on behalf of which we make the check
1002 @param node: the node to check
1003 @param msg: if passed, should be a message to replace the default one
1004 @raise errors.OpPrereqError: if the node is offline
1008 msg = "Can't use offline node"
1009 if lu.cfg.GetNodeInfo(node).offline:
1010 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1013 def _CheckNodeNotDrained(lu, node):
1014 """Ensure that a given node is not drained.
1016 @param lu: the LU on behalf of which we make the check
1017 @param node: the node to check
1018 @raise errors.OpPrereqError: if the node is drained
1021 if lu.cfg.GetNodeInfo(node).drained:
1022 raise errors.OpPrereqError("Can't use drained node %s" % node,
1026 def _CheckNodeVmCapable(lu, node):
1027 """Ensure that a given node is vm capable.
1029 @param lu: the LU on behalf of which we make the check
1030 @param node: the node to check
1031 @raise errors.OpPrereqError: if the node is not vm capable
1034 if not lu.cfg.GetNodeInfo(node).vm_capable:
1035 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1039 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1040 """Ensure that a node supports a given OS.
1042 @param lu: the LU on behalf of which we make the check
1043 @param node: the node to check
1044 @param os_name: the OS to query about
1045 @param force_variant: whether to ignore variant errors
1046 @raise errors.OpPrereqError: if the node does not support the OS
1049 result = lu.rpc.call_os_get(node, os_name)
1050 result.Raise("OS '%s' not in supported OS list for node %s" %
1052 prereq=True, ecode=errors.ECODE_INVAL)
1053 if not force_variant:
1054 _CheckOSVariant(result.payload, os_name)
1057 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1058 """Ensure that a node has the given secondary ip.
1060 @type lu: L{LogicalUnit}
1061 @param lu: the LU on behalf of which we make the check
1063 @param node: the node to check
1064 @type secondary_ip: string
1065 @param secondary_ip: the ip to check
1066 @type prereq: boolean
1067 @param prereq: whether to throw a prerequisite or an execute error
1068 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1069 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1072 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1073 result.Raise("Failure checking secondary ip on node %s" % node,
1074 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1075 if not result.payload:
1076 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1077 " please fix and re-run this command" % secondary_ip)
1079 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1081 raise errors.OpExecError(msg)
1084 def _GetClusterDomainSecret():
1085 """Reads the cluster domain secret.
1088 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1092 def _CheckInstanceState(lu, instance, req_states, msg=None):
1093 """Ensure that an instance is in one of the required states.
1095 @param lu: the LU on behalf of which we make the check
1096 @param instance: the instance to check
1097 @param msg: if passed, should be a message to replace the default one
1098 @raise errors.OpPrereqError: if the instance is not in the required state
1102 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1103 if instance.admin_state not in req_states:
1104 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1105 (instance.name, instance.admin_state, msg),
1108 if constants.ADMINST_UP not in req_states:
1109 pnode = instance.primary_node
1110 if not lu.cfg.GetNodeInfo(pnode).offline:
1111 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1112 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1113 prereq=True, ecode=errors.ECODE_ENVIRON)
1114 if instance.name in ins_l.payload:
1115 raise errors.OpPrereqError("Instance %s is running, %s" %
1116 (instance.name, msg), errors.ECODE_STATE)
1118 lu.LogWarning("Primary node offline, ignoring check that instance"
1122 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1123 """Computes if value is in the desired range.
1125 @param name: name of the parameter for which we perform the check
1126 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1128 @param ipolicy: dictionary containing min, max and std values
1129 @param value: actual value that we want to use
1130 @return: None or element not meeting the criteria
1134 if value in [None, constants.VALUE_AUTO]:
1136 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1137 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1138 if value > max_v or min_v > value:
1140 fqn = "%s/%s" % (name, qualifier)
1143 return ("%s value %s is not in range [%s, %s]" %
1144 (fqn, value, min_v, max_v))
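# A worked sketch of the range check above, assuming a minimal policy built
# from the ISPECS_MIN/ISPECS_MAX constants (numbers illustrative):
#
#   ipolicy = {
#     constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#     constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096},
#   }
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 8192)
#   # -> message saying 8192 is not in range [128, 4096]
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 2048)
#   # -> None (value within range)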
1148 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1149 nic_count, disk_sizes, spindle_use,
1150 _compute_fn=_ComputeMinMaxSpec):
1151 """Verifies ipolicy against provided specs.
1154 @param ipolicy: The ipolicy
1156 @param mem_size: The memory size
1157 @type cpu_count: int
1158 @param cpu_count: Used cpu cores
1159 @type disk_count: int
1160 @param disk_count: Number of disks used
1161 @type nic_count: int
1162 @param nic_count: Number of nics used
1163 @type disk_sizes: list of ints
1164 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1165 @type spindle_use: int
1166 @param spindle_use: The number of spindles this instance uses
1167 @param _compute_fn: The compute function (unittest only)
1168 @return: A list of violations, or an empty list if no violations are found
1171 assert disk_count == len(disk_sizes)
1174 (constants.ISPEC_MEM_SIZE, "", mem_size),
1175 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1176 (constants.ISPEC_DISK_COUNT, "", disk_count),
1177 (constants.ISPEC_NIC_COUNT, "", nic_count),
1178 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1179 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1180 for idx, d in enumerate(disk_sizes)]
1183 (_compute_fn(name, qualifier, ipolicy, value)
1184 for (name, qualifier, value) in test_settings))
1187 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1188 _compute_fn=_ComputeIPolicySpecViolation):
1189 """Compute if instance meets the specs of ipolicy.
1192 @param ipolicy: The ipolicy to verify against
1193 @type instance: L{objects.Instance}
1194 @param instance: The instance to verify
1195 @param _compute_fn: The function to verify ipolicy (unittest only)
1196 @see: L{_ComputeIPolicySpecViolation}
1199 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1200 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1201 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1202 disk_count = len(instance.disks)
1203 disk_sizes = [disk.size for disk in instance.disks]
1204 nic_count = len(instance.nics)
1206 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1207 disk_sizes, spindle_use)
1210 def _ComputeIPolicyInstanceSpecViolation(
1211 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1212 """Compute if instance specs meet the specs of ipolicy.
1215 @param ipolicy: The ipolicy to verify against
1216 @type instance_spec: dict
1217 @param instance_spec: The instance spec to verify
1218 @param _compute_fn: The function to verify ipolicy (unittest only)
1219 @see: L{_ComputeIPolicySpecViolation}
1222 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1223 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1224 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1225 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1226 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1227 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1229 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1230 disk_sizes, spindle_use)
1233 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1235 _compute_fn=_ComputeIPolicyInstanceViolation):
1236 """Compute if instance meets the specs of the new target group.
1238 @param ipolicy: The ipolicy to verify
1239 @param instance: The instance object to verify
1240 @param current_group: The current group of the instance
1241 @param target_group: The new group of the instance
1242 @param _compute_fn: The function to verify ipolicy (unittest only)
1243 @see: L{_ComputeIPolicySpecViolation}
1246 if current_group == target_group:
1249 return _compute_fn(ipolicy, instance)
1252 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1253 _compute_fn=_ComputeIPolicyNodeViolation):
1254 """Checks that the target node is correct in terms of instance policy.
1256 @param ipolicy: The ipolicy to verify
1257 @param instance: The instance object to verify
1258 @param node: The node to relocate the instance to
1259 @param ignore: Ignore violations of the ipolicy
1260 @param _compute_fn: The function to verify ipolicy (unittest only)
1261 @see: L{_ComputeIPolicySpecViolation}
1264 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1265 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1268 msg = ("Instance does not meet target node group's (%s) instance"
1269 " policy: %s") % (node.group, utils.CommaJoin(res))
1273 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1276 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1277 """Computes a set of any instances that would violate the new ipolicy.
1279 @param old_ipolicy: The current (still in-place) ipolicy
1280 @param new_ipolicy: The new (to become) ipolicy
1281 @param instances: List of instances to verify
1282 @return: A list of instances which violate the new ipolicy but
1286 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1287 _ComputeViolatingInstances(old_ipolicy, instances))
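# A hedged example of the set difference above: instances that already broke
# the old policy are not reported again, only newly violating ones are
# (instance objects and names illustrative):
#
#   # inst_a violates both policies, inst_b only the new one
#   _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, [inst_a, inst_b])
#   # -> a frozenset containing only inst_b's name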
1290 def _ExpandItemName(fn, name, kind):
1291 """Expand an item name.
1293 @param fn: the function to use for expansion
1294 @param name: requested item name
1295 @param kind: text description ('Node' or 'Instance')
1296 @return: the resolved (full) name
1297 @raise errors.OpPrereqError: if the item is not found
1300 full_name = fn(name)
1301 if full_name is None:
1302 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1307 def _ExpandNodeName(cfg, name):
1308 """Wrapper over L{_ExpandItemName} for nodes."""
1309 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1312 def _ExpandInstanceName(cfg, name):
1313 """Wrapper over L{_ExpandItemName} for instance."""
1314 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1317 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1318 minmem, maxmem, vcpus, nics, disk_template, disks,
1319 bep, hvp, hypervisor_name, tags):
1320 """Builds instance related env variables for hooks
1322 This builds the hook environment from individual variables.
1325 @param name: the name of the instance
1326 @type primary_node: string
1327 @param primary_node: the name of the instance's primary node
1328 @type secondary_nodes: list
1329 @param secondary_nodes: list of secondary nodes as strings
1330 @type os_type: string
1331 @param os_type: the name of the instance's OS
1332 @type status: string
1333 @param status: the desired status of the instance
1334 @type minmem: string
1335 @param minmem: the minimum memory size of the instance
1336 @type maxmem: string
1337 @param maxmem: the maximum memory size of the instance
1339 @param vcpus: the count of VCPUs the instance has
1341 @param nics: list of tuples (ip, mac, mode, link) representing
1342 the NICs the instance has
1343 @type disk_template: string
1344 @param disk_template: the disk template of the instance
1346 @param disks: the list of (size, mode) pairs
1348 @param bep: the backend parameters for the instance
1350 @param hvp: the hypervisor parameters for the instance
1351 @type hypervisor_name: string
1352 @param hypervisor_name: the hypervisor for the instance
1354 @param tags: list of instance tags as strings
1356 @return: the hook environment for this instance
1361 "INSTANCE_NAME": name,
1362 "INSTANCE_PRIMARY": primary_node,
1363 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1364 "INSTANCE_OS_TYPE": os_type,
1365 "INSTANCE_STATUS": status,
1366 "INSTANCE_MINMEM": minmem,
1367 "INSTANCE_MAXMEM": maxmem,
1368 # TODO(2.7) remove deprecated "memory" value
1369 "INSTANCE_MEMORY": maxmem,
1370 "INSTANCE_VCPUS": vcpus,
1371 "INSTANCE_DISK_TEMPLATE": disk_template,
1372 "INSTANCE_HYPERVISOR": hypervisor_name,
1375 nic_count = len(nics)
1376 for idx, (ip, mac, mode, link) in enumerate(nics):
1379 env["INSTANCE_NIC%d_IP" % idx] = ip
1380 env["INSTANCE_NIC%d_MAC" % idx] = mac
1381 env["INSTANCE_NIC%d_MODE" % idx] = mode
1382 env["INSTANCE_NIC%d_LINK" % idx] = link
1383 if mode == constants.NIC_MODE_BRIDGED:
1384 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1388 env["INSTANCE_NIC_COUNT"] = nic_count
1391 disk_count = len(disks)
1392 for idx, (size, mode) in enumerate(disks):
1393 env["INSTANCE_DISK%d_SIZE" % idx] = size
1394 env["INSTANCE_DISK%d_MODE" % idx] = mode
1398 env["INSTANCE_DISK_COUNT"] = disk_count
1403 env["INSTANCE_TAGS"] = " ".join(tags)
1405 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1406 for key, value in source.items():
1407 env["INSTANCE_%s_%s" % (kind, key)] = value
1412 def _NICListToTuple(lu, nics):
1413 """Build a list of nic information tuples.
1415 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1416 value in LUInstanceQueryData.
1418 @type lu: L{LogicalUnit}
1419 @param lu: the logical unit on whose behalf we execute
1420 @type nics: list of L{objects.NIC}
1421 @param nics: list of nics to convert to hooks tuples
1425 cluster = lu.cfg.GetClusterInfo()
1429 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1430 mode = filled_params[constants.NIC_MODE]
1431 link = filled_params[constants.NIC_LINK]
1432 hooks_nics.append((ip, mac, mode, link))
1436 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1437 """Builds instance related env variables for hooks from an object.
1439 @type lu: L{LogicalUnit}
1440 @param lu: the logical unit on whose behalf we execute
1441 @type instance: L{objects.Instance}
1442 @param instance: the instance for which we should build the
1444 @type override: dict
1445 @param override: dictionary with key/values that will override
1448 @return: the hook environment dictionary
1451 cluster = lu.cfg.GetClusterInfo()
1452 bep = cluster.FillBE(instance)
1453 hvp = cluster.FillHV(instance)
1455 "name": instance.name,
1456 "primary_node": instance.primary_node,
1457 "secondary_nodes": instance.secondary_nodes,
1458 "os_type": instance.os,
1459 "status": instance.admin_state,
1460 "maxmem": bep[constants.BE_MAXMEM],
1461 "minmem": bep[constants.BE_MINMEM],
1462 "vcpus": bep[constants.BE_VCPUS],
1463 "nics": _NICListToTuple(lu, instance.nics),
1464 "disk_template": instance.disk_template,
1465 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1468 "hypervisor_name": instance.hypervisor,
1469 "tags": instance.tags,
1472 args.update(override)
1473 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1476 def _AdjustCandidatePool(lu, exceptions):
1477 """Adjust the candidate pool after node operations.
1480 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1482 lu.LogInfo("Promoted nodes to master candidate role: %s",
1483 utils.CommaJoin(node.name for node in mod_list))
1484 for name in mod_list:
1485 lu.context.ReaddNode(name)
1486 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1488 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1492 def _DecideSelfPromotion(lu, exceptions=None):
1493 """Decide whether I should promote myself as a master candidate.
1496 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1497 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1498 # the new node will increase mc_max by one, so:
1499 mc_should = min(mc_should + 1, cp_size)
1500 return mc_now < mc_should
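# A worked example of the decision above (numbers illustrative): with
# candidate_pool_size = 10, 3 current candidates and 3 desired, the new node
# raises the desired count to min(3 + 1, 10) = 4, and since 3 < 4 the node
# promotes itself to master candidate.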
1503 def _ComputeViolatingInstances(ipolicy, instances):
1504 """Computes the set of instances that violate the given ipolicy.
1506 @param ipolicy: The ipolicy to verify
1507 @type instances: list of L{objects.Instance}
1508 @param instances: List of instances to verify
1509 @return: A frozenset of instance names violating the ipolicy
1512 return frozenset([inst.name for inst in instances
1513 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1516 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1517 """Check that the bridges needed by a list of nics exist.
1520 cluster = lu.cfg.GetClusterInfo()
1521 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1522 brlist = [params[constants.NIC_LINK] for params in paramslist
1523 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1525 result = lu.rpc.call_bridges_exist(target_node, brlist)
1526 result.Raise("Error checking bridges on destination node '%s'" %
1527 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1530 def _CheckInstanceBridgesExist(lu, instance, node=None):
1531 """Check that the bridges needed by an instance exist.
1535 node = instance.primary_node
1536 _CheckNicsBridgesExist(lu, instance.nics, node)
1539 def _CheckOSVariant(os_obj, name):
1540 """Check whether an OS name conforms to the os variants specification.
1542 @type os_obj: L{objects.OS}
1543 @param os_obj: OS object to check
1545 @param name: OS name passed by the user, to check for validity
1548 variant = objects.OS.GetVariant(name)
1549 if not os_obj.supported_variants:
1551 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1552 " passed)" % (os_obj.name, variant),
1556 raise errors.OpPrereqError("OS name must include a variant",
1559 if variant not in os_obj.supported_variants:
1560 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1563 def _GetNodeInstancesInner(cfg, fn):
1564 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1567 def _GetNodeInstances(cfg, node_name):
1568 """Returns a list of all primary and secondary instances on a node.
1572 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1575 def _GetNodePrimaryInstances(cfg, node_name):
1576 """Returns primary instances on a node.
1579 return _GetNodeInstancesInner(cfg,
1580 lambda inst: node_name == inst.primary_node)
1583 def _GetNodeSecondaryInstances(cfg, node_name):
1584 """Returns secondary instances on a node.
1587 return _GetNodeInstancesInner(cfg,
1588 lambda inst: node_name in inst.secondary_nodes)
1591 def _GetStorageTypeArgs(cfg, storage_type):
1592 """Returns the arguments for a storage type.
1595 # Special case for file storage
1596 if storage_type == constants.ST_FILE:
1597 # storage.FileStorage wants a list of storage directories
1598 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1603 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1606 for dev in instance.disks:
1607 cfg.SetDiskID(dev, node_name)
1609 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1611 result.Raise("Failed to get disk status from node %s" % node_name,
1612 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1614 for idx, bdev_status in enumerate(result.payload):
1615 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1621 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1622 """Check the sanity of iallocator and node arguments and use the
1623 cluster-wide iallocator if appropriate.
1625 Check that at most one of (iallocator, node) is specified. If none is
1626 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1627 then the LU's opcode's iallocator slot is filled with the cluster-wide
1630 @type iallocator_slot: string
1631 @param iallocator_slot: the name of the opcode iallocator slot
1632 @type node_slot: string
1633 @param node_slot: the name of the opcode target node slot
1636 node = getattr(lu.op, node_slot, None)
1637 ialloc = getattr(lu.op, iallocator_slot, None)
1641 if node is not None and ialloc is not None:
1642 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1644 elif ((node is None and ialloc is None) or
1645 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1646 default_iallocator = lu.cfg.GetDefaultIAllocator()
1647 if default_iallocator:
1648 setattr(lu.op, iallocator_slot, default_iallocator)
1650 raise errors.OpPrereqError("No iallocator or node given and no"
1651 " cluster-wide default iallocator found;"
1652 " please specify either an iallocator or a"
1653 " node, or set a cluster-wide default"
1654 " iallocator", errors.ECODE_INVAL)
1657 def _GetDefaultIAllocator(cfg, ialloc):
1658 """Decides on which iallocator to use.
1660 @type cfg: L{config.ConfigWriter}
1661 @param cfg: Cluster configuration object
1662 @type ialloc: string or None
1663 @param ialloc: Iallocator specified in opcode
1665 @return: Iallocator name
1669 # Use default iallocator
1670 ialloc = cfg.GetDefaultIAllocator()
1673 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1674 " opcode nor as a cluster-wide default",
1680 def _CheckHostnameSane(lu, name):
1681 """Ensures that a given hostname resolves to a 'sane' name.
1683 The given name is required to be a prefix of the resolved hostname,
1684 to prevent accidental mismatches.
1686 @param lu: the logical unit on behalf of which we're checking
1687 @param name: the name we should resolve and check
1688 @return: the resolved hostname object
1691 hostname = netutils.GetHostname(name=name)
1692 if hostname.name != name:
1693 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1694 if not utils.MatchNameComponent(name, [hostname.name]):
1695 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1696 " same as given hostname '%s'") %
1697 (hostname.name, name), errors.ECODE_INVAL)
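# A hedged example of the check above (names illustrative): a given name
# "web1" that resolves to "web1.example.com" is accepted (and the expansion is
# logged), while a name that resolves to something it is not a prefix of, say
# "mail.example.com", raises OpPrereqError.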
1701 class LUClusterPostInit(LogicalUnit):
1702 """Logical unit for running hooks after cluster initialization.
1705 HPATH = "cluster-init"
1706 HTYPE = constants.HTYPE_CLUSTER
1708 def BuildHooksEnv(self):
1713 "OP_TARGET": self.cfg.GetClusterName(),
1716 def BuildHooksNodes(self):
1717 """Build hooks nodes.
1720 return ([], [self.cfg.GetMasterNode()])
1722 def Exec(self, feedback_fn):
1729 class LUClusterDestroy(LogicalUnit):
1730 """Logical unit for destroying the cluster.
1733 HPATH = "cluster-destroy"
1734 HTYPE = constants.HTYPE_CLUSTER
1736 def BuildHooksEnv(self):
1741 "OP_TARGET": self.cfg.GetClusterName(),
1744 def BuildHooksNodes(self):
1745 """Build hooks nodes.
1750 def CheckPrereq(self):
1751 """Check prerequisites.
1753 This checks whether the cluster is empty.
1755 Any errors are signaled by raising errors.OpPrereqError.
1758 master = self.cfg.GetMasterNode()
1760 nodelist = self.cfg.GetNodeList()
1761 if len(nodelist) != 1 or nodelist[0] != master:
1762 raise errors.OpPrereqError("There are still %d node(s) in"
1763 " this cluster." % (len(nodelist) - 1),
1765 instancelist = self.cfg.GetInstanceList()
1767 raise errors.OpPrereqError("There are still %d instance(s) in"
1768 " this cluster." % len(instancelist),
1771 def Exec(self, feedback_fn):
1772 """Destroys the cluster.
1775 master_params = self.cfg.GetMasterNetworkParameters()
1777 # Run post hooks on master node before it's removed
1778 _RunPostHook(self, master_params.name)
1780 ems = self.cfg.GetUseExternalMipScript()
1781 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1784 self.LogWarning("Error disabling the master IP address: %s",
1787 return master_params.name
1790 def _VerifyCertificate(filename):
1791 """Verifies a certificate for L{LUClusterVerifyConfig}.
1793 @type filename: string
1794 @param filename: Path to PEM file
1798 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1799 utils.ReadFile(filename))
1800 except Exception, err: # pylint: disable=W0703
1801 return (LUClusterVerifyConfig.ETYPE_ERROR,
1802 "Failed to load X509 certificate %s: %s" % (filename, err))
1805 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1806 constants.SSL_CERT_EXPIRATION_ERROR)
1809 fnamemsg = "While verifying %s: %s" % (filename, msg)
1814 return (None, fnamemsg)
1815 elif errcode == utils.CERT_WARNING:
1816 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1817 elif errcode == utils.CERT_ERROR:
1818 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1820 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1823 def _GetAllHypervisorParameters(cluster, instances):
1824 """Compute the set of all hypervisor parameters.
1826 @type cluster: L{objects.Cluster}
1827 @param cluster: the cluster object
1828 @type instances: list of L{objects.Instance}
1829 @param instances: additional instances from which to obtain parameters
1830 @rtype: list of (origin, hypervisor, parameters)
1831 @return: a list with all parameters found, indicating the hypervisor they
1832 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1837 for hv_name in cluster.enabled_hypervisors:
1838 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1840 for os_name, os_hvp in cluster.os_hvp.items():
1841 for hv_name, hv_params in os_hvp.items():
1843 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1844 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1846 # TODO: collapse identical parameter values in a single one
1847 for instance in instances:
1848 if instance.hvparams:
1849 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1850 cluster.FillHV(instance)))
1855 class _VerifyErrors(object):
1856 """Mix-in for cluster/group verify LUs.
1858 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1859 self.op and self._feedback_fn to be available.)
1863 ETYPE_FIELD = "code"
1864 ETYPE_ERROR = "ERROR"
1865 ETYPE_WARNING = "WARNING"
1867 def _Error(self, ecode, item, msg, *args, **kwargs):
1868 """Format an error message.
1870 Based on the opcode's error_codes parameter, either format a
1871 parseable error code, or a simpler error string.
1873 This must be called only from Exec and functions called from Exec.
1876 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1877 itype, etxt, _ = ecode
1878 # first complete the msg
1881 # then format the whole message
1882 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1883 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1889 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1890 # and finally report it via the feedback_fn
1891 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1893 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1894 """Log an error message if the passed condition is True.
1898 or self.op.debug_simulate_errors) # pylint: disable=E1101
1900 # If the error code is in the list of ignored errors, demote the error to a
1902 (_, etxt, _) = ecode
1903 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1904 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1907 self._Error(ecode, *args, **kwargs)
1909 # do not mark the operation as failed for WARN cases only
1910 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1911 self.bad = self.bad or cond
1914 class LUClusterVerify(NoHooksLU):
1915 """Submits all jobs necessary to verify the cluster.
1920 def ExpandNames(self):
1921 self.needed_locks = {}
1923 def Exec(self, feedback_fn):
1926 if self.op.group_name:
1927 groups = [self.op.group_name]
1928 depends_fn = lambda: None
1930 groups = self.cfg.GetNodeGroupList()
1932 # Verify global configuration
1934 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1937 # Always depend on global verification
1938 depends_fn = lambda: [(-len(jobs), [])]
1941 [opcodes.OpClusterVerifyGroup(group_name=group,
1942 ignore_errors=self.op.ignore_errors,
1943 depends=depends_fn())]
1944 for group in groups)
1946 # Fix up all parameters
1947 for op in itertools.chain(*jobs): # pylint: disable=W0142
1948 op.debug_simulate_errors = self.op.debug_simulate_errors
1949 op.verbose = self.op.verbose
1950 op.error_codes = self.op.error_codes
1952 op.skip_checks = self.op.skip_checks
1953 except AttributeError:
1954 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1956 return ResultWithJobs(jobs)
1959 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1960 """Verifies the cluster config.
1965 def _VerifyHVP(self, hvp_data):
1966 """Verifies locally the syntax of the hypervisor parameters.
1969 for item, hv_name, hv_params in hvp_data:
1970 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1973 hv_class = hypervisor.GetHypervisor(hv_name)
1974 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1975 hv_class.CheckParameterSyntax(hv_params)
1976 except errors.GenericError, err:
1977 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1979 def ExpandNames(self):
1980 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1981 self.share_locks = _ShareAll()
1983 def CheckPrereq(self):
1984 """Check prerequisites.
1987 # Retrieve all information
1988 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1989 self.all_node_info = self.cfg.GetAllNodesInfo()
1990 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1992 def Exec(self, feedback_fn):
1993 """Verify integrity of cluster, performing various tests on nodes.
1997 self._feedback_fn = feedback_fn
1999 feedback_fn("* Verifying cluster config")
2001 for msg in self.cfg.VerifyConfig():
2002 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2004 feedback_fn("* Verifying cluster certificate files")
2006 for cert_filename in pathutils.ALL_CERT_FILES:
2007 (errcode, msg) = _VerifyCertificate(cert_filename)
2008 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2010 feedback_fn("* Verifying hypervisor parameters")
2012 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2013 self.all_inst_info.values()))
2015 feedback_fn("* Verifying all nodes belong to an existing group")
2017 # We do this verification here because, should this bogus circumstance
2018 # occur, it would never be caught by VerifyGroup, which only acts on
2019 # nodes/instances reachable from existing node groups.
2021 dangling_nodes = set(node.name for node in self.all_node_info.values()
2022 if node.group not in self.all_group_info)
2024 dangling_instances = {}
2025 no_node_instances = []
2027 for inst in self.all_inst_info.values():
2028 if inst.primary_node in dangling_nodes:
2029 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2030 elif inst.primary_node not in self.all_node_info:
2031 no_node_instances.append(inst.name)
2036 utils.CommaJoin(dangling_instances.get(node.name,
2038 for node in dangling_nodes]
2040 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2042 "the following nodes (and their instances) belong to a non"
2043 " existing group: %s", utils.CommaJoin(pretty_dangling))
2045 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2047 "the following instances have a non-existing primary-node:"
2048 " %s", utils.CommaJoin(no_node_instances))
2053 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2054 """Verifies the status of a node group.
2057 HPATH = "cluster-verify"
2058 HTYPE = constants.HTYPE_CLUSTER
2061 _HOOKS_INDENT_RE = re.compile("^", re.M)
2063 class NodeImage(object):
2064 """A class representing the logical and physical status of a node.
2067 @ivar name: the node name to which this object refers
2068 @ivar volumes: a structure as returned from
2069 L{ganeti.backend.GetVolumeList} (runtime)
2070 @ivar instances: a list of running instances (runtime)
2071 @ivar pinst: list of configured primary instances (config)
2072 @ivar sinst: list of configured secondary instances (config)
2073 @ivar sbp: dictionary of {primary-node: list of instances} for all
2074 instances for which this node is secondary (config)
2075 @ivar mfree: free memory, as reported by hypervisor (runtime)
2076 @ivar dfree: free disk, as reported by the node (runtime)
2077 @ivar offline: the offline status (config)
2078 @type rpc_fail: boolean
2079 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2080 not whether the individual keys were correct) (runtime)
2081 @type lvm_fail: boolean
2082 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2083 @type hyp_fail: boolean
2084 @ivar hyp_fail: whether the RPC call didn't return the instance list
2085 @type ghost: boolean
2086 @ivar ghost: whether this node is absent from the configuration (config)
2087 @type os_fail: boolean
2088 @ivar os_fail: whether the RPC call didn't return valid OS data
2090 @ivar oslist: dict of OSes as diagnosed by DiagnoseOS
2091 @type vm_capable: boolean
2092 @ivar vm_capable: whether the node can host instances
2095 def __init__(self, offline=False, name=None, vm_capable=True):
2104 self.offline = offline
2105 self.vm_capable = vm_capable
2106 self.rpc_fail = False
2107 self.lvm_fail = False
2108 self.hyp_fail = False
2110 self.os_fail = False
2113 def ExpandNames(self):
2114 # This raises errors.OpPrereqError on its own:
2115 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2117 # Get instances in node group; this is unsafe and needs verification later
2119 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2121 self.needed_locks = {
2122 locking.LEVEL_INSTANCE: inst_names,
2123 locking.LEVEL_NODEGROUP: [self.group_uuid],
2124 locking.LEVEL_NODE: [],
2127 self.share_locks = _ShareAll()
2129 def DeclareLocks(self, level):
2130 if level == locking.LEVEL_NODE:
2131 # Get members of node group; this is unsafe and needs verification later
2132 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2134 all_inst_info = self.cfg.GetAllInstancesInfo()
2136 # In Exec(), we warn about mirrored instances that have primary and
2137 # secondary living in separate node groups. To fully verify that
2138 # volumes for these instances are healthy, we will need to do an
2139 # extra call to their secondaries. We ensure here those nodes will
2141 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2142 # Important: access only the instances whose lock is owned
2143 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2144 nodes.update(all_inst_info[inst].secondary_nodes)
2146 self.needed_locks[locking.LEVEL_NODE] = nodes
2148 def CheckPrereq(self):
2149 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2150 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2152 group_nodes = set(self.group_info.members)
2154 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2157 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2159 unlocked_instances = \
2160 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2163 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2164 utils.CommaJoin(unlocked_nodes),
2167 if unlocked_instances:
2168 raise errors.OpPrereqError("Missing lock for instances: %s" %
2169 utils.CommaJoin(unlocked_instances),
2172 self.all_node_info = self.cfg.GetAllNodesInfo()
2173 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2175 self.my_node_names = utils.NiceSort(group_nodes)
2176 self.my_inst_names = utils.NiceSort(group_instances)
2178 self.my_node_info = dict((name, self.all_node_info[name])
2179 for name in self.my_node_names)
2181 self.my_inst_info = dict((name, self.all_inst_info[name])
2182 for name in self.my_inst_names)
2184 # We detect here the nodes that will need the extra RPC calls for verifying
2185 # split LV volumes; they should be locked.
2186 extra_lv_nodes = set()
2188 for inst in self.my_inst_info.values():
2189 if inst.disk_template in constants.DTS_INT_MIRROR:
2190 for nname in inst.all_nodes:
2191 if self.all_node_info[nname].group != self.group_uuid:
2192 extra_lv_nodes.add(nname)
2194 unlocked_lv_nodes = \
2195 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2197 if unlocked_lv_nodes:
2198 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2199 utils.CommaJoin(unlocked_lv_nodes),
2201 self.extra_lv_nodes = list(extra_lv_nodes)
2203 def _VerifyNode(self, ninfo, nresult):
2204 """Perform some basic validation on data returned from a node.
2206 - check the result data structure is well formed and has all the
2208 - check ganeti version
2210 @type ninfo: L{objects.Node}
2211 @param ninfo: the node to check
2212 @param nresult: the results from the node
2214 @return: whether overall this call was successful (and we can expect
2215 reasonable values in the response)
2219 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2221 # main result, nresult should be a non-empty dict
2222 test = not nresult or not isinstance(nresult, dict)
2223 _ErrorIf(test, constants.CV_ENODERPC, node,
2224 "unable to verify node: no data returned")
2228 # compares ganeti version
2229 local_version = constants.PROTOCOL_VERSION
2230 remote_version = nresult.get("version", None)
2231 test = not (remote_version and
2232 isinstance(remote_version, (list, tuple)) and
2233 len(remote_version) == 2)
2234 _ErrorIf(test, constants.CV_ENODERPC, node,
2235 "connection to node returned invalid data")
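# remote_version is a (protocol, release) pair: index 0 is matched exactly
# against PROTOCOL_VERSION below, index 1 against RELEASE_VERSION (the
# latter only as a warning).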
2239 test = local_version != remote_version[0]
2240 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2241 "incompatible protocol versions: master %s,"
2242 " node %s", local_version, remote_version[0])
2246 # node seems compatible, we can actually try to look into its results
2248 # full package version
2249 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2250 constants.CV_ENODEVERSION, node,
2251 "software version mismatch: master %s, node %s",
2252 constants.RELEASE_VERSION, remote_version[1],
2253 code=self.ETYPE_WARNING)
2255 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2256 if ninfo.vm_capable and isinstance(hyp_result, dict):
2257 for hv_name, hv_result in hyp_result.iteritems():
2258 test = hv_result is not None
2259 _ErrorIf(test, constants.CV_ENODEHV, node,
2260 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2262 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2263 if ninfo.vm_capable and isinstance(hvp_result, list):
2264 for item, hv_name, hv_result in hvp_result:
2265 _ErrorIf(True, constants.CV_ENODEHV, node,
2266 "hypervisor %s parameter verify failure (source %s): %s",
2267 hv_name, item, hv_result)
2269 test = nresult.get(constants.NV_NODESETUP,
2270 ["Missing NODESETUP results"])
2271 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2276 def _VerifyNodeTime(self, ninfo, nresult,
2277 nvinfo_starttime, nvinfo_endtime):
2278 """Check the node time.
2280 @type ninfo: L{objects.Node}
2281 @param ninfo: the node to check
2282 @param nresult: the remote results for the node
2283 @param nvinfo_starttime: the start time of the RPC call
2284 @param nvinfo_endtime: the end time of the RPC call
2288 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2290 ntime = nresult.get(constants.NV_TIME, None)
2292 ntime_merged = utils.MergeTime(ntime)
2293 except (ValueError, TypeError):
2294 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2297 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2298 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2299 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2300 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
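# A divergence is only reported when the node's clock falls outside the
# window [nvinfo_starttime - NODE_MAX_CLOCK_SKEW,
#         nvinfo_endtime + NODE_MAX_CLOCK_SKEW].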
2304 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2305 "Node time diverges by at least %s from master node time",
2308 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2309 """Check the node LVM results.
2311 @type ninfo: L{objects.Node}
2312 @param ninfo: the node to check
2313 @param nresult: the remote results for the node
2314 @param vg_name: the configured VG name
2321 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2323 # checks vg existence and size > 20G
2324 vglist = nresult.get(constants.NV_VGLIST, None)
2326 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2328 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2329 constants.MIN_VG_SIZE)
2330 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2333 pvlist = nresult.get(constants.NV_PVLIST, None)
2334 test = pvlist is None
2335 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2337 # check that ':' is not present in PV names, since it's a
2338 # special character for lvcreate (denotes the range of PEs to
2340 for _, pvname, owner_vg in pvlist:
2341 test = ":" in pvname
2342 _ErrorIf(test, constants.CV_ENODELVM, node,
2343 "Invalid character ':' in PV '%s' of VG '%s'",
2346 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2347 """Check the node bridges.
2349 @type ninfo: L{objects.Node}
2350 @param ninfo: the node to check
2351 @param nresult: the remote results for the node
2352 @param bridges: the expected list of bridges
2359 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2361 missing = nresult.get(constants.NV_BRIDGES, None)
2362 test = not isinstance(missing, list)
2363 _ErrorIf(test, constants.CV_ENODENET, node,
2364 "did not return valid bridge information")
2366 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2367 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2369 def _VerifyNodeUserScripts(self, ninfo, nresult):
2370 """Check the results of user-script presence and executability on the node
2372 @type ninfo: L{objects.Node}
2373 @param ninfo: the node to check
2374 @param nresult: the remote results for the node
2379 test = constants.NV_USERSCRIPTS not in nresult
2380 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2381 "did not return user scripts information")
2383 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2385 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2386 "user scripts not present or not executable: %s" %
2387 utils.CommaJoin(sorted(broken_scripts)))
2389 def _VerifyNodeNetwork(self, ninfo, nresult):
2390 """Check the node network connectivity results.
2392 @type ninfo: L{objects.Node}
2393 @param ninfo: the node to check
2394 @param nresult: the remote results for the node
2398 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2400 test = constants.NV_NODELIST not in nresult
2401 _ErrorIf(test, constants.CV_ENODESSH, node,
2402 "node hasn't returned node ssh connectivity data")
2404 if nresult[constants.NV_NODELIST]:
2405 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2406 _ErrorIf(True, constants.CV_ENODESSH, node,
2407 "ssh communication with node '%s': %s", a_node, a_msg)
2409 test = constants.NV_NODENETTEST not in nresult
2410 _ErrorIf(test, constants.CV_ENODENET, node,
2411 "node hasn't returned node tcp connectivity data")
2413 if nresult[constants.NV_NODENETTEST]:
2414 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2416 _ErrorIf(True, constants.CV_ENODENET, node,
2417 "tcp communication with node '%s': %s",
2418 anode, nresult[constants.NV_NODENETTEST][anode])
2420 test = constants.NV_MASTERIP not in nresult
2421 _ErrorIf(test, constants.CV_ENODENET, node,
2422 "node hasn't returned node master IP reachability data")
2424 if not nresult[constants.NV_MASTERIP]:
2425 if node == self.master_node:
2426 msg = "the master node cannot reach the master IP (not configured?)"
2428 msg = "cannot reach the master IP"
2429 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2431 def _VerifyInstance(self, instance, instanceconfig, node_image,
2433 """Verify an instance.
2435 This function checks to see if the required block devices are
2436 available on the instance's node.
2439 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2440 node_current = instanceconfig.primary_node
2442 node_vol_should = {}
2443 instanceconfig.MapLVsByNode(node_vol_should)
2445 cluster = self.cfg.GetClusterInfo()
2446 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2448 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2449 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2451 for node in node_vol_should:
2452 n_img = node_image[node]
2453 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2454 # ignore missing volumes on offline or broken nodes
2456 for volume in node_vol_should[node]:
2457 test = volume not in n_img.volumes
2458 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2459 "volume %s missing on node %s", volume, node)
2461 if instanceconfig.admin_state == constants.ADMINST_UP:
2462 pri_img = node_image[node_current]
2463 test = instance not in pri_img.instances and not pri_img.offline
2464 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2465 "instance not running on its primary node %s",
2468 diskdata = [(nname, success, status, idx)
2469 for (nname, disks) in diskstatus.items()
2470 for idx, (success, status) in enumerate(disks)]
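# diskstatus maps node name -> [(success, status), ...] with one entry per
# disk (see _CollectDiskInfo), so each diskdata item is
# (node_name, success, bdev_status, disk_index).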
2472 for nname, success, bdev_status, idx in diskdata:
2473 # the 'ghost node' construction in Exec() ensures that we have a
2475 snode = node_image[nname]
2476 bad_snode = snode.ghost or snode.offline
2477 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2478 not success and not bad_snode,
2479 constants.CV_EINSTANCEFAULTYDISK, instance,
2480 "couldn't retrieve status for disk/%s on %s: %s",
2481 idx, nname, bdev_status)
2482 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2483 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2484 constants.CV_EINSTANCEFAULTYDISK, instance,
2485 "disk/%s on %s is faulty", idx, nname)
2487 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2488 """Verify if there are any unknown volumes in the cluster.
2490 The .os, .swap and backup volumes are ignored. All other volumes are
2491 reported as unknown.
2493 @type reserved: L{ganeti.utils.FieldSet}
2494 @param reserved: a FieldSet of reserved volume names
2497 for node, n_img in node_image.items():
2498 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2499 self.all_node_info[node].group != self.group_uuid):
2500 # skip non-healthy nodes
2502 for volume in n_img.volumes:
2503 test = ((node not in node_vol_should or
2504 volume not in node_vol_should[node]) and
2505 not reserved.Matches(volume))
2506 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2507 "volume %s is unknown", volume)
2509 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2510 """Verify N+1 Memory Resilience.
2512 Check that if one single node dies we can still start all the
2513 instances it was primary for.
2516 cluster_info = self.cfg.GetClusterInfo()
2517 for node, n_img in node_image.items():
2518 # This code checks that every node which is now listed as
2519 # secondary has enough memory to host all instances it is
2520 # supposed to, should a single other node in the cluster fail.
2521 # FIXME: not ready for failover to an arbitrary node
2522 # FIXME: does not support file-backed instances
2523 # WARNING: we currently take into account down instances as well
2524 # as up ones, considering that even if they're down someone
2525 # might want to start them even in the event of a node failure.
2526 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2527 # we're skipping nodes marked offline and nodes in other groups from
2528 # the N+1 warning, since most likely we don't have good memory
2529 # information from them; we already list instances living on such
2530 # nodes, and that's enough warning
2532 #TODO(dynmem): also consider ballooning out other instances
2533 for prinode, instances in n_img.sbp.items():
2535 for instance in instances:
2536 bep = cluster_info.FillBE(instance_cfg[instance])
2537 if bep[constants.BE_AUTO_BALANCE]:
2538 needed_mem += bep[constants.BE_MINMEM]
2539 test = n_img.mfree < needed_mem
2540 self._ErrorIf(test, constants.CV_ENODEN1, node,
2541 "not enough memory to accommodate instance failovers"
2542 " should node %s fail (%dMiB needed, %dMiB available)",
2543 prinode, needed_mem, n_img.mfree)
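# Worked example (hypothetical figures): if this node is secondary for two
# auto-balanced instances whose primary is prinode, with BE_MINMEM of 512
# and 1024 MiB, it must report at least 1536 MiB free to pass the N+1 check
# for prinode.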
2546 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2547 (files_all, files_opt, files_mc, files_vm)):
2548 """Verifies file checksums collected from all nodes.
2550 @param errorif: Callback for reporting errors
2551 @param nodeinfo: List of L{objects.Node} objects
2552 @param master_node: Name of master node
2553 @param all_nvinfo: RPC results
2556 # Define functions determining which nodes to consider for a file
2559 (files_mc, lambda node: (node.master_candidate or
2560 node.name == master_node)),
2561 (files_vm, lambda node: node.vm_capable),
2564 # Build mapping from filename to list of nodes which should have the file
2566 for (files, fn) in files2nodefn:
2568 filenodes = nodeinfo
2570 filenodes = filter(fn, nodeinfo)
2571 nodefiles.update((filename,
2572 frozenset(map(operator.attrgetter("name"), filenodes)))
2573 for filename in files)
2575 assert set(nodefiles) == (files_all | files_mc | files_vm)
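# nodefiles now maps every expected filename to the frozenset of node names
# that should carry it (all nodes, master candidates plus the master, or
# vm_capable nodes, depending on which input set the file came from).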
2577 fileinfo = dict((filename, {}) for filename in nodefiles)
2578 ignore_nodes = set()
2580 for node in nodeinfo:
2582 ignore_nodes.add(node.name)
2585 nresult = all_nvinfo[node.name]
2587 if nresult.fail_msg or not nresult.payload:
2590 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2591 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2592 for (key, value) in fingerprints.items())
2595 test = not (node_files and isinstance(node_files, dict))
2596 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2597 "Node did not return file checksum data")
2599 ignore_nodes.add(node.name)
2602 # Build per-checksum mapping from filename to nodes having it
2603 for (filename, checksum) in node_files.items():
2604 assert filename in nodefiles
2605 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2607 for (filename, checksums) in fileinfo.items():
2608 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2610 # Nodes having the file
2611 with_file = frozenset(node_name
2612 for nodes in fileinfo[filename].values()
2613 for node_name in nodes) - ignore_nodes
2615 expected_nodes = nodefiles[filename] - ignore_nodes
2617 # Nodes missing file
2618 missing_file = expected_nodes - with_file
2620 if filename in files_opt:
2622 errorif(missing_file and missing_file != expected_nodes,
2623 constants.CV_ECLUSTERFILECHECK, None,
2624 "File %s is optional, but it must exist on all or no"
2625 " nodes (not found on %s)",
2626 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2628 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2629 "File %s is missing from node(s) %s", filename,
2630 utils.CommaJoin(utils.NiceSort(missing_file)))
2632 # Warn if a node has a file it shouldn't
2633 unexpected = with_file - expected_nodes
2635 constants.CV_ECLUSTERFILECHECK, None,
2636 "File %s should not exist on node(s) %s",
2637 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2639 # See if there are multiple versions of the file
2640 test = len(checksums) > 1
2642 variants = ["variant %s on %s" %
2643 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2644 for (idx, (checksum, nodes)) in
2645 enumerate(sorted(checksums.items()))]
2649 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2650 "File %s found with %s different checksums (%s)",
2651 filename, len(checksums), "; ".join(variants))
2653 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2655 """Verifies the node DRBD status.
2657 @type ninfo: L{objects.Node}
2658 @param ninfo: the node to check
2659 @param nresult: the remote results for the node
2660 @param instanceinfo: the dict of instances
2661 @param drbd_helper: the configured DRBD usermode helper
2662 @param drbd_map: the DRBD map as returned by
2663 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2667 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2670 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2671 test = (helper_result is None)
2672 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2673 "no drbd usermode helper returned")
2675 status, payload = helper_result
2677 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2678 "drbd usermode helper check unsuccessful: %s", payload)
2679 test = status and (payload != drbd_helper)
2680 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2681 "wrong drbd usermode helper: %s", payload)
2683 # compute the DRBD minors
2685 for minor, instance in drbd_map[node].items():
2686 test = instance not in instanceinfo
2687 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2688 "ghost instance '%s' in temporary DRBD map", instance)
2689 # ghost instance should not be running, but otherwise we
2690 # don't give double warnings (both ghost instance and
2691 # unallocated minor in use)
2693 node_drbd[minor] = (instance, False)
2695 instance = instanceinfo[instance]
2696 node_drbd[minor] = (instance.name,
2697 instance.admin_state == constants.ADMINST_UP)
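# node_drbd maps every minor expected on this node to
# (instance_name, should_be_active); ghost instances are recorded with
# should_be_active=False so only the "ghost instance" error is raised for
# them.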
2699 # and now check them
2700 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2701 test = not isinstance(used_minors, (tuple, list))
2702 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2703 "cannot parse drbd status file: %s", str(used_minors))
2705 # we cannot check drbd status
2708 for minor, (iname, must_exist) in node_drbd.items():
2709 test = minor not in used_minors and must_exist
2710 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2711 "drbd minor %d of instance %s is not active", minor, iname)
2712 for minor in used_minors:
2713 test = minor not in node_drbd
2714 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2715 "unallocated drbd minor %d is in use", minor)
2717 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2718 """Builds the node OS structures.
2720 @type ninfo: L{objects.Node}
2721 @param ninfo: the node to check
2722 @param nresult: the remote results for the node
2723 @param nimg: the node image object
2727 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2729 remote_os = nresult.get(constants.NV_OSLIST, None)
2730 test = (not isinstance(remote_os, list) or
2731 not compat.all(isinstance(v, list) and len(v) == 7
2732 for v in remote_os))
2734 _ErrorIf(test, constants.CV_ENODEOS, node,
2735 "node hasn't returned valid OS data")
2744 for (name, os_path, status, diagnose,
2745 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2747 if name not in os_dict:
2750 # parameters is a list of lists instead of list of tuples due to
2751 # JSON lacking a real tuple type, fix it:
2752 parameters = [tuple(v) for v in parameters]
2753 os_dict[name].append((os_path, status, diagnose,
2754 set(variants), set(parameters), set(api_ver)))
2756 nimg.oslist = os_dict
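# nimg.oslist maps each OS name to a list of
# (path, status, diagnose, variants, parameters, api_versions) tuples; on a
# healthy node there is normally exactly one entry per OS name.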
2758 def _VerifyNodeOS(self, ninfo, nimg, base):
2759 """Verifies the node OS list.
2761 @type ninfo: L{objects.Node}
2762 @param ninfo: the node to check
2763 @param nimg: the node image object
2764 @param base: the 'template' node we match against (e.g. from the master)
2768 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2770 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2772 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2773 for os_name, os_data in nimg.oslist.items():
2774 assert os_data, "Empty OS status for OS %s?!" % os_name
2775 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2776 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2777 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2778 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2779 "OS '%s' has multiple entries (first one shadows the rest): %s",
2780 os_name, utils.CommaJoin([v[0] for v in os_data]))
2781 # comparisons with the 'base' image
2782 test = os_name not in base.oslist
2783 _ErrorIf(test, constants.CV_ENODEOS, node,
2784 "Extra OS %s not present on reference node (%s)",
2788 assert base.oslist[os_name], "Base node has empty OS status?"
2789 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2791 # base OS is invalid, skipping
2793 for kind, a, b in [("API version", f_api, b_api),
2794 ("variants list", f_var, b_var),
2795 ("parameters", beautify_params(f_param),
2796 beautify_params(b_param))]:
2797 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2798 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2799 kind, os_name, base.name,
2800 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2802 # check any missing OSes
2803 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2804 _ErrorIf(missing, constants.CV_ENODEOS, node,
2805 "OSes present on reference node %s but missing on this node: %s",
2806 base.name, utils.CommaJoin(missing))
2808 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2809 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2811 @type ninfo: L{objects.Node}
2812 @param ninfo: the node to check
2813 @param nresult: the remote results for the node
2814 @type is_master: bool
2815 @param is_master: Whether node is the master node
2821 (constants.ENABLE_FILE_STORAGE or
2822 constants.ENABLE_SHARED_FILE_STORAGE)):
2824 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2826 # This should never happen
2827 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2828 "Node did not return forbidden file storage paths")
2830 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2831 "Found forbidden file storage paths: %s",
2832 utils.CommaJoin(fspaths))
2834 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2835 constants.CV_ENODEFILESTORAGEPATHS, node,
2836 "Node should not have returned forbidden file storage"
2839 def _VerifyOob(self, ninfo, nresult):
2840 """Verifies out of band functionality of a node.
2842 @type ninfo: L{objects.Node}
2843 @param ninfo: the node to check
2844 @param nresult: the remote results for the node
2848 # We just have to verify the paths on master and/or master candidates
2849 # as the oob helper is invoked on the master
2850 if ((ninfo.master_candidate or ninfo.master_capable) and
2851 constants.NV_OOB_PATHS in nresult):
2852 for path_result in nresult[constants.NV_OOB_PATHS]:
2853 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2855 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2856 """Verifies and updates the node volume data.
2858 This function will update a L{NodeImage}'s internal structures
2859 with data from the remote call.
2861 @type ninfo: L{objects.Node}
2862 @param ninfo: the node to check
2863 @param nresult: the remote results for the node
2864 @param nimg: the node image object
2865 @param vg_name: the configured VG name
2869 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2871 nimg.lvm_fail = True
2872 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2875 elif isinstance(lvdata, basestring):
2876 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2877 utils.SafeEncode(lvdata))
2878 elif not isinstance(lvdata, dict):
2879 _ErrorIf(True, constants.CV_ENODELVM, node,
2880 "rpc call to node failed (lvlist)")
2882 nimg.volumes = lvdata
2883 nimg.lvm_fail = False
2885 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2886 """Verifies and updates the node instance list.
2888 If the listing was successful, then updates this node's instance
2889 list. Otherwise, it marks the RPC call as failed for the instance
2892 @type ninfo: L{objects.Node}
2893 @param ninfo: the node to check
2894 @param nresult: the remote results for the node
2895 @param nimg: the node image object
2898 idata = nresult.get(constants.NV_INSTANCELIST, None)
2899 test = not isinstance(idata, list)
2900 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2901 "rpc call to node failed (instancelist): %s",
2902 utils.SafeEncode(str(idata)))
2904 nimg.hyp_fail = True
2906 nimg.instances = idata
2908 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2909 """Verifies and computes a node information map
2911 @type ninfo: L{objects.Node}
2912 @param ninfo: the node to check
2913 @param nresult: the remote results for the node
2914 @param nimg: the node image object
2915 @param vg_name: the configured VG name
2919 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2921 # try to read free memory (from the hypervisor)
2922 hv_info = nresult.get(constants.NV_HVINFO, None)
2923 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2924 _ErrorIf(test, constants.CV_ENODEHV, node,
2925 "rpc call to node failed (hvinfo)")
2928 nimg.mfree = int(hv_info["memory_free"])
2929 except (ValueError, TypeError):
2930 _ErrorIf(True, constants.CV_ENODERPC, node,
2931 "node returned invalid nodeinfo, check hypervisor")
2933 # FIXME: devise a free space model for file based instances as well
2934 if vg_name is not None:
2935 test = (constants.NV_VGLIST not in nresult or
2936 vg_name not in nresult[constants.NV_VGLIST])
2937 _ErrorIf(test, constants.CV_ENODELVM, node,
2938 "node didn't return data for the volume group '%s'"
2939 " - it is either missing or broken", vg_name)
2942 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2943 except (ValueError, TypeError):
2944 _ErrorIf(True, constants.CV_ENODERPC, node,
2945 "node returned invalid LVM info, check LVM status")
2947 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2948 """Gets per-disk status information for all instances.
2950 @type nodelist: list of strings
2951 @param nodelist: Node names
2952 @type node_image: dict of (name, L{objects.Node})
2953 @param node_image: Node objects
2954 @type instanceinfo: dict of (name, L{objects.Instance})
2955 @param instanceinfo: Instance objects
2956 @rtype: {instance: {node: [(success, payload)]}}
2957 @return: a dictionary of per-instance dictionaries with nodes as
2958 keys and disk information as values; the disk information is a
2959 list of tuples (success, payload)
2962 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2965 node_disks_devonly = {}
2966 diskless_instances = set()
2967 diskless = constants.DT_DISKLESS
2969 for nname in nodelist:
2970 node_instances = list(itertools.chain(node_image[nname].pinst,
2971 node_image[nname].sinst))
2972 diskless_instances.update(inst for inst in node_instances
2973 if instanceinfo[inst].disk_template == diskless)
2974 disks = [(inst, disk)
2975 for inst in node_instances
2976 for disk in instanceinfo[inst].disks]
2979 # No need to collect data
2982 node_disks[nname] = disks
2984 # _AnnotateDiskParams makes already copies of the disks
2986 for (inst, dev) in disks:
2987 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2988 self.cfg.SetDiskID(anno_disk, nname)
2989 devonly.append(anno_disk)
2991 node_disks_devonly[nname] = devonly
2993 assert len(node_disks) == len(node_disks_devonly)
2995 # Collect data from all nodes with disks
2996 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2999 assert len(result) == len(node_disks)
3003 for (nname, nres) in result.items():
3004 disks = node_disks[nname]
3007 # No data from this node
3008 data = len(disks) * [(False, "node offline")]
3011 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3012 "while getting disk information: %s", msg)
3014 # No data from this node
3015 data = len(disks) * [(False, msg)]
3018 for idx, i in enumerate(nres.payload):
3019 if isinstance(i, (tuple, list)) and len(i) == 2:
3022 logging.warning("Invalid result from node %s, entry %d: %s",
3024 data.append((False, "Invalid result from the remote node"))
3026 for ((inst, _), status) in zip(disks, data):
3027 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3029 # Add empty entries for diskless instances.
3030 for inst in diskless_instances:
3031 assert inst not in instdisk
3034 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3035 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3036 compat.all(isinstance(s, (tuple, list)) and
3037 len(s) == 2 for s in statuses)
3038 for inst, nnames in instdisk.items()
3039 for nname, statuses in nnames.items())
3040 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3045 def _SshNodeSelector(group_uuid, all_nodes):
3046 """Create endless iterators for all potential SSH check hosts.
3049 nodes = [node for node in all_nodes
3050 if (node.group != group_uuid and
3052 keyfunc = operator.attrgetter("group")
3054 return map(itertools.cycle,
3055 [sorted(map(operator.attrgetter("name"), names))
3056 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3060 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3061 """Choose which nodes should talk to which other nodes.
3063 We will make nodes contact all nodes in their group, and one node from
3066 @warning: This algorithm has a known issue if one node group is much
3067 smaller than others (e.g. just one node). In such a case all other
3068 nodes will talk to the single node.
3071 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3072 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3074 return (online_nodes,
3075 dict((name, sorted([i.next() for i in sel]))
3076 for name in online_nodes))
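# The result is (online_nodes, {node_name: [one peer node from each other
# group]}); because the per-group iterators are endless cycles, successive
# nodes in this group get spread across different remote targets.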
3078 def BuildHooksEnv(self):
3081 Cluster-Verify hooks just ran in the post phase and their failure makes
3082 the output be logged in the verify output and the verification to fail.
3086 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3089 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3090 for node in self.my_node_info.values())
3094 def BuildHooksNodes(self):
3095 """Build hooks nodes.
3098 return ([], self.my_node_names)
3100 def Exec(self, feedback_fn):
3101 """Verify integrity of the node group, performing various tests on nodes.
3104 # This method has too many local variables. pylint: disable=R0914
3105 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3107 if not self.my_node_names:
3109 feedback_fn("* Empty node group, skipping verification")
3113 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3114 verbose = self.op.verbose
3115 self._feedback_fn = feedback_fn
3117 vg_name = self.cfg.GetVGName()
3118 drbd_helper = self.cfg.GetDRBDHelper()
3119 cluster = self.cfg.GetClusterInfo()
3120 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3121 hypervisors = cluster.enabled_hypervisors
3122 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3124 i_non_redundant = [] # Non redundant instances
3125 i_non_a_balanced = [] # Non auto-balanced instances
3126 i_offline = 0 # Count of offline instances
3127 n_offline = 0 # Count of offline nodes
3128 n_drained = 0 # Count of nodes being drained
3129 node_vol_should = {}
3131 # FIXME: verify OS list
3134 filemap = _ComputeAncillaryFiles(cluster, False)
3136 # do local checksums
3137 master_node = self.master_node = self.cfg.GetMasterNode()
3138 master_ip = self.cfg.GetMasterIP()
3140 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3143 if self.cfg.GetUseExternalMipScript():
3144 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3146 node_verify_param = {
3147 constants.NV_FILELIST:
3148 map(vcluster.MakeVirtualPath,
3149 utils.UniqueSequence(filename
3150 for files in filemap
3151 for filename in files)),
3152 constants.NV_NODELIST:
3153 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3154 self.all_node_info.values()),
3155 constants.NV_HYPERVISOR: hypervisors,
3156 constants.NV_HVPARAMS:
3157 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3158 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3159 for node in node_data_list
3160 if not node.offline],
3161 constants.NV_INSTANCELIST: hypervisors,
3162 constants.NV_VERSION: None,
3163 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3164 constants.NV_NODESETUP: None,
3165 constants.NV_TIME: None,
3166 constants.NV_MASTERIP: (master_node, master_ip),
3167 constants.NV_OSLIST: None,
3168 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3169 constants.NV_USERSCRIPTS: user_scripts,
3172 if vg_name is not None:
3173 node_verify_param[constants.NV_VGLIST] = None
3174 node_verify_param[constants.NV_LVLIST] = vg_name
3175 node_verify_param[constants.NV_PVLIST] = [vg_name]
3178 node_verify_param[constants.NV_DRBDLIST] = None
3179 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3181 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3182 # Load file storage paths only from master node
3183 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3186 # FIXME: this needs to be changed per node-group, not cluster-wide
3188 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3189 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3190 bridges.add(default_nicpp[constants.NIC_LINK])
3191 for instance in self.my_inst_info.values():
3192 for nic in instance.nics:
3193 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3194 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3195 bridges.add(full_nic[constants.NIC_LINK])
3198 node_verify_param[constants.NV_BRIDGES] = list(bridges)
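# node_verify_param is the request payload passed to rpc.call_node_verify
# below; each NV_* key asks the remote node to run the corresponding check
# and report its result under the same key.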
3200 # Build our expected cluster state
3201 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3203 vm_capable=node.vm_capable))
3204 for node in node_data_list)
3208 for node in self.all_node_info.values():
3209 path = _SupportsOob(self.cfg, node)
3210 if path and path not in oob_paths:
3211 oob_paths.append(path)
3214 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3216 for instance in self.my_inst_names:
3217 inst_config = self.my_inst_info[instance]
3218 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3221 for nname in inst_config.all_nodes:
3222 if nname not in node_image:
3223 gnode = self.NodeImage(name=nname)
3224 gnode.ghost = (nname not in self.all_node_info)
3225 node_image[nname] = gnode
3227 inst_config.MapLVsByNode(node_vol_should)
3229 pnode = inst_config.primary_node
3230 node_image[pnode].pinst.append(instance)
3232 for snode in inst_config.secondary_nodes:
3233 nimg = node_image[snode]
3234 nimg.sinst.append(instance)
3235 if pnode not in nimg.sbp:
3236 nimg.sbp[pnode] = []
3237 nimg.sbp[pnode].append(instance)
3239 # At this point, we have the in-memory data structures complete,
3240 # except for the runtime information, which we'll gather next
3242 # Due to the way our RPC system works, exact response times cannot be
3243 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3244 # time before and after executing the request, we can at least have a time
3246 nvinfo_starttime = time.time()
3247 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3249 self.cfg.GetClusterName())
3250 nvinfo_endtime = time.time()
3252 if self.extra_lv_nodes and vg_name is not None:
3254 self.rpc.call_node_verify(self.extra_lv_nodes,
3255 {constants.NV_LVLIST: vg_name},
3256 self.cfg.GetClusterName())
3258 extra_lv_nvinfo = {}
3260 all_drbd_map = self.cfg.ComputeDRBDMap()
3262 feedback_fn("* Gathering disk information (%s nodes)" %
3263 len(self.my_node_names))
3264 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3267 feedback_fn("* Verifying configuration file consistency")
3269 # If not all nodes are being checked, we need to make sure the master node
3270 # and a non-checked vm_capable node are in the list.
3271 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3273 vf_nvinfo = all_nvinfo.copy()
3274 vf_node_info = list(self.my_node_info.values())
3275 additional_nodes = []
3276 if master_node not in self.my_node_info:
3277 additional_nodes.append(master_node)
3278 vf_node_info.append(self.all_node_info[master_node])
3279 # Add the first vm_capable node we find which is not included,
3280 # excluding the master node (which we already have)
3281 for node in absent_nodes:
3282 nodeinfo = self.all_node_info[node]
3283 if (nodeinfo.vm_capable and not nodeinfo.offline and
3284 node != master_node):
3285 additional_nodes.append(node)
3286 vf_node_info.append(self.all_node_info[node])
3288 key = constants.NV_FILELIST
3289 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3290 {key: node_verify_param[key]},
3291 self.cfg.GetClusterName()))
3293 vf_nvinfo = all_nvinfo
3294 vf_node_info = self.my_node_info.values()
3296 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3298 feedback_fn("* Verifying node status")
3302 for node_i in node_data_list:
3304 nimg = node_image[node]
3308 feedback_fn("* Skipping offline node %s" % (node,))
3312 if node == master_node:
3314 elif node_i.master_candidate:
3315 ntype = "master candidate"
3316 elif node_i.drained:
3322 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3324 msg = all_nvinfo[node].fail_msg
3325 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3328 nimg.rpc_fail = True
3331 nresult = all_nvinfo[node].payload
3333 nimg.call_ok = self._VerifyNode(node_i, nresult)
3334 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3335 self._VerifyNodeNetwork(node_i, nresult)
3336 self._VerifyNodeUserScripts(node_i, nresult)
3337 self._VerifyOob(node_i, nresult)
3338 self._VerifyFileStoragePaths(node_i, nresult,
3339 node == master_node)
3342 self._VerifyNodeLVM(node_i, nresult, vg_name)
3343 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3346 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3347 self._UpdateNodeInstances(node_i, nresult, nimg)
3348 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3349 self._UpdateNodeOS(node_i, nresult, nimg)
3351 if not nimg.os_fail:
3352 if refos_img is None:
3354 self._VerifyNodeOS(node_i, nimg, refos_img)
3355 self._VerifyNodeBridges(node_i, nresult, bridges)
3357 # Check whether all running instances are primary for the node. (This
3358 # can no longer be done from _VerifyInstance below, since some of the
3359 # wrong instances could be from other node groups.)
3360 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3362 for inst in non_primary_inst:
3363 test = inst in self.all_inst_info
3364 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3365 "instance should not run on node %s", node_i.name)
3366 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3367 "node is running unknown instance %s", inst)
3369 for node, result in extra_lv_nvinfo.items():
3370 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3371 node_image[node], vg_name)
3373 feedback_fn("* Verifying instance status")
3374 for instance in self.my_inst_names:
3376 feedback_fn("* Verifying instance %s" % instance)
3377 inst_config = self.my_inst_info[instance]
3378 self._VerifyInstance(instance, inst_config, node_image,
3380 inst_nodes_offline = []
3382 pnode = inst_config.primary_node
3383 pnode_img = node_image[pnode]
3384 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3385 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3386 " primary node failed", instance)
3388 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3390 constants.CV_EINSTANCEBADNODE, instance,
3391 "instance is marked as running and lives on offline node %s",
3392 inst_config.primary_node)
3394 # If the instance is non-redundant we cannot survive losing its primary
3395 # node, so we are not N+1 compliant.
3396 if inst_config.disk_template not in constants.DTS_MIRRORED:
3397 i_non_redundant.append(instance)
3399 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3400 constants.CV_EINSTANCELAYOUT,
3401 instance, "instance has multiple secondary nodes: %s",
3402 utils.CommaJoin(inst_config.secondary_nodes),
3403 code=self.ETYPE_WARNING)
3405 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3406 pnode = inst_config.primary_node
3407 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3408 instance_groups = {}
3410 for node in instance_nodes:
3411 instance_groups.setdefault(self.all_node_info[node].group,
3415 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3416 # Sort so that we always list the primary node first.
3417 for group, nodes in sorted(instance_groups.items(),
3418 key=lambda (_, nodes): pnode in nodes,
3421 self._ErrorIf(len(instance_groups) > 1,
3422 constants.CV_EINSTANCESPLITGROUPS,
3423 instance, "instance has primary and secondary nodes in"
3424 " different groups: %s", utils.CommaJoin(pretty_list),
3425 code=self.ETYPE_WARNING)
3427 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3428 i_non_a_balanced.append(instance)
3430 for snode in inst_config.secondary_nodes:
3431 s_img = node_image[snode]
3432 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3433 snode, "instance %s, connection to secondary node failed",
3437 inst_nodes_offline.append(snode)
3439 # warn that the instance lives on offline nodes
3440 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3441 "instance has offline secondary node(s) %s",
3442 utils.CommaJoin(inst_nodes_offline))
3443 # ... or ghost/non-vm_capable nodes
3444 for node in inst_config.all_nodes:
3445 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3446 instance, "instance lives on ghost node %s", node)
3447 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3448 instance, "instance lives on non-vm_capable node %s", node)
3450 feedback_fn("* Verifying orphan volumes")
3451 reserved = utils.FieldSet(*cluster.reserved_lvs)
3453 # We will get spurious "unknown volume" warnings if any node of this group
3454 # is secondary for an instance whose primary is in another group. To avoid
3455 # them, we find these instances and add their volumes to node_vol_should.
3456 for inst in self.all_inst_info.values():
3457 for secondary in inst.secondary_nodes:
3458 if (secondary in self.my_node_info
3459 and inst.name not in self.my_inst_info):
3460 inst.MapLVsByNode(node_vol_should)
3463 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3465 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3466 feedback_fn("* Verifying N+1 Memory redundancy")
3467 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3469 feedback_fn("* Other Notes")
3471 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3472 % len(i_non_redundant))
3474 if i_non_a_balanced:
3475 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3476 % len(i_non_a_balanced))
3479 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3482 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3485 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3489 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3490 """Analyze the post-hooks' result
3492 This method analyses the hook result, handles it, and sends some
3493 nicely-formatted feedback back to the user.
3495 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3496 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3497 @param hooks_results: the results of the multi-node hooks rpc call
3498 @param feedback_fn: function used to send feedback back to the caller
3499 @param lu_result: previous Exec result
3500 @return: the new Exec result, based on the previous result
3504 # We only really run POST phase hooks, only for non-empty groups,
3505 # and are only interested in their results
3506 if not self.my_node_names:
3509 elif phase == constants.HOOKS_PHASE_POST:
3510 # Used to change hooks' output to proper indentation
3511 feedback_fn("* Hooks Results")
3512 assert hooks_results, "invalid result from hooks"
3514 for node_name in hooks_results:
3515 res = hooks_results[node_name]
3517 test = msg and not res.offline
3518 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3519 "Communication failure in hooks execution: %s", msg)
3520 if res.offline or msg:
3521 # No need to investigate payload if node is offline or gave
3524 for script, hkr, output in res.payload:
3525 test = hkr == constants.HKR_FAIL
3526 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3527 "Script %s failed, output:", script)
3529 output = self._HOOKS_INDENT_RE.sub(" ", output)
3530 feedback_fn("%s" % output)
3536 class LUClusterVerifyDisks(NoHooksLU):
3537 """Verifies the cluster disk status.
3542 def ExpandNames(self):
3543 self.share_locks = _ShareAll()
3544 self.needed_locks = {
3545 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3548 def Exec(self, feedback_fn):
3549 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3551 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3552 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3553 for group in group_names])
3556 class LUGroupVerifyDisks(NoHooksLU):
3557 """Verifies the status of all disks in a node group.
3562 def ExpandNames(self):
3563 # Raises errors.OpPrereqError on its own if group can't be found
3564 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3566 self.share_locks = _ShareAll()
3567 self.needed_locks = {
3568 locking.LEVEL_INSTANCE: [],
3569 locking.LEVEL_NODEGROUP: [],
3570 locking.LEVEL_NODE: [],
3573 def DeclareLocks(self, level):
3574 if level == locking.LEVEL_INSTANCE:
3575 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3577 # Lock instances optimistically, needs verification once node and group
3578 # locks have been acquired
3579 self.needed_locks[locking.LEVEL_INSTANCE] = \
3580 self.cfg.GetNodeGroupInstances(self.group_uuid)
3582 elif level == locking.LEVEL_NODEGROUP:
3583 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3585 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3586 set([self.group_uuid] +
3587 # Lock all groups used by instances optimistically; this requires
3588 # going via the node before it's locked, requiring verification
3591 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3592 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3594 elif level == locking.LEVEL_NODE:
3595 # This will only lock the nodes in the group to be verified which contain
3597 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3598 self._LockInstancesNodes()
3600 # Lock all nodes in group to be verified
3601 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3602 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3603 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3605 def CheckPrereq(self):
3606 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3607 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3608 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3610 assert self.group_uuid in owned_groups
3612 # Check if locked instances are still correct
3613 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3615 # Get instance information
3616 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3618 # Check if node groups for locked instances are still correct
3619 _CheckInstancesNodeGroups(self.cfg, self.instances,
3620 owned_groups, owned_nodes, self.group_uuid)
3622 def Exec(self, feedback_fn):
3623 """Verify integrity of cluster disks.
3625 @rtype: tuple of three items
3626 @return: a tuple of (dict of node-to-node_error, list of instances
3627 which need activate-disks, dict of instance: (node, volume) for
3632 res_instances = set()
3635 nv_dict = _MapInstanceDisksToNodes(
3636 [inst for inst in self.instances.values()
3637 if inst.admin_state == constants.ADMINST_UP])
3640 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3641 set(self.cfg.GetVmCapableNodeList()))
3643 node_lvs = self.rpc.call_lv_list(nodes, [])
3645 for (node, node_res) in node_lvs.items():
3646 if node_res.offline:
3649 msg = node_res.fail_msg
3651 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3652 res_nodes[node] = msg
3655 for lv_name, (_, _, lv_online) in node_res.payload.items():
3656 inst = nv_dict.pop((node, lv_name), None)
3657 if not (lv_online or inst is None):
3658 res_instances.add(inst)
3660 # any leftover items in nv_dict are missing LVs, let's arrange the data
3662 for key, inst in nv_dict.iteritems():
3663 res_missing.setdefault(inst, []).append(list(key))
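# res_nodes: {node_name: error message} for nodes that failed the LV query,
# res_instances: instances with offline LVs (they need activate-disks),
# res_missing: {instance: [[node, lv_name], ...]} for LVs not found at all.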
3665 return (res_nodes, list(res_instances), res_missing)
3668 class LUClusterRepairDiskSizes(NoHooksLU):
3669 """Verifies the cluster disk sizes.
3674 def ExpandNames(self):
3675 if self.op.instances:
3676 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3677 self.needed_locks = {
3678 locking.LEVEL_NODE_RES: [],
3679 locking.LEVEL_INSTANCE: self.wanted_names,
3681 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3683 self.wanted_names = None
3684 self.needed_locks = {
3685 locking.LEVEL_NODE_RES: locking.ALL_SET,
3686 locking.LEVEL_INSTANCE: locking.ALL_SET,
3688 self.share_locks = {
3689 locking.LEVEL_NODE_RES: 1,
3690 locking.LEVEL_INSTANCE: 0,
3693 def DeclareLocks(self, level):
3694 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3695 self._LockInstancesNodes(primary_only=True, level=level)
3697 def CheckPrereq(self):
3698 """Check prerequisites.
3700 This only checks the optional instance list against the existing names.
3703 if self.wanted_names is None:
3704 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3706 self.wanted_instances = \
3707 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3709 def _EnsureChildSizes(self, disk):
3710 """Ensure children of the disk have the needed disk size.
3712 This is valid mainly for DRBD8 and fixes an issue where the
3713 children have smaller disk size.
3715 @param disk: an L{ganeti.objects.Disk} object
3718 if disk.dev_type == constants.LD_DRBD8:
3719 assert disk.children, "Empty children for DRBD8?"
3720 fchild = disk.children[0]
3721 mismatch = fchild.size < disk.size
3722 if mismatch:
3723 self.LogInfo("Child disk has size %d, parent %d, fixing",
3724 fchild.size, disk.size)
3725 fchild.size = disk.size
3727 # and we recurse on this child only, not on the metadev
3728 return self._EnsureChildSizes(fchild) or mismatch
3729 else:
3730 return False
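# Editor's illustration (hypothetical values, not from the original source):
# for a DRBD8 disk recorded with size 10240 whose data child reports only
# 10200, the helper above grows the child and reports the mismatch so the
# caller knows the configuration must be written back:
#
#   # disk.size == 10240, disk.children[0].size == 10200
#   if self._EnsureChildSizes(disk):      # -> True, child size is now 10240
#       self.cfg.Update(instance, feedback_fn)
#
# The DRBD metadata child is deliberately left untouched.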
3732 def Exec(self, feedback_fn):
3733 """Verify the size of cluster disks.
3736 # TODO: check child disks too
3737 # TODO: check differences in size between primary/secondary nodes
3738 per_node_disks = {}
3739 for instance in self.wanted_instances:
3740 pnode = instance.primary_node
3741 if pnode not in per_node_disks:
3742 per_node_disks[pnode] = []
3743 for idx, disk in enumerate(instance.disks):
3744 per_node_disks[pnode].append((instance, idx, disk))
3746 assert not (frozenset(per_node_disks.keys()) -
3747 self.owned_locks(locking.LEVEL_NODE_RES)), \
3748 "Not owning correct locks"
3749 assert not self.owned_locks(locking.LEVEL_NODE)
3751 changed = []
3752 for node, dskl in per_node_disks.items():
3753 newl = [v[2].Copy() for v in dskl]
3754 for dsk in newl:
3755 self.cfg.SetDiskID(dsk, node)
3756 result = self.rpc.call_blockdev_getsize(node, newl)
3757 if result.fail_msg:
3758 self.LogWarning("Failure in blockdev_getsize call to node"
3759 " %s, ignoring", node)
3760 continue
3761 if len(result.payload) != len(dskl):
3762 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3763 " result.payload=%s", node, len(dskl), result.payload)
3764 self.LogWarning("Invalid result from node %s, ignoring node results",
3767 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3769 self.LogWarning("Disk %d of instance %s did not return size"
3770 " information, ignoring", idx, instance.name)
3772 if not isinstance(size, (int, long)):
3773 self.LogWarning("Disk %d of instance %s did not return valid"
3774 " size information, ignoring", idx, instance.name)
3775 continue
3776 size = size >> 20
3777 if size != disk.size:
3778 self.LogInfo("Disk %d of instance %s has mismatched size,"
3779 " correcting: recorded %d, actual %d", idx,
3780 instance.name, disk.size, size)
3781 disk.size = size
3782 self.cfg.Update(instance, feedback_fn)
3783 changed.append((instance.name, idx, size))
3784 if self._EnsureChildSizes(disk):
3785 self.cfg.Update(instance, feedback_fn)
3786 changed.append((instance.name, idx, disk.size))
3787 return changed
3790 class LUClusterRename(LogicalUnit):
3791 """Rename the cluster.
3794 HPATH = "cluster-rename"
3795 HTYPE = constants.HTYPE_CLUSTER
3797 def BuildHooksEnv(self):
3802 "OP_TARGET": self.cfg.GetClusterName(),
3803 "NEW_NAME": self.op.name,
3806 def BuildHooksNodes(self):
3807 """Build hooks nodes.
3810 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3812 def CheckPrereq(self):
3813 """Verify that the passed name is a valid one.
3816 hostname = netutils.GetHostname(name=self.op.name,
3817 family=self.cfg.GetPrimaryIPFamily())
3819 new_name = hostname.name
3820 self.ip = new_ip = hostname.ip
3821 old_name = self.cfg.GetClusterName()
3822 old_ip = self.cfg.GetMasterIP()
3823 if new_name == old_name and new_ip == old_ip:
3824 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3825 " cluster has changed",
3827 if new_ip != old_ip:
3828 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3829 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3830 " reachable on the network" %
3831 new_ip, errors.ECODE_NOTUNIQUE)
3833 self.op.name = new_name
3835 def Exec(self, feedback_fn):
3836 """Rename the cluster.
3839 clustername = self.op.name
3840 new_ip = self.ip
3842 # shutdown the master IP
3843 master_params = self.cfg.GetMasterNetworkParameters()
3844 ems = self.cfg.GetUseExternalMipScript()
3845 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3847 result.Raise("Could not disable the master role")
3850 cluster = self.cfg.GetClusterInfo()
3851 cluster.cluster_name = clustername
3852 cluster.master_ip = new_ip
3853 self.cfg.Update(cluster, feedback_fn)
3855 # update the known hosts file
3856 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3857 node_list = self.cfg.GetOnlineNodeList()
3858 try:
3859 node_list.remove(master_params.name)
3860 except ValueError:
3861 pass
3862 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3864 master_params.ip = new_ip
3865 result = self.rpc.call_node_activate_master_ip(master_params.name,
3867 msg = result.fail_msg
3869 self.LogWarning("Could not re-enable the master role on"
3870 " the master, please restart manually: %s", msg)
3875 def _ValidateNetmask(cfg, netmask):
3876 """Checks if a netmask is valid.
3878 @type cfg: L{config.ConfigWriter}
3879 @param cfg: The cluster configuration
3881 @param netmask: the netmask to be verified
3882 @raise errors.OpPrereqError: if the validation fails
3885 ip_family = cfg.GetPrimaryIPFamily()
3886 try:
3887 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3888 except errors.ProgrammerError:
3889 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3890 ip_family, errors.ECODE_INVAL)
3891 if not ipcls.ValidateNetmask(netmask):
3892 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3893 (netmask), errors.ECODE_INVAL)
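# Editor's sketch of the helper's behaviour (hypothetical cfg object, not from
# the original source): for an IPv4 cluster the netmask is a CIDR prefix
# length, so
#
#   _ValidateNetmask(cfg, 24)    # accepted
#   _ValidateNetmask(cfg, 64)    # raises errors.OpPrereqError, since /64 is
#                                # not a valid IPv4 netmask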
3896 class LUClusterSetParams(LogicalUnit):
3897 """Change the parameters of the cluster.
3900 HPATH = "cluster-modify"
3901 HTYPE = constants.HTYPE_CLUSTER
3904 def CheckArguments(self):
3908 if self.op.uid_pool:
3909 uidpool.CheckUidPool(self.op.uid_pool)
3911 if self.op.add_uids:
3912 uidpool.CheckUidPool(self.op.add_uids)
3914 if self.op.remove_uids:
3915 uidpool.CheckUidPool(self.op.remove_uids)
3917 if self.op.master_netmask is not None:
3918 _ValidateNetmask(self.cfg, self.op.master_netmask)
3920 if self.op.diskparams:
3921 for dt_params in self.op.diskparams.values():
3922 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3923 try:
3924 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3925 except errors.OpPrereqError, err:
3926 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3927 errors.ECODE_INVAL)
3929 def ExpandNames(self):
3930 # FIXME: in the future maybe other cluster params won't require checking on
3931 # all nodes to be modified.
3932 self.needed_locks = {
3933 locking.LEVEL_NODE: locking.ALL_SET,
3934 locking.LEVEL_INSTANCE: locking.ALL_SET,
3935 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3937 self.share_locks = {
3938 locking.LEVEL_NODE: 1,
3939 locking.LEVEL_INSTANCE: 1,
3940 locking.LEVEL_NODEGROUP: 1,
3943 def BuildHooksEnv(self):
3948 "OP_TARGET": self.cfg.GetClusterName(),
3949 "NEW_VG_NAME": self.op.vg_name,
3952 def BuildHooksNodes(self):
3953 """Build hooks nodes.
3956 mn = self.cfg.GetMasterNode()
3957 return ([mn], [mn])
3959 def CheckPrereq(self):
3960 """Check prerequisites.
3962 This checks whether the given params don't conflict and
3963 if the given volume group is valid.
3966 if self.op.vg_name is not None and not self.op.vg_name:
3967 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3968 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3969 " instances exist", errors.ECODE_INVAL)
3971 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3972 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3973 raise errors.OpPrereqError("Cannot disable drbd helper while"
3974 " drbd-based instances exist",
3977 node_list = self.owned_locks(locking.LEVEL_NODE)
3979 # if vg_name not None, checks given volume group on all nodes
3980 if self.op.vg_name:
3981 vglist = self.rpc.call_vg_list(node_list)
3982 for node in node_list:
3983 msg = vglist[node].fail_msg
3984 if msg:
3985 # ignoring down node
3986 self.LogWarning("Error while gathering data on node %s"
3987 " (ignoring node): %s", node, msg)
3988 continue
3989 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3990 self.op.vg_name,
3991 constants.MIN_VG_SIZE)
3992 if vgstatus:
3993 raise errors.OpPrereqError("Error on node '%s': %s" %
3994 (node, vgstatus), errors.ECODE_ENVIRON)
3996 if self.op.drbd_helper:
3997 # checks given drbd helper on all nodes
3998 helpers = self.rpc.call_drbd_helper(node_list)
3999 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4000 if ninfo.offline:
4001 self.LogInfo("Not checking drbd helper on offline node %s", node)
4002 continue
4003 msg = helpers[node].fail_msg
4004 if msg:
4005 raise errors.OpPrereqError("Error checking drbd helper on node"
4006 " '%s': %s" % (node, msg),
4007 errors.ECODE_ENVIRON)
4008 node_helper = helpers[node].payload
4009 if node_helper != self.op.drbd_helper:
4010 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4011 (node, node_helper), errors.ECODE_ENVIRON)
4013 self.cluster = cluster = self.cfg.GetClusterInfo()
4014 # validate params changes
4015 if self.op.beparams:
4016 objects.UpgradeBeParams(self.op.beparams)
4017 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4018 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4020 if self.op.ndparams:
4021 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4022 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4024 # TODO: we need a more general way to handle resetting
4025 # cluster-level parameters to default values
4026 if self.new_ndparams["oob_program"] == "":
4027 self.new_ndparams["oob_program"] = \
4028 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4030 if self.op.hv_state:
4031 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4032 self.cluster.hv_state_static)
4033 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4034 for hv, values in new_hv_state.items())
4036 if self.op.disk_state:
4037 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4038 self.cluster.disk_state_static)
4039 self.new_disk_state = \
4040 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4041 for name, values in svalues.items()))
4042 for storage, svalues in new_disk_state.items())
4044 if self.op.ipolicy:
4045 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4046 group_policy=False)
4048 all_instances = self.cfg.GetAllInstancesInfo().values()
4049 violations = set()
4050 for group in self.cfg.GetAllNodeGroupsInfo().values():
4051 instances = frozenset([inst for inst in all_instances
4052 if compat.any(node in group.members
4053 for node in inst.all_nodes)])
4054 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4055 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4056 new = _ComputeNewInstanceViolations(ipol,
4057 new_ipolicy, instances)
4058 if new:
4059 violations.update(new)
4061 if violations:
4062 self.LogWarning("After the ipolicy change the following instances"
4063 " violate them: %s",
4064 utils.CommaJoin(utils.NiceSort(violations)))
4066 if self.op.nicparams:
4067 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4068 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4069 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4070 nic_errors = []
4072 # check all instances for consistency
4073 for instance in self.cfg.GetAllInstancesInfo().values():
4074 for nic_idx, nic in enumerate(instance.nics):
4075 params_copy = copy.deepcopy(nic.nicparams)
4076 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4078 # check parameter syntax
4079 try:
4080 objects.NIC.CheckParameterSyntax(params_filled)
4081 except errors.ConfigurationError, err:
4082 nic_errors.append("Instance %s, nic/%d: %s" %
4083 (instance.name, nic_idx, err))
4085 # if we're moving instances to routed, check that they have an ip
4086 target_mode = params_filled[constants.NIC_MODE]
4087 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4088 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4089 " address" % (instance.name, nic_idx))
4091 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4092 "\n".join(nic_errors), errors.ECODE_INVAL)
4094 # hypervisor list/parameters
4095 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4096 if self.op.hvparams:
4097 for hv_name, hv_dict in self.op.hvparams.items():
4098 if hv_name not in self.new_hvparams:
4099 self.new_hvparams[hv_name] = hv_dict
4100 else:
4101 self.new_hvparams[hv_name].update(hv_dict)
4103 # disk template parameters
4104 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4105 if self.op.diskparams:
4106 for dt_name, dt_params in self.op.diskparams.items():
4107 if dt_name not in self.new_diskparams:
4108 self.new_diskparams[dt_name] = dt_params
4109 else:
4110 self.new_diskparams[dt_name].update(dt_params)
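# Editor's illustration of the merge above (hypothetical values, not from the
# original source): cluster-level disk parameters are copied first, then each
# submitted template either gets added or updated key by key, e.g.
#
#   cluster.diskparams == {"drbd": {"resync-rate": 1024}}
#   self.op.diskparams == {"drbd": {"metavg": "xenvg"}, "plain": {}}
#   # => self.new_diskparams ==
#   #    {"drbd": {"resync-rate": 1024, "metavg": "xenvg"}, "plain": {}}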
4112 # os hypervisor parameters
4113 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4114 if self.op.os_hvp:
4115 for os_name, hvs in self.op.os_hvp.items():
4116 if os_name not in self.new_os_hvp:
4117 self.new_os_hvp[os_name] = hvs
4118 else:
4119 for hv_name, hv_dict in hvs.items():
4120 if hv_name not in self.new_os_hvp[os_name]:
4121 self.new_os_hvp[os_name][hv_name] = hv_dict
4122 else:
4123 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4126 self.new_osp = objects.FillDict(cluster.osparams, {})
4127 if self.op.osparams:
4128 for os_name, osp in self.op.osparams.items():
4129 if os_name not in self.new_osp:
4130 self.new_osp[os_name] = {}
4132 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4133 use_none=True)
4135 if not self.new_osp[os_name]:
4136 # we removed all parameters
4137 del self.new_osp[os_name]
4139 # check the parameter validity (remote check)
4140 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4141 os_name, self.new_osp[os_name])
4143 # changes to the hypervisor list
4144 if self.op.enabled_hypervisors is not None:
4145 self.hv_list = self.op.enabled_hypervisors
4146 for hv in self.hv_list:
4147 # if the hypervisor doesn't already exist in the cluster
4148 # hvparams, we initialize it to empty, and then (in both
4149 # cases) we make sure to fill the defaults, as we might not
4150 # have a complete defaults list if the hypervisor wasn't
4151 # enabled before
4152 if hv not in new_hvp:
4153 new_hvp[hv] = {}
4154 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4155 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4156 else:
4157 self.hv_list = cluster.enabled_hypervisors
4159 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4160 # either the enabled list has changed, or the parameters have, validate
4161 for hv_name, hv_params in self.new_hvparams.items():
4162 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4163 (self.op.enabled_hypervisors and
4164 hv_name in self.op.enabled_hypervisors)):
4165 # either this is a new hypervisor, or its parameters have changed
4166 hv_class = hypervisor.GetHypervisor(hv_name)
4167 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4168 hv_class.CheckParameterSyntax(hv_params)
4169 _CheckHVParams(self, node_list, hv_name, hv_params)
4171 if self.op.os_hvp:
4172 # no need to check any newly-enabled hypervisors, since the
4173 # defaults have already been checked in the above code-block
4174 for os_name, os_hvp in self.new_os_hvp.items():
4175 for hv_name, hv_params in os_hvp.items():
4176 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4177 # we need to fill in the new os_hvp on top of the actual hv_p
4178 cluster_defaults = self.new_hvparams.get(hv_name, {})
4179 new_osp = objects.FillDict(cluster_defaults, hv_params)
4180 hv_class = hypervisor.GetHypervisor(hv_name)
4181 hv_class.CheckParameterSyntax(new_osp)
4182 _CheckHVParams(self, node_list, hv_name, new_osp)
4184 if self.op.default_iallocator:
4185 alloc_script = utils.FindFile(self.op.default_iallocator,
4186 constants.IALLOCATOR_SEARCH_PATH,
4188 if alloc_script is None:
4189 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4190 " specified" % self.op.default_iallocator,
4193 def Exec(self, feedback_fn):
4194 """Change the parameters of the cluster.
4197 if self.op.vg_name is not None:
4198 new_volume = self.op.vg_name
4199 if not new_volume:
4200 new_volume = None
4201 if new_volume != self.cfg.GetVGName():
4202 self.cfg.SetVGName(new_volume)
4203 else:
4204 feedback_fn("Cluster LVM configuration already in desired"
4205 " state, not changing")
4206 if self.op.drbd_helper is not None:
4207 new_helper = self.op.drbd_helper
4208 if not new_helper:
4209 new_helper = None
4210 if new_helper != self.cfg.GetDRBDHelper():
4211 self.cfg.SetDRBDHelper(new_helper)
4212 else:
4213 feedback_fn("Cluster DRBD helper already in desired state,"
4214 " not changing")
4215 if self.op.hvparams:
4216 self.cluster.hvparams = self.new_hvparams
4217 if self.op.os_hvp:
4218 self.cluster.os_hvp = self.new_os_hvp
4219 if self.op.enabled_hypervisors is not None:
4220 self.cluster.hvparams = self.new_hvparams
4221 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4222 if self.op.beparams:
4223 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4224 if self.op.nicparams:
4225 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4226 if self.op.ipolicy:
4227 self.cluster.ipolicy = self.new_ipolicy
4228 if self.op.osparams:
4229 self.cluster.osparams = self.new_osp
4230 if self.op.ndparams:
4231 self.cluster.ndparams = self.new_ndparams
4232 if self.op.diskparams:
4233 self.cluster.diskparams = self.new_diskparams
4234 if self.op.hv_state:
4235 self.cluster.hv_state_static = self.new_hv_state
4236 if self.op.disk_state:
4237 self.cluster.disk_state_static = self.new_disk_state
4239 if self.op.candidate_pool_size is not None:
4240 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4241 # we need to update the pool size here, otherwise the save will fail
4242 _AdjustCandidatePool(self, [])
4244 if self.op.maintain_node_health is not None:
4245 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4246 feedback_fn("Note: CONFD was disabled at build time, node health"
4247 " maintenance is not useful (still enabling it)")
4248 self.cluster.maintain_node_health = self.op.maintain_node_health
4250 if self.op.prealloc_wipe_disks is not None:
4251 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4253 if self.op.add_uids is not None:
4254 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4256 if self.op.remove_uids is not None:
4257 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4259 if self.op.uid_pool is not None:
4260 self.cluster.uid_pool = self.op.uid_pool
4262 if self.op.default_iallocator is not None:
4263 self.cluster.default_iallocator = self.op.default_iallocator
4265 if self.op.reserved_lvs is not None:
4266 self.cluster.reserved_lvs = self.op.reserved_lvs
4268 if self.op.use_external_mip_script is not None:
4269 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4271 def helper_os(aname, mods, desc):
4272 desc += " OS list"
4273 lst = getattr(self.cluster, aname)
4274 for key, val in mods:
4275 if key == constants.DDM_ADD:
4276 if val in lst:
4277 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4278 else:
4279 lst.append(val)
4280 elif key == constants.DDM_REMOVE:
4281 if val in lst:
4282 lst.remove(val)
4283 else:
4284 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4285 else:
4286 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4288 if self.op.hidden_os:
4289 helper_os("hidden_os", self.op.hidden_os, "hidden")
4291 if self.op.blacklisted_os:
4292 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4294 if self.op.master_netdev:
4295 master_params = self.cfg.GetMasterNetworkParameters()
4296 ems = self.cfg.GetUseExternalMipScript()
4297 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4298 self.cluster.master_netdev)
4299 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4301 result.Raise("Could not disable the master ip")
4302 feedback_fn("Changing master_netdev from %s to %s" %
4303 (master_params.netdev, self.op.master_netdev))
4304 self.cluster.master_netdev = self.op.master_netdev
4306 if self.op.master_netmask:
4307 master_params = self.cfg.GetMasterNetworkParameters()
4308 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4309 result = self.rpc.call_node_change_master_netmask(master_params.name,
4310 master_params.netmask,
4311 self.op.master_netmask,
4313 master_params.netdev)
4315 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4318 self.cluster.master_netmask = self.op.master_netmask
4320 self.cfg.Update(self.cluster, feedback_fn)
4322 if self.op.master_netdev:
4323 master_params = self.cfg.GetMasterNetworkParameters()
4324 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4325 self.op.master_netdev)
4326 ems = self.cfg.GetUseExternalMipScript()
4327 result = self.rpc.call_node_activate_master_ip(master_params.name,
4328 master_params, ems)
4329 if result.fail_msg:
4330 self.LogWarning("Could not re-enable the master ip on"
4331 " the master, please restart manually: %s",
4332 result.fail_msg)
4335 def _UploadHelper(lu, nodes, fname):
4336 """Helper for uploading a file and showing warnings.
4339 if os.path.exists(fname):
4340 result = lu.rpc.call_upload_file(nodes, fname)
4341 for to_node, to_result in result.items():
4342 msg = to_result.fail_msg
4344 msg = ("Copy of file %s to node %s failed: %s" %
4345 (fname, to_node, msg))
4346 lu.proc.LogWarning(msg)
4349 def _ComputeAncillaryFiles(cluster, redist):
4350 """Compute files external to Ganeti which need to be consistent.
4352 @type redist: boolean
4353 @param redist: Whether to include files which need to be redistributed
4356 # Compute files for all nodes
4357 files_all = set([
4358 pathutils.SSH_KNOWN_HOSTS_FILE,
4359 pathutils.CONFD_HMAC_KEY,
4360 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4361 pathutils.SPICE_CERT_FILE,
4362 pathutils.SPICE_CACERT_FILE,
4363 pathutils.RAPI_USERS_FILE,
4364 ])
4366 if redist:
4367 # we need to ship at least the RAPI certificate
4368 files_all.add(pathutils.RAPI_CERT_FILE)
4369 else:
4370 files_all.update(pathutils.ALL_CERT_FILES)
4371 files_all.update(ssconf.SimpleStore().GetFileList())
4373 if cluster.modify_etc_hosts:
4374 files_all.add(pathutils.ETC_HOSTS)
4376 if cluster.use_external_mip_script:
4377 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4379 # Files which are optional, these must:
4380 # - be present in one other category as well
4381 # - either exist or not exist on all nodes of that category (mc, vm all)
4382 files_opt = set([
4383 pathutils.RAPI_USERS_FILE,
4384 ])
4386 # Files which should only be on master candidates
4387 files_mc = set()
4389 if not redist:
4390 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4393 if (not redist and
4394 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4395 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4396 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4398 # Files which should only be on VM-capable nodes
4399 files_vm = set(
4400 filename
4401 for hv_name in cluster.enabled_hypervisors
4402 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4404 files_opt |= set(
4405 filename
4406 for hv_name in cluster.enabled_hypervisors
4407 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4409 # Filenames in each category must be unique
4410 all_files_set = files_all | files_mc | files_vm
4411 assert (len(all_files_set) ==
4412 sum(map(len, [files_all, files_mc, files_vm]))), \
4413 "Found file listed in more than one file list"
4415 # Optional files must be present in one other category
4416 assert all_files_set.issuperset(files_opt), \
4417 "Optional file not in a different required list"
4419 # This one file should never ever be re-distributed via RPC
4420 assert not (redist and
4421 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4423 return (files_all, files_opt, files_mc, files_vm)
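# Editor's sketch of how the tuple returned above is typically consumed
# (hypothetical call site, not from the original source):
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, True)
#   # files_all -> every node, files_mc -> master candidates only,
#   # files_vm  -> vm_capable nodes only; anything also listed in files_opt
#   # may legitimately be missing and should not be reported as an error.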
4426 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4427 """Distribute additional files which are part of the cluster configuration.
4429 ConfigWriter takes care of distributing the config and ssconf files, but
4430 there are more files which should be distributed to all nodes. This function
4431 makes sure those are copied.
4433 @param lu: calling logical unit
4434 @param additional_nodes: list of nodes not in the config to distribute to
4435 @type additional_vm: boolean
4436 @param additional_vm: whether the additional nodes are vm-capable or not
4439 # Gather target nodes
4440 cluster = lu.cfg.GetClusterInfo()
4441 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4443 online_nodes = lu.cfg.GetOnlineNodeList()
4444 online_set = frozenset(online_nodes)
4445 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4447 if additional_nodes is not None:
4448 online_nodes.extend(additional_nodes)
4449 if additional_vm:
4450 vm_nodes.extend(additional_nodes)
4452 # Never distribute to master node
4453 for nodelist in [online_nodes, vm_nodes]:
4454 if master_info.name in nodelist:
4455 nodelist.remove(master_info.name)
4458 (files_all, _, files_mc, files_vm) = \
4459 _ComputeAncillaryFiles(cluster, True)
4461 # Never re-distribute configuration file from here
4462 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4463 pathutils.CLUSTER_CONF_FILE in files_vm)
4464 assert not files_mc, "Master candidates not handled in this function"
4466 filemap = [
4467 (online_nodes, files_all),
4468 (vm_nodes, files_vm),
4469 ]
4472 for (node_list, files) in filemap:
4473 for fname in files:
4474 _UploadHelper(lu, node_list, fname)
4477 class LUClusterRedistConf(NoHooksLU):
4478 """Force the redistribution of cluster configuration.
4480 This is a very simple LU.
4485 def ExpandNames(self):
4486 self.needed_locks = {
4487 locking.LEVEL_NODE: locking.ALL_SET,
4489 self.share_locks[locking.LEVEL_NODE] = 1
4491 def Exec(self, feedback_fn):
4492 """Redistribute the configuration.
4495 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4496 _RedistributeAncillaryFiles(self)
4499 class LUClusterActivateMasterIp(NoHooksLU):
4500 """Activate the master IP on the master node.
4503 def Exec(self, feedback_fn):
4504 """Activate the master IP.
4507 master_params = self.cfg.GetMasterNetworkParameters()
4508 ems = self.cfg.GetUseExternalMipScript()
4509 result = self.rpc.call_node_activate_master_ip(master_params.name,
4511 result.Raise("Could not activate the master IP")
4514 class LUClusterDeactivateMasterIp(NoHooksLU):
4515 """Deactivate the master IP on the master node.
4518 def Exec(self, feedback_fn):
4519 """Deactivate the master IP.
4522 master_params = self.cfg.GetMasterNetworkParameters()
4523 ems = self.cfg.GetUseExternalMipScript()
4524 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4526 result.Raise("Could not deactivate the master IP")
4529 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4530 """Sleep and poll for an instance's disk to sync.
4533 if not instance.disks or disks is not None and not disks:
4534 return True
4536 disks = _ExpandCheckDisks(instance, disks)
4539 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4541 node = instance.primary_node
4543 for dev in disks:
4544 lu.cfg.SetDiskID(dev, node)
4546 # TODO: Convert to utils.Retry
4549 degr_retries = 10 # in seconds, as we sleep 1 second each time
4550 while True:
4551 max_time = 0
4552 done = True
4553 cumul_degraded = False
4554 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4555 msg = rstats.fail_msg
4557 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4560 raise errors.RemoteError("Can't contact node %s for mirror data,"
4561 " aborting." % node)
4564 rstats = rstats.payload
4566 for i, mstat in enumerate(rstats):
4567 if mstat is None:
4568 lu.LogWarning("Can't compute data for node %s/%s",
4569 node, disks[i].iv_name)
4570 continue
4572 cumul_degraded = (cumul_degraded or
4573 (mstat.is_degraded and mstat.sync_percent is None))
4574 if mstat.sync_percent is not None:
4575 done = False
4576 if mstat.estimated_time is not None:
4577 rem_time = ("%s remaining (estimated)" %
4578 utils.FormatSeconds(mstat.estimated_time))
4579 max_time = mstat.estimated_time
4581 rem_time = "no time estimate"
4582 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4583 (disks[i].iv_name, mstat.sync_percent, rem_time))
4585 # if we're done but degraded, let's do a few small retries, to
4586 # make sure we see a stable and not transient situation; therefore
4587 # we force restart of the loop
4588 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4589 logging.info("Degraded disks found, %d retries left", degr_retries)
4590 degr_retries -= 1
4591 time.sleep(1)
4592 continue
4594 if done or oneshot:
4595 break
4597 time.sleep(min(60, max_time))
4600 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4601 return not cumul_degraded
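# Editor's sketch of a typical call site (hypothetical, not from the original
# source): after creating or replacing mirrored disks, an LU waits for the
# resync to finish before declaring success:
#
#   if not _WaitForSync(self, instance, disks=new_disks):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)
#
# With oneshot=True the helper reports the current status once instead of
# polling until the synchronisation completes.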
4604 def _BlockdevFind(lu, node, dev, instance):
4605 """Wrapper around call_blockdev_find to annotate diskparams.
4607 @param lu: A reference to the lu object
4608 @param node: The node to call out
4609 @param dev: The device to find
4610 @param instance: The instance object the device belongs to
4611 @returns The result of the rpc call
4614 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4615 return lu.rpc.call_blockdev_find(node, disk)
4618 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4619 """Wrapper around L{_CheckDiskConsistencyInner}.
4622 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4623 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4627 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4628 ldisk=False):
4629 """Check that mirrors are not degraded.
4631 @attention: The device has to be annotated already.
4633 The ldisk parameter, if True, will change the test from the
4634 is_degraded attribute (which represents overall non-ok status for
4635 the device(s)) to the ldisk (representing the local storage status).
4638 lu.cfg.SetDiskID(dev, node)
4640 result = True
4642 if on_primary or dev.AssembleOnSecondary():
4643 rstats = lu.rpc.call_blockdev_find(node, dev)
4644 msg = rstats.fail_msg
4645 if msg:
4646 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4647 result = False
4648 elif not rstats.payload:
4649 lu.LogWarning("Can't find disk on node %s", node)
4650 result = False
4651 else:
4652 if ldisk:
4653 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4654 else:
4655 result = result and not rstats.payload.is_degraded
4657 if dev.children:
4658 for child in dev.children:
4659 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4660 on_primary)
4662 return result
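# Editor's sketch (hypothetical, not from the original source): callers use the
# ldisk flag to distinguish "the mirror as a whole is degraded" from "the local
# storage underneath is broken", e.g.
#
#   if not _CheckDiskConsistency(self, instance, dev, node, False, ldisk=True):
#     raise errors.OpExecError("Local storage for %s on %s is faulty" %
#                              (dev.iv_name, node))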
4665 class LUOobCommand(NoHooksLU):
4666 """Logical unit for OOB handling.
4670 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4672 def ExpandNames(self):
4673 """Gather locks we need.
4676 if self.op.node_names:
4677 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4678 lock_names = self.op.node_names
4680 lock_names = locking.ALL_SET
4682 self.needed_locks = {
4683 locking.LEVEL_NODE: lock_names,
4686 def CheckPrereq(self):
4687 """Check prerequisites.
4690 - the node exists in the configuration
4693 Any errors are signaled by raising errors.OpPrereqError.
4697 self.master_node = self.cfg.GetMasterNode()
4699 assert self.op.power_delay >= 0.0
4701 if self.op.node_names:
4702 if (self.op.command in self._SKIP_MASTER and
4703 self.master_node in self.op.node_names):
4704 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4705 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4707 if master_oob_handler:
4708 additional_text = ("run '%s %s %s' if you want to operate on the"
4709 " master regardless") % (master_oob_handler,
4713 additional_text = "it does not support out-of-band operations"
4715 raise errors.OpPrereqError(("Operating on the master node %s is not"
4716 " allowed for %s; %s") %
4717 (self.master_node, self.op.command,
4718 additional_text), errors.ECODE_INVAL)
4719 else:
4720 self.op.node_names = self.cfg.GetNodeList()
4721 if self.op.command in self._SKIP_MASTER:
4722 self.op.node_names.remove(self.master_node)
4724 if self.op.command in self._SKIP_MASTER:
4725 assert self.master_node not in self.op.node_names
4726 self.nodes = []
4727 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4728 if node is None:
4729 raise errors.OpPrereqError("Node %s not found" % node_name,
4730 errors.ECODE_NOENT)
4731 else:
4732 self.nodes.append(node)
4734 if (not self.op.ignore_status and
4735 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4736 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4737 " not marked offline") % node_name,
4740 def Exec(self, feedback_fn):
4741 """Execute OOB and return result if we expect any.
4744 master_node = self.master_node
4745 ret = []
4747 for idx, node in enumerate(utils.NiceSort(self.nodes,
4748 key=lambda node: node.name)):
4749 node_entry = [(constants.RS_NORMAL, node.name)]
4750 ret.append(node_entry)
4752 oob_program = _SupportsOob(self.cfg, node)
4754 if not oob_program:
4755 node_entry.append((constants.RS_UNAVAIL, None))
4756 continue
4758 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4759 self.op.command, oob_program, node.name)
4760 result = self.rpc.call_run_oob(master_node, oob_program,
4761 self.op.command, node.name,
4762 self.op.timeout)
4764 if result.fail_msg:
4765 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4766 node.name, result.fail_msg)
4767 node_entry.append((constants.RS_NODATA, None))
4768 else:
4769 try:
4770 self._CheckPayload(result)
4771 except errors.OpExecError, err:
4772 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4773 node.name, err)
4774 node_entry.append((constants.RS_NODATA, None))
4775 else:
4776 if self.op.command == constants.OOB_HEALTH:
4777 # For health we should log important events
4778 for item, status in result.payload:
4779 if status in [constants.OOB_STATUS_WARNING,
4780 constants.OOB_STATUS_CRITICAL]:
4781 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4782 item, node.name, status)
4784 if self.op.command == constants.OOB_POWER_ON:
4785 node.powered = True
4786 elif self.op.command == constants.OOB_POWER_OFF:
4787 node.powered = False
4788 elif self.op.command == constants.OOB_POWER_STATUS:
4789 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4790 if powered != node.powered:
4791 logging.warning(("Recorded power state (%s) of node '%s' does not"
4792 " match actual power state (%s)"), node.powered,
4795 # For configuration changing commands we should update the node
4796 if self.op.command in (constants.OOB_POWER_ON,
4797 constants.OOB_POWER_OFF):
4798 self.cfg.Update(node, feedback_fn)
4800 node_entry.append((constants.RS_NORMAL, result.payload))
4802 if (self.op.command == constants.OOB_POWER_ON and
4803 idx < len(self.nodes) - 1):
4804 time.sleep(self.op.power_delay)
4806 return ret
4808 def _CheckPayload(self, result):
4809 """Checks if the payload is valid.
4811 @param result: RPC result
4812 @raises errors.OpExecError: If payload is not valid
4815 errs = []
4816 if self.op.command == constants.OOB_HEALTH:
4817 if not isinstance(result.payload, list):
4818 errs.append("command 'health' is expected to return a list but got %s" %
4819 type(result.payload))
4820 else:
4821 for item, status in result.payload:
4822 if status not in constants.OOB_STATUSES:
4823 errs.append("health item '%s' has invalid status '%s'" %
4826 if self.op.command == constants.OOB_POWER_STATUS:
4827 if not isinstance(result.payload, dict):
4828 errs.append("power-status is expected to return a dict but got %s" %
4829 type(result.payload))
4831 if self.op.command in [
4832 constants.OOB_POWER_ON,
4833 constants.OOB_POWER_OFF,
4834 constants.OOB_POWER_CYCLE,
4836 if result.payload is not None:
4837 errs.append("%s is expected to not return payload but got '%s'" %
4838 (self.op.command, result.payload))
4840 if errs:
4841 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4842 utils.CommaJoin(errs))
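# Editor's illustration of the payload shapes checked above (hypothetical
# values, not from the original source):
#
#   constants.OOB_HEALTH        -> [["disk0", "OK"], ["psu1", "CRITICAL"]]
#   constants.OOB_POWER_STATUS  -> {constants.OOB_POWER_STATUS_POWERED: True}
#   constants.OOB_POWER_ON/_OFF/_CYCLE -> None (no payload expected)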
4845 class _OsQuery(_QueryBase):
4846 FIELDS = query.OS_FIELDS
4848 def ExpandNames(self, lu):
4849 # Lock all nodes in shared mode
4850 # Temporary removal of locks, should be reverted later
4851 # TODO: reintroduce locks when they are lighter-weight
4852 lu.needed_locks = {}
4853 #self.share_locks[locking.LEVEL_NODE] = 1
4854 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4856 # The following variables interact with _QueryBase._GetNames
4857 if self.names:
4858 self.wanted = self.names
4859 else:
4860 self.wanted = locking.ALL_SET
4862 self.do_locking = self.use_locking
4864 def DeclareLocks(self, lu, level):
4865 pass
4867 @staticmethod
4868 def _DiagnoseByOS(rlist):
4869 """Remaps a per-node return list into an a per-os per-node dictionary
4871 @param rlist: a map with node names as keys and OS objects as values
4874 @return: a dictionary with osnames as keys and as value another
4875 map, with nodes as keys and tuples of (path, status, diagnose,
4876 variants, parameters, api_versions) as values, eg::
4878 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4879 (/srv/..., False, "invalid api")],
4880 "node2": [(/srv/..., True, "", [], [])]}
4884 all_os = {}
4885 # we build here the list of nodes that didn't fail the RPC (at RPC
4886 # level), so that nodes with a non-responding node daemon don't
4887 # make all OSes invalid
4888 good_nodes = [node_name for node_name in rlist
4889 if not rlist[node_name].fail_msg]
4890 for node_name, nr in rlist.items():
4891 if nr.fail_msg or not nr.payload:
4892 continue
4893 for (name, path, status, diagnose, variants,
4894 params, api_versions) in nr.payload:
4895 if name not in all_os:
4896 # build a list of nodes for this os containing empty lists
4897 # for each node in node_list
4898 all_os[name] = {}
4899 for nname in good_nodes:
4900 all_os[name][nname] = []
4901 # convert params from [name, help] to (name, help)
4902 params = [tuple(v) for v in params]
4903 all_os[name][node_name].append((path, status, diagnose,
4904 variants, params, api_versions))
4905 return all_os
4907 def _GetQueryData(self, lu):
4908 """Computes the list of nodes and their attributes.
4911 # Locking is not used
4912 assert not (compat.any(lu.glm.is_owned(level)
4913 for level in locking.LEVELS
4914 if level != locking.LEVEL_CLUSTER) or
4915 self.do_locking or self.use_locking)
4917 valid_nodes = [node.name
4918 for node in lu.cfg.GetAllNodesInfo().values()
4919 if not node.offline and node.vm_capable]
4920 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4921 cluster = lu.cfg.GetClusterInfo()
4923 data = {}
4925 for (os_name, os_data) in pol.items():
4926 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4927 hidden=(os_name in cluster.hidden_os),
4928 blacklisted=(os_name in cluster.blacklisted_os))
4930 variants = set()
4931 parameters = set()
4932 api_versions = set()
4934 for idx, osl in enumerate(os_data.values()):
4935 info.valid = bool(info.valid and osl and osl[0][1])
4936 if not info.valid:
4937 break
4939 (node_variants, node_params, node_api) = osl[0][3:6]
4940 if idx == 0:
4941 # First entry
4942 variants.update(node_variants)
4943 parameters.update(node_params)
4944 api_versions.update(node_api)
4945 else:
4946 # Filter out inconsistent values
4947 variants.intersection_update(node_variants)
4948 parameters.intersection_update(node_params)
4949 api_versions.intersection_update(node_api)
4951 info.variants = list(variants)
4952 info.parameters = list(parameters)
4953 info.api_versions = list(api_versions)
4955 data[os_name] = info
4957 # Prepare data in requested order
4958 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4959 if name in data]
4962 class LUOsDiagnose(NoHooksLU):
4963 """Logical unit for OS diagnose/query.
4968 @staticmethod
4969 def _BuildFilter(fields, names):
4970 """Builds a filter for querying OSes.
4973 name_filter = qlang.MakeSimpleFilter("name", names)
4975 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4976 # respective field is not requested
4977 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4978 for fname in ["hidden", "blacklisted"]
4979 if fname not in fields]
4980 if "valid" not in fields:
4981 status_filter.append([qlang.OP_TRUE, "valid"])
4983 if status_filter:
4984 status_filter.insert(0, qlang.OP_AND)
4985 else:
4986 status_filter = None
4988 if name_filter and status_filter:
4989 return [qlang.OP_AND, name_filter, status_filter]
4990 elif name_filter:
4991 return name_filter
4992 else:
4993 return status_filter
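# Editor's illustration (not from the original source): for a plain
# "gnt-os list -o name" the names list is empty and none of the status fields
# are requested, so the filter built above is roughly
#
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
#
# whereas explicitly requesting "hidden", "blacklisted" or "valid" drops the
# corresponding clause.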
4995 def CheckArguments(self):
4996 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4997 self.op.output_fields, False)
4999 def ExpandNames(self):
5000 self.oq.ExpandNames(self)
5002 def Exec(self, feedback_fn):
5003 return self.oq.OldStyleQuery(self)
5006 class LUNodeRemove(LogicalUnit):
5007 """Logical unit for removing a node.
5010 HPATH = "node-remove"
5011 HTYPE = constants.HTYPE_NODE
5013 def BuildHooksEnv(self):
5018 "OP_TARGET": self.op.node_name,
5019 "NODE_NAME": self.op.node_name,
5022 def BuildHooksNodes(self):
5023 """Build hooks nodes.
5025 This doesn't run on the target node in the pre phase as a failed
5026 node would then be impossible to remove.
5029 all_nodes = self.cfg.GetNodeList()
5030 try:
5031 all_nodes.remove(self.op.node_name)
5032 except ValueError:
5033 pass
5034 return (all_nodes, all_nodes)
5036 def CheckPrereq(self):
5037 """Check prerequisites.
5040 - the node exists in the configuration
5041 - it does not have primary or secondary instances
5042 - it's not the master
5044 Any errors are signaled by raising errors.OpPrereqError.
5047 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5048 node = self.cfg.GetNodeInfo(self.op.node_name)
5049 assert node is not None
5051 masternode = self.cfg.GetMasterNode()
5052 if node.name == masternode:
5053 raise errors.OpPrereqError("Node is the master node, failover to another"
5054 " node is required", errors.ECODE_INVAL)
5056 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5057 if node.name in instance.all_nodes:
5058 raise errors.OpPrereqError("Instance %s is still running on the node,"
5059 " please remove first" % instance_name,
5061 self.op.node_name = node.name
5062 self.node = node
5064 def Exec(self, feedback_fn):
5065 """Removes the node from the cluster.
5069 logging.info("Stopping the node daemon and removing configs from node %s",
5072 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5074 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5075 "Not owning BGL"
5077 # Promote nodes to master candidate as needed
5078 _AdjustCandidatePool(self, exceptions=[node.name])
5079 self.context.RemoveNode(node.name)
5081 # Run post hooks on the node before it's removed
5082 _RunPostHook(self, node.name)
5084 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5085 msg = result.fail_msg
5087 self.LogWarning("Errors encountered on the remote node while leaving"
5088 " the cluster: %s", msg)
5090 # Remove node from our /etc/hosts
5091 if self.cfg.GetClusterInfo().modify_etc_hosts:
5092 master_node = self.cfg.GetMasterNode()
5093 result = self.rpc.call_etc_hosts_modify(master_node,
5094 constants.ETC_HOSTS_REMOVE,
5096 result.Raise("Can't update hosts file with new host data")
5097 _RedistributeAncillaryFiles(self)
5100 class _NodeQuery(_QueryBase):
5101 FIELDS = query.NODE_FIELDS
5103 def ExpandNames(self, lu):
5104 lu.needed_locks = {}
5105 lu.share_locks = _ShareAll()
5107 if self.names:
5108 self.wanted = _GetWantedNodes(lu, self.names)
5109 else:
5110 self.wanted = locking.ALL_SET
5112 self.do_locking = (self.use_locking and
5113 query.NQ_LIVE in self.requested_data)
5115 if self.do_locking:
5116 # If any non-static field is requested we need to lock the nodes
5117 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5119 def DeclareLocks(self, lu, level):
5120 pass
5122 def _GetQueryData(self, lu):
5123 """Computes the list of nodes and their attributes.
5126 all_info = lu.cfg.GetAllNodesInfo()
5128 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5130 # Gather data as requested
5131 if query.NQ_LIVE in self.requested_data:
5132 # filter out non-vm_capable nodes
5133 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5135 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5136 [lu.cfg.GetHypervisorType()])
5137 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5138 for (name, nresult) in node_data.items()
5139 if not nresult.fail_msg and nresult.payload)
5140 else:
5141 live_data = None
5143 if query.NQ_INST in self.requested_data:
5144 node_to_primary = dict([(name, set()) for name in nodenames])
5145 node_to_secondary = dict([(name, set()) for name in nodenames])
5147 inst_data = lu.cfg.GetAllInstancesInfo()
5149 for inst in inst_data.values():
5150 if inst.primary_node in node_to_primary:
5151 node_to_primary[inst.primary_node].add(inst.name)
5152 for secnode in inst.secondary_nodes:
5153 if secnode in node_to_secondary:
5154 node_to_secondary[secnode].add(inst.name)
5155 else:
5156 node_to_primary = None
5157 node_to_secondary = None
5159 if query.NQ_OOB in self.requested_data:
5160 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5161 for name, node in all_info.iteritems())
5162 else:
5163 oob_support = None
5165 if query.NQ_GROUP in self.requested_data:
5166 groups = lu.cfg.GetAllNodeGroupsInfo()
5167 else:
5168 groups = {}
5170 return query.NodeQueryData([all_info[name] for name in nodenames],
5171 live_data, lu.cfg.GetMasterNode(),
5172 node_to_primary, node_to_secondary, groups,
5173 oob_support, lu.cfg.GetClusterInfo())
5176 class LUNodeQuery(NoHooksLU):
5177 """Logical unit for querying nodes.
5180 # pylint: disable=W0142
5183 def CheckArguments(self):
5184 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5185 self.op.output_fields, self.op.use_locking)
5187 def ExpandNames(self):
5188 self.nq.ExpandNames(self)
5190 def DeclareLocks(self, level):
5191 self.nq.DeclareLocks(self, level)
5193 def Exec(self, feedback_fn):
5194 return self.nq.OldStyleQuery(self)
5197 class LUNodeQueryvols(NoHooksLU):
5198 """Logical unit for getting volumes on node(s).
5202 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5203 _FIELDS_STATIC = utils.FieldSet("node")
5205 def CheckArguments(self):
5206 _CheckOutputFields(static=self._FIELDS_STATIC,
5207 dynamic=self._FIELDS_DYNAMIC,
5208 selected=self.op.output_fields)
5210 def ExpandNames(self):
5211 self.share_locks = _ShareAll()
5212 self.needed_locks = {}
5214 if not self.op.nodes:
5215 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5217 self.needed_locks[locking.LEVEL_NODE] = \
5218 _GetWantedNodes(self, self.op.nodes)
5220 def Exec(self, feedback_fn):
5221 """Computes the list of nodes and their attributes.
5224 nodenames = self.owned_locks(locking.LEVEL_NODE)
5225 volumes = self.rpc.call_node_volumes(nodenames)
5227 ilist = self.cfg.GetAllInstancesInfo()
5228 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5230 output = []
5231 for node in nodenames:
5232 nresult = volumes[node]
5233 if nresult.offline:
5234 continue
5235 msg = nresult.fail_msg
5236 if msg:
5237 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5238 continue
5240 node_vols = sorted(nresult.payload,
5241 key=operator.itemgetter("dev"))
5243 for vol in node_vols:
5244 node_output = []
5245 for field in self.op.output_fields:
5246 if field == "node":
5247 val = node
5248 elif field == "phys":
5249 val = vol["dev"]
5250 elif field == "vg":
5251 val = vol["vg"]
5252 elif field == "name":
5253 val = vol["name"]
5254 elif field == "size":
5255 val = int(float(vol["size"]))
5256 elif field == "instance":
5257 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5258 else:
5259 raise errors.ParameterError(field)
5260 node_output.append(str(val))
5262 output.append(node_output)
5264 return output
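# Editor's illustration of the rows returned above (hypothetical values, not
# from the original source): with output_fields ["node", "name", "size",
# "instance"] each logical volume becomes one list of strings, e.g.
#
#   [["node1.example.com", "disk0", "10240", "instance1.example.com"],
#    ["node1.example.com", "disk1", "2048", "-"]]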
5267 class LUNodeQueryStorage(NoHooksLU):
5268 """Logical unit for getting information on storage units on node(s).
5271 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5274 def CheckArguments(self):
5275 _CheckOutputFields(static=self._FIELDS_STATIC,
5276 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5277 selected=self.op.output_fields)
5279 def ExpandNames(self):
5280 self.share_locks = _ShareAll()
5281 self.needed_locks = {}
5284 self.needed_locks[locking.LEVEL_NODE] = \
5285 _GetWantedNodes(self, self.op.nodes)
5287 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5289 def Exec(self, feedback_fn):
5290 """Computes the list of nodes and their attributes.
5293 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5295 # Always get name to sort by
5296 if constants.SF_NAME in self.op.output_fields:
5297 fields = self.op.output_fields[:]
5298 else:
5299 fields = [constants.SF_NAME] + self.op.output_fields
5301 # Never ask for node or type as it's only known to the LU
5302 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5303 while extra in fields:
5304 fields.remove(extra)
5306 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5307 name_idx = field_idx[constants.SF_NAME]
5309 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5310 data = self.rpc.call_storage_list(self.nodes,
5311 self.op.storage_type, st_args,
5312 self.op.name, fields)
5316 for node in utils.NiceSort(self.nodes):
5317 nresult = data[node]
5318 if nresult.offline:
5319 continue
5321 msg = nresult.fail_msg
5322 if msg:
5323 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5324 continue
5326 rows = dict([(row[name_idx], row) for row in nresult.payload])
5328 for name in utils.NiceSort(rows.keys()):
5333 for field in self.op.output_fields:
5334 if field == constants.SF_NODE:
5335 val = node
5336 elif field == constants.SF_TYPE:
5337 val = self.op.storage_type
5338 elif field in field_idx:
5339 val = row[field_idx[field]]
5340 else:
5341 raise errors.ParameterError(field)
5350 class _InstanceQuery(_QueryBase):
5351 FIELDS = query.INSTANCE_FIELDS
5353 def ExpandNames(self, lu):
5354 lu.needed_locks = {}
5355 lu.share_locks = _ShareAll()
5357 if self.names:
5358 self.wanted = _GetWantedInstances(lu, self.names)
5359 else:
5360 self.wanted = locking.ALL_SET
5362 self.do_locking = (self.use_locking and
5363 query.IQ_LIVE in self.requested_data)
5364 if self.do_locking:
5365 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5366 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5367 lu.needed_locks[locking.LEVEL_NODE] = []
5368 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5370 self.do_grouplocks = (self.do_locking and
5371 query.IQ_NODES in self.requested_data)
5373 def DeclareLocks(self, lu, level):
5375 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5376 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5378 # Lock all groups used by instances optimistically; this requires going
5379 # via the node before it's locked, requiring verification later on
5380 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5381 set(group_uuid
5382 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5383 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5384 elif level == locking.LEVEL_NODE:
5385 lu._LockInstancesNodes() # pylint: disable=W0212
5387 @staticmethod
5388 def _CheckGroupLocks(lu):
5389 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5390 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5392 # Check if node groups for locked instances are still correct
5393 for instance_name in owned_instances:
5394 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5396 def _GetQueryData(self, lu):
5397 """Computes the list of instances and their attributes.
5400 if self.do_grouplocks:
5401 self._CheckGroupLocks(lu)
5403 cluster = lu.cfg.GetClusterInfo()
5404 all_info = lu.cfg.GetAllInstancesInfo()
5406 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5408 instance_list = [all_info[name] for name in instance_names]
5409 nodes = frozenset(itertools.chain(*(inst.all_nodes
5410 for inst in instance_list)))
5411 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5412 bad_nodes = []
5413 offline_nodes = []
5414 wrongnode_inst = set()
5416 # Gather data as requested
5417 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5418 live_data = {}
5419 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5420 for name in nodes:
5421 result = node_data[name]
5422 if result.offline:
5423 # offline nodes will be in both lists
5424 assert result.fail_msg
5425 offline_nodes.append(name)
5426 if result.fail_msg:
5427 bad_nodes.append(name)
5428 elif result.payload:
5429 for inst in result.payload:
5430 if inst in all_info:
5431 if all_info[inst].primary_node == name:
5432 live_data.update(result.payload)
5433 else:
5434 wrongnode_inst.add(inst)
5436 # orphan instance; we don't list it here as we don't
5437 # handle this case yet in the output of instance listing
5438 logging.warning("Orphan instance '%s' found on node %s",
5439 inst, name)
5440 # else no instance is alive
5441 else:
5442 live_data = {}
5444 if query.IQ_DISKUSAGE in self.requested_data:
5445 gmi = ganeti.masterd.instance
5446 disk_usage = dict((inst.name,
5447 gmi.ComputeDiskSize(inst.disk_template,
5448 [{constants.IDISK_SIZE: disk.size}
5449 for disk in inst.disks]))
5450 for inst in instance_list)
5451 else:
5452 disk_usage = None
5454 if query.IQ_CONSOLE in self.requested_data:
5455 consinfo = {}
5456 for inst in instance_list:
5457 if inst.name in live_data:
5458 # Instance is running
5459 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5460 else:
5461 consinfo[inst.name] = None
5462 assert set(consinfo.keys()) == set(instance_names)
5463 else:
5464 consinfo = None
5466 if query.IQ_NODES in self.requested_data:
5467 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5468 instance_list)))
5469 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5470 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5471 for uuid in set(map(operator.attrgetter("group"),
5472 nodes.values())))
5473 else:
5474 nodes = None
5475 groups = None
5477 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5478 disk_usage, offline_nodes, bad_nodes,
5479 live_data, wrongnode_inst, consinfo,
5480 nodes, groups)
5483 class LUQuery(NoHooksLU):
5484 """Query for resources/items of a certain kind.
5487 # pylint: disable=W0142
5490 def CheckArguments(self):
5491 qcls = _GetQueryImplementation(self.op.what)
5493 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5495 def ExpandNames(self):
5496 self.impl.ExpandNames(self)
5498 def DeclareLocks(self, level):
5499 self.impl.DeclareLocks(self, level)
5501 def Exec(self, feedback_fn):
5502 return self.impl.NewStyleQuery(self)
5505 class LUQueryFields(NoHooksLU):
5506 """Query for resources/items of a certain kind.
5509 # pylint: disable=W0142
5512 def CheckArguments(self):
5513 self.qcls = _GetQueryImplementation(self.op.what)
5515 def ExpandNames(self):
5516 self.needed_locks = {}
5518 def Exec(self, feedback_fn):
5519 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5522 class LUNodeModifyStorage(NoHooksLU):
5523 """Logical unit for modifying a storage volume on a node.
5528 def CheckArguments(self):
5529 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5531 storage_type = self.op.storage_type
5533 try:
5534 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5535 except KeyError:
5536 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5537 " modified" % storage_type,
5540 diff = set(self.op.changes.keys()) - modifiable
5542 raise errors.OpPrereqError("The following fields can not be modified for"
5543 " storage units of type '%s': %r" %
5544 (storage_type, list(diff)),
5547 def ExpandNames(self):
5548 self.needed_locks = {
5549 locking.LEVEL_NODE: self.op.node_name,
5552 def Exec(self, feedback_fn):
5553 """Computes the list of nodes and their attributes.
5556 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5557 result = self.rpc.call_storage_modify(self.op.node_name,
5558 self.op.storage_type, st_args,
5559 self.op.name, self.op.changes)
5560 result.Raise("Failed to modify storage unit '%s' on %s" %
5561 (self.op.name, self.op.node_name))
5564 class LUNodeAdd(LogicalUnit):
5565 """Logical unit for adding node to the cluster.
5568 HPATH = "node-add"
5569 HTYPE = constants.HTYPE_NODE
5570 _NFLAGS = ["master_capable", "vm_capable"]
5572 def CheckArguments(self):
5573 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5574 # validate/normalize the node name
5575 self.hostname = netutils.GetHostname(name=self.op.node_name,
5576 family=self.primary_ip_family)
5577 self.op.node_name = self.hostname.name
5579 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5580 raise errors.OpPrereqError("Cannot readd the master node",
5583 if self.op.readd and self.op.group:
5584 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5585 " being readded", errors.ECODE_INVAL)
5587 def BuildHooksEnv(self):
5590 This will run on all nodes before, and on all nodes + the new node after.
5594 "OP_TARGET": self.op.node_name,
5595 "NODE_NAME": self.op.node_name,
5596 "NODE_PIP": self.op.primary_ip,
5597 "NODE_SIP": self.op.secondary_ip,
5598 "MASTER_CAPABLE": str(self.op.master_capable),
5599 "VM_CAPABLE": str(self.op.vm_capable),
5602 def BuildHooksNodes(self):
5603 """Build hooks nodes.
5606 # Exclude added node
5607 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5608 post_nodes = pre_nodes + [self.op.node_name, ]
5610 return (pre_nodes, post_nodes)
5612 def CheckPrereq(self):
5613 """Check prerequisites.
5616 - the new node is not already in the config
5618 - its parameters (single/dual homed) matches the cluster
5620 Any errors are signaled by raising errors.OpPrereqError.
5623 cfg = self.cfg
5624 hostname = self.hostname
5625 node = hostname.name
5626 primary_ip = self.op.primary_ip = hostname.ip
5627 if self.op.secondary_ip is None:
5628 if self.primary_ip_family == netutils.IP6Address.family:
5629 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5630 " IPv4 address must be given as secondary",
5632 self.op.secondary_ip = primary_ip
5634 secondary_ip = self.op.secondary_ip
5635 if not netutils.IP4Address.IsValid(secondary_ip):
5636 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5637 " address" % secondary_ip, errors.ECODE_INVAL)
5639 node_list = cfg.GetNodeList()
5640 if not self.op.readd and node in node_list:
5641 raise errors.OpPrereqError("Node %s is already in the configuration" %
5642 node, errors.ECODE_EXISTS)
5643 elif self.op.readd and node not in node_list:
5644 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5647 self.changed_primary_ip = False
5649 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5650 if self.op.readd and node == existing_node_name:
5651 if existing_node.secondary_ip != secondary_ip:
5652 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5653 " address configuration as before",
5655 if existing_node.primary_ip != primary_ip:
5656 self.changed_primary_ip = True
5658 continue
5660 if (existing_node.primary_ip == primary_ip or
5661 existing_node.secondary_ip == primary_ip or
5662 existing_node.primary_ip == secondary_ip or
5663 existing_node.secondary_ip == secondary_ip):
5664 raise errors.OpPrereqError("New node ip address(es) conflict with"
5665 " existing node %s" % existing_node.name,
5666 errors.ECODE_NOTUNIQUE)
5668 # After this 'if' block, None is no longer a valid value for the
5669 # _capable op attributes
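# For re-adds, flags that were not specified inherit the node's previous
# values; for new nodes, unspecified flags default to True.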
5671 old_node = self.cfg.GetNodeInfo(node)
5672 assert old_node is not None, "Can't retrieve locked node %s" % node
5673 for attr in self._NFLAGS:
5674 if getattr(self.op, attr) is None:
5675 setattr(self.op, attr, getattr(old_node, attr))
5677 for attr in self._NFLAGS:
5678 if getattr(self.op, attr) is None:
5679 setattr(self.op, attr, True)
5681 if self.op.readd and not self.op.vm_capable:
5682 pri, sec = cfg.GetNodeInstances(node)
5684 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5685 " flag set to false, but it already holds"
5686 " instances" % node,
5689 # check that the type of the node (single versus dual homed) is the
5690 # same as for the master
5691 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5692 master_singlehomed = myself.secondary_ip == myself.primary_ip
5693 newbie_singlehomed = secondary_ip == primary_ip
5694 if master_singlehomed != newbie_singlehomed:
5695 if master_singlehomed:
5696 raise errors.OpPrereqError("The master has no secondary ip but the"
5697 " new node has one",
5700 raise errors.OpPrereqError("The master has a secondary ip but the"
5701 " new node doesn't have one",
5704 # checks reachability
5705 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5706 raise errors.OpPrereqError("Node not reachable by ping",
5707 errors.ECODE_ENVIRON)
5709 if not newbie_singlehomed:
5710 # check reachability from my secondary ip to newbie's secondary ip
5711 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5712 source=myself.secondary_ip):
5713 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5714 " based ping to node daemon port",
5715 errors.ECODE_ENVIRON)
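# Decide whether the new node should also join the master candidate pool;
# nodes that are not master capable are never promoted, otherwise
# _DecideSelfPromotion makes the call.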
5722 if self.op.master_capable:
5723 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5725 self.master_candidate = False
5728 self.new_node = old_node
5730 node_group = cfg.LookupNodeGroup(self.op.group)
5731 self.new_node = objects.Node(name=node,
5732 primary_ip=primary_ip,
5733 secondary_ip=secondary_ip,
5734 master_candidate=self.master_candidate,
5735 offline=False, drained=False,
5738 if self.op.ndparams:
5739 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5741 if self.op.hv_state:
5742 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5744 if self.op.disk_state:
5745 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5747 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5748 # it a property on the base class.
5749 result = rpc.DnsOnlyRunner().call_version([node])[node]
5750 result.Raise("Can't get version information from node %s" % node)
5751 if constants.PROTOCOL_VERSION == result.payload:
5752 logging.info("Communication to node %s fine, sw version %s match",
5753 node, result.payload)
5755 raise errors.OpPrereqError("Version mismatch master version %s,"
5756 " node version %s" %
5757 (constants.PROTOCOL_VERSION, result.payload),
5758 errors.ECODE_ENVIRON)
5760 def Exec(self, feedback_fn):
5761 """Adds the new node to the cluster.
5764 new_node = self.new_node
5765 node = new_node.name
5767 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5770 # We are adding a new node, so we assume it's powered
5771 new_node.powered = True
5773 # for re-adds, reset the offline/drained/master-candidate flags;
5774 # we need to reset here, otherwise offline would prevent RPC calls
5775 # later in the procedure; this also means that if the re-add
5776 # fails, we are left with a non-offlined, broken node
5778 new_node.drained = new_node.offline = False # pylint: disable=W0201
5779 self.LogInfo("Readding a node, the offline/drained flags were reset")
5780 # if we demote the node, we do cleanup later in the procedure
5781 new_node.master_candidate = self.master_candidate
5782 if self.changed_primary_ip:
5783 new_node.primary_ip = self.op.primary_ip
5785 # copy the master/vm_capable flags
5786 for attr in self._NFLAGS:
5787 setattr(new_node, attr, getattr(self.op, attr))
5789 # notify the user about any possible mc promotion
5790 if new_node.master_candidate:
5791 self.LogInfo("Node will be a master candidate")
5793 if self.op.ndparams:
5794 new_node.ndparams = self.op.ndparams
5796 new_node.ndparams = {}
5798 if self.op.hv_state:
5799 new_node.hv_state_static = self.new_hv_state
5801 if self.op.disk_state:
5802 new_node.disk_state_static = self.new_disk_state
5804 # Add node to our /etc/hosts, and add key to known_hosts
5805 if self.cfg.GetClusterInfo().modify_etc_hosts:
5806 master_node = self.cfg.GetMasterNode()
5807 result = self.rpc.call_etc_hosts_modify(master_node,
5808 constants.ETC_HOSTS_ADD,
5811 result.Raise("Can't update hosts file with new host data")
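# If the node is dual-homed, verify that its secondary IP is actually
# configured and reachable; then run a node-verify pass from the master to
# confirm SSH connectivity and hostname resolution before registering the
# node.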
5813 if new_node.secondary_ip != new_node.primary_ip:
5814 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5817 node_verify_list = [self.cfg.GetMasterNode()]
5818 node_verify_param = {
5819 constants.NV_NODELIST: ([node], {}),
5820 # TODO: do a node-net-test as well?
5823 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5824 self.cfg.GetClusterName())
5825 for verifier in node_verify_list:
5826 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5827 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5829 for failed in nl_payload:
5830 feedback_fn("ssh/hostname verification failed"
5831 " (checking from %s): %s" %
5832 (verifier, nl_payload[failed]))
5833 raise errors.OpExecError("ssh/hostname verification failed")
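# For a re-add, update the existing configuration entry, redistribute the
# ancillary files and, if the node lost its master candidate role, tell it
# to demote itself; for a fresh add, push the ancillary files to the new
# node and register it in the configuration.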
5836 _RedistributeAncillaryFiles(self)
5837 self.context.ReaddNode(new_node)
5838 # make sure we redistribute the config
5839 self.cfg.Update(new_node, feedback_fn)
5840 # and make sure the new node will not have old files around
5841 if not new_node.master_candidate:
5842 result = self.rpc.call_node_demote_from_mc(new_node.name)
5843 msg = result.fail_msg
5845 self.LogWarning("Node failed to demote itself from master"
5846 " candidate status: %s" % msg)
5848 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5849 additional_vm=self.op.vm_capable)
5850 self.context.AddNode(new_node, self.proc.GetECId())
5853 class LUNodeSetParams(LogicalUnit):
5854 """Modifies the parameters of a node.
5856 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5857 to the node role (as _ROLE_*)
5858 @cvar _R2F: a dictionary from node role to tuples of flags
5859 @cvar _FLAGS: a list of attribute names corresponding to the flags
5862 HPATH = "node-modify"
5863 HTYPE = constants.HTYPE_NODE
5865 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5867 (True, False, False): _ROLE_CANDIDATE,
5868 (False, True, False): _ROLE_DRAINED,
5869 (False, False, True): _ROLE_OFFLINE,
5870 (False, False, False): _ROLE_REGULAR,
5872 _R2F = dict((v, k) for k, v in _F2R.items())
5873 _FLAGS = ["master_candidate", "drained", "offline"]
5875 def CheckArguments(self):
5876 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5877 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5878 self.op.master_capable, self.op.vm_capable,
5879 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5881 if all_mods.count(None) == len(all_mods):
5882 raise errors.OpPrereqError("Please pass at least one modification",
5884 if all_mods.count(True) > 1:
5885 raise errors.OpPrereqError("Can't set the node into more than one"
5886 " state at the same time",
5889 # Boolean value that tells us whether we might be demoting from MC
5890 self.might_demote = (self.op.master_candidate is False or
5891 self.op.offline is True or
5892 self.op.drained is True or
5893 self.op.master_capable is False)
5895 if self.op.secondary_ip:
5896 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5897 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5898 " address" % self.op.secondary_ip,
5901 self.lock_all = self.op.auto_promote and self.might_demote
5902 self.lock_instances = self.op.secondary_ip is not None
5904 def _InstanceFilter(self, instance):
5905 """Filter for getting affected instances.
5908 return (instance.disk_template in constants.DTS_INT_MIRROR and
5909 self.op.node_name in instance.all_nodes)
5911 def ExpandNames(self):
5913 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5915 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5917 # Since modifying a node can have severe effects on currently running
5918 # operations, the resource lock is at least acquired in shared mode
5919 self.needed_locks[locking.LEVEL_NODE_RES] = \
5920 self.needed_locks[locking.LEVEL_NODE]
5922 # Get node resource and instance locks in shared mode; they are not used
5923 # for anything but read-only access
5924 self.share_locks[locking.LEVEL_NODE_RES] = 1
5925 self.share_locks[locking.LEVEL_INSTANCE] = 1
5927 if self.lock_instances:
5928 self.needed_locks[locking.LEVEL_INSTANCE] = \
5929 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5931 def BuildHooksEnv(self):
5934 This runs on the master node.
5938 "OP_TARGET": self.op.node_name,
5939 "MASTER_CANDIDATE": str(self.op.master_candidate),
5940 "OFFLINE": str(self.op.offline),
5941 "DRAINED": str(self.op.drained),
5942 "MASTER_CAPABLE": str(self.op.master_capable),
5943 "VM_CAPABLE": str(self.op.vm_capable),
5946 def BuildHooksNodes(self):
5947 """Build hooks nodes.
5950 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5953 def CheckPrereq(self):
5954 """Check prerequisites.
5956 This only checks the instance list against the existing names.
5959 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5961 if self.lock_instances:
5962 affected_instances = \
5963 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5965 # Verify instance locks
5966 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5967 wanted_instances = frozenset(affected_instances.keys())
5968 if wanted_instances - owned_instances:
5969 raise errors.OpPrereqError("Instances affected by changing node %s's"
5970 " secondary IP address have changed since"
5971 " locks were acquired, wanted '%s', have"
5972 " '%s'; retry the operation" %
5974 utils.CommaJoin(wanted_instances),
5975 utils.CommaJoin(owned_instances)),
5978 affected_instances = None
5980 if (self.op.master_candidate is not None or
5981 self.op.drained is not None or
5982 self.op.offline is not None):
5983 # we can't change the master's node flags
5984 if self.op.node_name == self.cfg.GetMasterNode():
5985 raise errors.OpPrereqError("The master role can be changed"
5986 " only via master-failover",
5989 if self.op.master_candidate and not node.master_capable:
5990 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5991 " it a master candidate" % node.name,
5994 if self.op.vm_capable is False:
5995 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5997 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5998 " the vm_capable flag" % node.name,
6001 if node.master_candidate and self.might_demote and not self.lock_all:
6002 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6003 # check if after removing the current node, we're missing master
6005 (mc_remaining, mc_should, _) = \
6006 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6007 if mc_remaining < mc_should:
6008 raise errors.OpPrereqError("Not enough master candidates, please"
6009 " pass auto promote option to allow"
6010 " promotion (--auto-promote or RAPI"
6011 " auto_promote=True)", errors.ECODE_STATE)
6013 self.old_flags = old_flags = (node.master_candidate,
6014 node.drained, node.offline)
6015 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6016 self.old_role = old_role = self._F2R[old_flags]
6018 # Check for ineffective changes
6019 for attr in self._FLAGS:
6020 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6021 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6022 setattr(self.op, attr, None)
6024 # Past this point, any flag change to False means a transition
6025 # away from the respective state, as only real changes are kept
6027 # TODO: We might query the real power state if it supports OOB
6028 if _SupportsOob(self.cfg, node):
6029 if self.op.offline is False and not (node.powered or
6030 self.op.powered is True):
6031 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6032 " offline status can be reset") %
6033 self.op.node_name, errors.ECODE_STATE)
6034 elif self.op.powered is not None:
6035 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6036 " as it does not support out-of-band"
6037 " handling") % self.op.node_name,
6040 # If we're being deofflined/drained, we'll MC ourself if needed
6041 if (self.op.drained is False or self.op.offline is False or
6042 (self.op.master_capable and not node.master_capable)):
6043 if _DecideSelfPromotion(self):
6044 self.op.master_candidate = True
6045 self.LogInfo("Auto-promoting node to master candidate")
6047 # If we're no longer master capable, we'll demote ourselves from MC
6048 if self.op.master_capable is False and node.master_candidate:
6049 self.LogInfo("Demoting from master candidate")
6050 self.op.master_candidate = False
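# Derive the new node role: at most one flag can be True, a remaining False
# flag un-sets the corresponding state, and if nothing was requested the old
# role is kept.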
6053 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6054 if self.op.master_candidate:
6055 new_role = self._ROLE_CANDIDATE
6056 elif self.op.drained:
6057 new_role = self._ROLE_DRAINED
6058 elif self.op.offline:
6059 new_role = self._ROLE_OFFLINE
6060 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6061 # False is still in new flags, which means we're un-setting (the
6063 new_role = self._ROLE_REGULAR
6064 else: # no new flags, nothing, keep old role
6067 self.new_role = new_role
6069 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6070 # Trying to transition out of offline status
6071 result = self.rpc.call_version([node.name])[node.name]
6073 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6074 " to report its version: %s" %
6075 (node.name, result.fail_msg),
6078 self.LogWarning("Transitioning node from offline to online state"
6079 " without using re-add. Please make sure the node"
6082 # When changing the secondary ip, verify if this is a single-homed to
6083 # multi-homed transition or vice versa, and apply the relevant
6085 if self.op.secondary_ip:
6086 # Ok even without locking, because this can't be changed by any LU
6087 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6088 master_singlehomed = master.secondary_ip == master.primary_ip
6089 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6090 if self.op.force and node.name == master.name:
6091 self.LogWarning("Transitioning from single-homed to multi-homed"
6092 " cluster. All nodes will require a secondary ip.")
6094 raise errors.OpPrereqError("Changing the secondary ip on a"
6095 " single-homed cluster requires the"
6096 " --force option to be passed, and the"
6097 " target node to be the master",
6099 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6100 if self.op.force and node.name == master.name:
6101 self.LogWarning("Transitioning from multi-homed to single-homed"
6102 " cluster. Secondary IPs will have to be removed.")
6104 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6105 " same as the primary IP on a multi-homed"
6106 " cluster, unless the --force option is"
6107 " passed, and the target node is the"
6108 " master", errors.ECODE_INVAL)
6110 assert not (frozenset(affected_instances) -
6111 self.owned_locks(locking.LEVEL_INSTANCE))
6114 if affected_instances:
6115 msg = ("Cannot change secondary IP address: offline node has"
6116 " instances (%s) configured to use it" %
6117 utils.CommaJoin(affected_instances.keys()))
6118 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6120 # On online nodes, check that no instances are running, and that
6121 # the node has the new ip and we can reach it.
6122 for instance in affected_instances.values():
6123 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6124 msg="cannot change secondary ip")
6126 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6127 if master.name != node.name:
6128 # check reachability from master secondary ip to new secondary ip
6129 if not netutils.TcpPing(self.op.secondary_ip,
6130 constants.DEFAULT_NODED_PORT,
6131 source=master.secondary_ip):
6132 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6133 " based ping to node daemon port",
6134 errors.ECODE_ENVIRON)
6136 if self.op.ndparams:
6137 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6138 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6139 self.new_ndparams = new_ndparams
6141 if self.op.hv_state:
6142 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6143 self.node.hv_state_static)
6145 if self.op.disk_state:
6146 self.new_disk_state = \
6147 _MergeAndVerifyDiskState(self.op.disk_state,
6148 self.node.disk_state_static)
6150 def Exec(self, feedback_fn):
6155 old_role = self.old_role
6156 new_role = self.new_role
6160 if self.op.ndparams:
6161 node.ndparams = self.new_ndparams
6163 if self.op.powered is not None:
6164 node.powered = self.op.powered
6166 if self.op.hv_state:
6167 node.hv_state_static = self.new_hv_state
6169 if self.op.disk_state:
6170 node.disk_state_static = self.new_disk_state
6172 for attr in ["master_capable", "vm_capable"]:
6173 val = getattr(self.op, attr)
6175 setattr(node, attr, val)
6176 result.append((attr, str(val)))
6178 if new_role != old_role:
6179 # Tell the node to demote itself, if no longer MC and not offline
6180 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6181 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6183 self.LogWarning("Node failed to demote itself: %s", msg)
6185 new_flags = self._R2F[new_role]
6186 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6188 result.append((desc, str(nf)))
6189 (node.master_candidate, node.drained, node.offline) = new_flags
6191 # we locked all nodes, we adjust the CP before updating this node
6193 _AdjustCandidatePool(self, [node.name])
6195 if self.op.secondary_ip:
6196 node.secondary_ip = self.op.secondary_ip
6197 result.append(("secondary_ip", self.op.secondary_ip))
6199 # this will trigger configuration file update, if needed
6200 self.cfg.Update(node, feedback_fn)
6202 # this will trigger job queue propagation or cleanup if the mc
6204 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6205 self.context.ReaddNode(node)
6210 class LUNodePowercycle(NoHooksLU):
6211 """Powercycles a node.
6216 def CheckArguments(self):
6217 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6218 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6219 raise errors.OpPrereqError("The node is the master and the force"
6220 " parameter was not set",
6223 def ExpandNames(self):
6224 """Locking for PowercycleNode.
6226 This is a last-resort option and shouldn't block on other
6227 jobs. Therefore, we grab no locks.
6230 self.needed_locks = {}
6232 def Exec(self, feedback_fn):
6236 result = self.rpc.call_node_powercycle(self.op.node_name,
6237 self.cfg.GetHypervisorType())
6238 result.Raise("Failed to schedule the reboot")
6239 return result.payload
6242 class LUClusterQuery(NoHooksLU):
6243 """Query cluster configuration.
6248 def ExpandNames(self):
6249 self.needed_locks = {}
6251 def Exec(self, feedback_fn):
6252 """Return cluster config.
6255 cluster = self.cfg.GetClusterInfo()
6258 # Filter just for enabled hypervisors
6259 for os_name, hv_dict in cluster.os_hvp.items():
6260 os_hvp[os_name] = {}
6261 for hv_name, hv_params in hv_dict.items():
6262 if hv_name in cluster.enabled_hypervisors:
6263 os_hvp[os_name][hv_name] = hv_params
6265 # Convert ip_family to ip_version
6266 primary_ip_version = constants.IP4_VERSION
6267 if cluster.primary_ip_family == netutils.IP6Address.family:
6268 primary_ip_version = constants.IP6_VERSION
6271 "software_version": constants.RELEASE_VERSION,
6272 "protocol_version": constants.PROTOCOL_VERSION,
6273 "config_version": constants.CONFIG_VERSION,
6274 "os_api_version": max(constants.OS_API_VERSIONS),
6275 "export_version": constants.EXPORT_VERSION,
6276 "architecture": runtime.GetArchInfo(),
6277 "name": cluster.cluster_name,
6278 "master": cluster.master_node,
6279 "default_hypervisor": cluster.primary_hypervisor,
6280 "enabled_hypervisors": cluster.enabled_hypervisors,
6281 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6282 for hypervisor_name in cluster.enabled_hypervisors]),
6284 "beparams": cluster.beparams,
6285 "osparams": cluster.osparams,
6286 "ipolicy": cluster.ipolicy,
6287 "nicparams": cluster.nicparams,
6288 "ndparams": cluster.ndparams,
6289 "diskparams": cluster.diskparams,
6290 "candidate_pool_size": cluster.candidate_pool_size,
6291 "master_netdev": cluster.master_netdev,
6292 "master_netmask": cluster.master_netmask,
6293 "use_external_mip_script": cluster.use_external_mip_script,
6294 "volume_group_name": cluster.volume_group_name,
6295 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6296 "file_storage_dir": cluster.file_storage_dir,
6297 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6298 "maintain_node_health": cluster.maintain_node_health,
6299 "ctime": cluster.ctime,
6300 "mtime": cluster.mtime,
6301 "uuid": cluster.uuid,
6302 "tags": list(cluster.GetTags()),
6303 "uid_pool": cluster.uid_pool,
6304 "default_iallocator": cluster.default_iallocator,
6305 "reserved_lvs": cluster.reserved_lvs,
6306 "primary_ip_version": primary_ip_version,
6307 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6308 "hidden_os": cluster.hidden_os,
6309 "blacklisted_os": cluster.blacklisted_os,
6315 class LUClusterConfigQuery(NoHooksLU):
6316 """Return configuration values.
6321 def CheckArguments(self):
6322 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6324 def ExpandNames(self):
6325 self.cq.ExpandNames(self)
6327 def DeclareLocks(self, level):
6328 self.cq.DeclareLocks(self, level)
6330 def Exec(self, feedback_fn):
6331 result = self.cq.OldStyleQuery(self)
6333 assert len(result) == 1
6338 class _ClusterQuery(_QueryBase):
6339 FIELDS = query.CLUSTER_FIELDS
6341 #: Do not sort (there is only one item)
6344 def ExpandNames(self, lu):
6345 lu.needed_locks = {}
6347 # The following variables interact with _QueryBase._GetNames
6348 self.wanted = locking.ALL_SET
6349 self.do_locking = self.use_locking
6352 raise errors.OpPrereqError("Can not use locking for cluster queries",
6355 def DeclareLocks(self, lu, level):
6358 def _GetQueryData(self, lu):
6359 """Computes the list of nodes and their attributes.
6362 # Locking is not used
6363 assert not (compat.any(lu.glm.is_owned(level)
6364 for level in locking.LEVELS
6365 if level != locking.LEVEL_CLUSTER) or
6366 self.do_locking or self.use_locking)
6368 if query.CQ_CONFIG in self.requested_data:
6369 cluster = lu.cfg.GetClusterInfo()
6371 cluster = NotImplemented
6373 if query.CQ_QUEUE_DRAINED in self.requested_data:
6374 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6376 drain_flag = NotImplemented
6378 if query.CQ_WATCHER_PAUSE in self.requested_data:
6379 watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
6381 watcher_pause = NotImplemented
6383 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6386 class LUInstanceActivateDisks(NoHooksLU):
6387 """Bring up an instance's disks.
6392 def ExpandNames(self):
6393 self._ExpandAndLockInstance()
6394 self.needed_locks[locking.LEVEL_NODE] = []
6395 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6397 def DeclareLocks(self, level):
6398 if level == locking.LEVEL_NODE:
6399 self._LockInstancesNodes()
6401 def CheckPrereq(self):
6402 """Check prerequisites.
6404 This checks that the instance is in the cluster.
6407 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6408 assert self.instance is not None, \
6409 "Cannot retrieve locked instance %s" % self.op.instance_name
6410 _CheckNodeOnline(self, self.instance.primary_node)
6412 def Exec(self, feedback_fn):
6413 """Activate the disks.
6416 disks_ok, disks_info = \
6417 _AssembleInstanceDisks(self, self.instance,
6418 ignore_size=self.op.ignore_size)
6420 raise errors.OpExecError("Cannot activate block devices")
6422 if self.op.wait_for_sync:
6423 if not _WaitForSync(self, self.instance):
6424 raise errors.OpExecError("Some disks of the instance are degraded!")
6429 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6431 """Prepare the block devices for an instance.
6433 This sets up the block devices on all nodes.
6435 @type lu: L{LogicalUnit}
6436 @param lu: the logical unit on whose behalf we execute
6437 @type instance: L{objects.Instance}
6438 @param instance: the instance for whose disks we assemble
6439 @type disks: list of L{objects.Disk} or None
6440 @param disks: which disks to assemble (or all, if None)
6441 @type ignore_secondaries: boolean
6442 @param ignore_secondaries: if true, errors on secondary nodes
6443 won't result in an error return from the function
6444 @type ignore_size: boolean
6445 @param ignore_size: if true, the current known size of the disk
6446 will not be used during the disk activation, useful for cases
6447 when the size is wrong
6448 @return: False if the operation failed, otherwise a list of
6449 (host, instance_visible_name, node_visible_name)
6450 with the mapping from node devices to instance devices
6455 iname = instance.name
6456 disks = _ExpandCheckDisks(instance, disks)
6458 # With the two passes mechanism we try to reduce the window of
6459 # opportunity for the race condition of switching DRBD to primary
6460 # before handshaking occurred, but we do not eliminate it
6462 # The proper fix would be to wait (with some limits) until the
6463 # connection has been made and drbd transitions from WFConnection
6464 # into any other network-connected state (Connected, SyncTarget,
6467 # 1st pass, assemble on all nodes in secondary mode
6468 for idx, inst_disk in enumerate(disks):
6469 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6471 node_disk = node_disk.Copy()
6472 node_disk.UnsetSize()
6473 lu.cfg.SetDiskID(node_disk, node)
6474 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6476 msg = result.fail_msg
6478 is_offline_secondary = (node in instance.secondary_nodes and
6480 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6481 " (is_primary=False, pass=1): %s",
6482 inst_disk.iv_name, node, msg)
6483 if not (ignore_secondaries or is_offline_secondary):
6486 # FIXME: race condition on drbd migration to primary
6488 # 2nd pass, do only the primary node
6489 for idx, inst_disk in enumerate(disks):
6492 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6493 if node != instance.primary_node:
6496 node_disk = node_disk.Copy()
6497 node_disk.UnsetSize()
6498 lu.cfg.SetDiskID(node_disk, node)
6499 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6501 msg = result.fail_msg
6503 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6504 " (is_primary=True, pass=2): %s",
6505 inst_disk.iv_name, node, msg)
6508 dev_path = result.payload
6510 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6512 # leave the disks configured for the primary node
6513 # this is a workaround that would be better fixed by
6514 # improving the logical/physical id handling
6516 lu.cfg.SetDiskID(disk, instance.primary_node)
6518 return disks_ok, device_info
6521 def _StartInstanceDisks(lu, instance, force):
6522 """Start the disks of an instance.
6525 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6526 ignore_secondaries=force)
6528 _ShutdownInstanceDisks(lu, instance)
6529 if force is not None and not force:
6530 lu.proc.LogWarning("", hint="If the message above refers to a"
6532 " you can retry the operation using '--force'.")
6533 raise errors.OpExecError("Disk consistency error")
6536 class LUInstanceDeactivateDisks(NoHooksLU):
6537 """Shutdown an instance's disks.
6542 def ExpandNames(self):
6543 self._ExpandAndLockInstance()
6544 self.needed_locks[locking.LEVEL_NODE] = []
6545 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6547 def DeclareLocks(self, level):
6548 if level == locking.LEVEL_NODE:
6549 self._LockInstancesNodes()
6551 def CheckPrereq(self):
6552 """Check prerequisites.
6554 This checks that the instance is in the cluster.
6557 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6558 assert self.instance is not None, \
6559 "Cannot retrieve locked instance %s" % self.op.instance_name
6561 def Exec(self, feedback_fn):
6562 """Deactivate the disks
6565 instance = self.instance
6567 _ShutdownInstanceDisks(self, instance)
6569 _SafeShutdownInstanceDisks(self, instance)
6572 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6573 """Shutdown block devices of an instance.
6575 This function checks if an instance is running, before calling
6576 _ShutdownInstanceDisks.
6579 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6580 _ShutdownInstanceDisks(lu, instance, disks=disks)
6583 def _ExpandCheckDisks(instance, disks):
6584 """Return the instance disks selected by the disks list
6586 @type disks: list of L{objects.Disk} or None
6587 @param disks: selected disks
6588 @rtype: list of L{objects.Disk}
6589 @return: selected instance disks to act on
6593 return instance.disks
6595 if not set(disks).issubset(instance.disks):
6596 raise errors.ProgrammerError("Can only act on disks belonging to the"
6601 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6602 """Shutdown block devices of an instance.
6604 This does the shutdown on all nodes of the instance.
6606 If ignore_primary is false, errors on the primary node are
6611 disks = _ExpandCheckDisks(instance, disks)
6614 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6615 lu.cfg.SetDiskID(top_disk, node)
6616 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6617 msg = result.fail_msg
6619 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6620 disk.iv_name, node, msg)
6621 if ((node == instance.primary_node and not ignore_primary) or
6622 (node != instance.primary_node and not result.offline)):
6627 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6628 """Checks if a node has enough free memory.
6630 This function checks if a given node has the needed amount of free
6631 memory. In case the node has less memory or we cannot get the
6632 information from the node, this function raises an OpPrereqError
6635 @type lu: C{LogicalUnit}
6636 @param lu: a logical unit from which we get configuration data
6638 @param node: the node to check
6639 @type reason: C{str}
6640 @param reason: string to use in the error message
6641 @type requested: C{int}
6642 @param requested: the amount of memory in MiB to check for
6643 @type hypervisor_name: C{str}
6644 @param hypervisor_name: the hypervisor to ask for memory stats
6646 @return: node current free memory
6647 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6648 we cannot check the node
6651 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6652 nodeinfo[node].Raise("Can't get data from node %s" % node,
6653 prereq=True, ecode=errors.ECODE_ENVIRON)
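# The node_info payload is a 3-tuple whose second element carries per-VG
# data and whose third carries per-hypervisor data; since exactly one
# hypervisor was requested, a single hv_info dict is unpacked here.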
6654 (_, _, (hv_info, )) = nodeinfo[node].payload
6656 free_mem = hv_info.get("memory_free", None)
6657 if not isinstance(free_mem, int):
6658 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6659 " was '%s'" % (node, free_mem),
6660 errors.ECODE_ENVIRON)
6661 if requested > free_mem:
6662 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6663 " needed %s MiB, available %s MiB" %
6664 (node, reason, requested, free_mem),
6669 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6670 """Checks if nodes have enough free disk space in the all VGs.
6672 This function check if all given nodes have the needed amount of
6673 free disk. In case any node has less disk or we cannot get the
6674 information from the node, this function raise an OpPrereqError
6677 @type lu: C{LogicalUnit}
6678 @param lu: a logical unit from which we get configuration data
6679 @type nodenames: C{list}
6680 @param nodenames: the list of node names to check
6681 @type req_sizes: C{dict}
6682 @param req_sizes: the hash of vg and corresponding amount of disk in
6684 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6685 or we cannot check the node
6688 for vg, req_size in req_sizes.items():
6689 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6692 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6693 """Checks if nodes have enough free disk space in the specified VG.
6695 This function checks if all given nodes have the needed amount of
6696 free disk. In case any node has less disk or we cannot get the
6697 information from the node, this function raises an OpPrereqError
6700 @type lu: C{LogicalUnit}
6701 @param lu: a logical unit from which we get configuration data
6702 @type nodenames: C{list}
6703 @param nodenames: the list of node names to check
6705 @param vg: the volume group to check
6706 @type requested: C{int}
6707 @param requested: the amount of disk in MiB to check for
6708 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6709 or we cannot check the node
6712 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6713 for node in nodenames:
6714 info = nodeinfo[node]
6715 info.Raise("Cannot get current information from node %s" % node,
6716 prereq=True, ecode=errors.ECODE_ENVIRON)
6717 (_, (vg_info, ), _) = info.payload
6718 vg_free = vg_info.get("vg_free", None)
6719 if not isinstance(vg_free, int):
6720 raise errors.OpPrereqError("Can't compute free disk space on node"
6721 " %s for vg %s, result was '%s'" %
6722 (node, vg, vg_free), errors.ECODE_ENVIRON)
6723 if requested > vg_free:
6724 raise errors.OpPrereqError("Not enough disk space on target node %s"
6725 " vg %s: required %d MiB, available %d MiB" %
6726 (node, vg, requested, vg_free),
6730 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6731 """Checks if nodes have enough physical CPUs
6733 This function checks if all given nodes have the needed number of
6734 physical CPUs. In case any node has fewer CPUs or we cannot get the
6735 information from the node, this function raises an OpPrereqError
6738 @type lu: C{LogicalUnit}
6739 @param lu: a logical unit from which we get configuration data
6740 @type nodenames: C{list}
6741 @param nodenames: the list of node names to check
6742 @type requested: C{int}
6743 @param requested: the minimum acceptable number of physical CPUs
6744 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6745 or we cannot check the node
6748 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6749 for node in nodenames:
6750 info = nodeinfo[node]
6751 info.Raise("Cannot get current information from node %s" % node,
6752 prereq=True, ecode=errors.ECODE_ENVIRON)
6753 (_, _, (hv_info, )) = info.payload
6754 num_cpus = hv_info.get("cpu_total", None)
6755 if not isinstance(num_cpus, int):
6756 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6757 " on node %s, result was '%s'" %
6758 (node, num_cpus), errors.ECODE_ENVIRON)
6759 if requested > num_cpus:
6760 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6761 "required" % (node, num_cpus, requested),
6765 class LUInstanceStartup(LogicalUnit):
6766 """Starts an instance.
6769 HPATH = "instance-start"
6770 HTYPE = constants.HTYPE_INSTANCE
6773 def CheckArguments(self):
6775 if self.op.beparams:
6776 # fill the beparams dict
6777 objects.UpgradeBeParams(self.op.beparams)
6778 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6780 def ExpandNames(self):
6781 self._ExpandAndLockInstance()
6782 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6784 def DeclareLocks(self, level):
6785 if level == locking.LEVEL_NODE_RES:
6786 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6788 def BuildHooksEnv(self):
6791 This runs on master, primary and secondary nodes of the instance.
6795 "FORCE": self.op.force,
6798 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6802 def BuildHooksNodes(self):
6803 """Build hooks nodes.
6806 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6809 def CheckPrereq(self):
6810 """Check prerequisites.
6812 This checks that the instance is in the cluster.
6815 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6816 assert self.instance is not None, \
6817 "Cannot retrieve locked instance %s" % self.op.instance_name
6820 if self.op.hvparams:
6821 # check hypervisor parameter syntax (locally)
6822 cluster = self.cfg.GetClusterInfo()
6823 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6824 filled_hvp = cluster.FillHV(instance)
6825 filled_hvp.update(self.op.hvparams)
6826 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6827 hv_type.CheckParameterSyntax(filled_hvp)
6828 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6830 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6832 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6834 if self.primary_offline and self.op.ignore_offline_nodes:
6835 self.proc.LogWarning("Ignoring offline primary node")
6837 if self.op.hvparams or self.op.beparams:
6838 self.proc.LogWarning("Overridden parameters are ignored")
6840 _CheckNodeOnline(self, instance.primary_node)
6842 bep = self.cfg.GetClusterInfo().FillBE(instance)
6843 bep.update(self.op.beparams)
6845 # check bridges existence
6846 _CheckInstanceBridgesExist(self, instance)
6848 remote_info = self.rpc.call_instance_info(instance.primary_node,
6850 instance.hypervisor)
6851 remote_info.Raise("Error checking node %s" % instance.primary_node,
6852 prereq=True, ecode=errors.ECODE_ENVIRON)
6853 if not remote_info.payload: # not running already
6854 _CheckNodeFreeMemory(self, instance.primary_node,
6855 "starting instance %s" % instance.name,
6856 bep[constants.BE_MINMEM], instance.hypervisor)
6858 def Exec(self, feedback_fn):
6859 """Start the instance.
6862 instance = self.instance
6863 force = self.op.force
6865 if not self.op.no_remember:
6866 self.cfg.MarkInstanceUp(instance.name)
6868 if self.primary_offline:
6869 assert self.op.ignore_offline_nodes
6870 self.proc.LogInfo("Primary node offline, marked instance as started")
6872 node_current = instance.primary_node
6874 _StartInstanceDisks(self, instance, force)
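# Assemble the instance's disks first; if the actual start fails, shut the
# disks down again so the node is not left with half-activated devices.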
6877 self.rpc.call_instance_start(node_current,
6878 (instance, self.op.hvparams,
6880 self.op.startup_paused)
6881 msg = result.fail_msg
6883 _ShutdownInstanceDisks(self, instance)
6884 raise errors.OpExecError("Could not start instance: %s" % msg)
6887 class LUInstanceReboot(LogicalUnit):
6888 """Reboot an instance.
6891 HPATH = "instance-reboot"
6892 HTYPE = constants.HTYPE_INSTANCE
6895 def ExpandNames(self):
6896 self._ExpandAndLockInstance()
6898 def BuildHooksEnv(self):
6901 This runs on master, primary and secondary nodes of the instance.
6905 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6906 "REBOOT_TYPE": self.op.reboot_type,
6907 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6910 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6914 def BuildHooksNodes(self):
6915 """Build hooks nodes.
6918 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6921 def CheckPrereq(self):
6922 """Check prerequisites.
6924 This checks that the instance is in the cluster.
6927 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6928 assert self.instance is not None, \
6929 "Cannot retrieve locked instance %s" % self.op.instance_name
6930 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6931 _CheckNodeOnline(self, instance.primary_node)
6933 # check bridges existence
6934 _CheckInstanceBridgesExist(self, instance)
6936 def Exec(self, feedback_fn):
6937 """Reboot the instance.
6940 instance = self.instance
6941 ignore_secondaries = self.op.ignore_secondaries
6942 reboot_type = self.op.reboot_type
6944 remote_info = self.rpc.call_instance_info(instance.primary_node,
6946 instance.hypervisor)
6947 remote_info.Raise("Error checking node %s" % instance.primary_node)
6948 instance_running = bool(remote_info.payload)
6950 node_current = instance.primary_node
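# Soft and hard reboots of a running instance are delegated to the
# hypervisor on the primary node; otherwise (full reboot, or the instance
# is not running) the instance is shut down, its disks are cycled and it
# is started again.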
6952 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6953 constants.INSTANCE_REBOOT_HARD]:
6954 for disk in instance.disks:
6955 self.cfg.SetDiskID(disk, node_current)
6956 result = self.rpc.call_instance_reboot(node_current, instance,
6958 self.op.shutdown_timeout)
6959 result.Raise("Could not reboot instance")
6961 if instance_running:
6962 result = self.rpc.call_instance_shutdown(node_current, instance,
6963 self.op.shutdown_timeout)
6964 result.Raise("Could not shutdown instance for full reboot")
6965 _ShutdownInstanceDisks(self, instance)
6967 self.LogInfo("Instance %s was already stopped, starting now",
6969 _StartInstanceDisks(self, instance, ignore_secondaries)
6970 result = self.rpc.call_instance_start(node_current,
6971 (instance, None, None), False)
6972 msg = result.fail_msg
6974 _ShutdownInstanceDisks(self, instance)
6975 raise errors.OpExecError("Could not start instance for"
6976 " full reboot: %s" % msg)
6978 self.cfg.MarkInstanceUp(instance.name)
6981 class LUInstanceShutdown(LogicalUnit):
6982 """Shutdown an instance.
6985 HPATH = "instance-stop"
6986 HTYPE = constants.HTYPE_INSTANCE
6989 def ExpandNames(self):
6990 self._ExpandAndLockInstance()
6992 def BuildHooksEnv(self):
6995 This runs on master, primary and secondary nodes of the instance.
6998 env = _BuildInstanceHookEnvByObject(self, self.instance)
6999 env["TIMEOUT"] = self.op.timeout
7002 def BuildHooksNodes(self):
7003 """Build hooks nodes.
7006 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7009 def CheckPrereq(self):
7010 """Check prerequisites.
7012 This checks that the instance is in the cluster.
7015 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7016 assert self.instance is not None, \
7017 "Cannot retrieve locked instance %s" % self.op.instance_name
7019 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7021 self.primary_offline = \
7022 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7024 if self.primary_offline and self.op.ignore_offline_nodes:
7025 self.proc.LogWarning("Ignoring offline primary node")
7027 _CheckNodeOnline(self, self.instance.primary_node)
7029 def Exec(self, feedback_fn):
7030 """Shutdown the instance.
7033 instance = self.instance
7034 node_current = instance.primary_node
7035 timeout = self.op.timeout
7037 if not self.op.no_remember:
7038 self.cfg.MarkInstanceDown(instance.name)
7040 if self.primary_offline:
7041 assert self.op.ignore_offline_nodes
7042 self.proc.LogInfo("Primary node offline, marked instance as stopped")
7044 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7045 msg = result.fail_msg
7047 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
7049 _ShutdownInstanceDisks(self, instance)
7052 class LUInstanceReinstall(LogicalUnit):
7053 """Reinstall an instance.
7056 HPATH = "instance-reinstall"
7057 HTYPE = constants.HTYPE_INSTANCE
7060 def ExpandNames(self):
7061 self._ExpandAndLockInstance()
7063 def BuildHooksEnv(self):
7066 This runs on master, primary and secondary nodes of the instance.
7069 return _BuildInstanceHookEnvByObject(self, self.instance)
7071 def BuildHooksNodes(self):
7072 """Build hooks nodes.
7075 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7078 def CheckPrereq(self):
7079 """Check prerequisites.
7081 This checks that the instance is in the cluster and is not running.
7084 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7085 assert instance is not None, \
7086 "Cannot retrieve locked instance %s" % self.op.instance_name
7087 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7088 " offline, cannot reinstall")
7090 if instance.disk_template == constants.DT_DISKLESS:
7091 raise errors.OpPrereqError("Instance '%s' has no disks" %
7092 self.op.instance_name,
7094 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7096 if self.op.os_type is not None:
7098 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7099 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7100 instance_os = self.op.os_type
7102 instance_os = instance.os
7104 nodelist = list(instance.all_nodes)
7106 if self.op.osparams:
7107 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7108 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7109 self.os_inst = i_osdict # the new dict (without defaults)
7113 self.instance = instance
7115 def Exec(self, feedback_fn):
7116 """Reinstall the instance.
7119 inst = self.instance
7121 if self.op.os_type is not None:
7122 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7123 inst.os = self.op.os_type
7124 # Write to configuration
7125 self.cfg.Update(inst, feedback_fn)
7127 _StartInstanceDisks(self, inst, None)
7129 feedback_fn("Running the instance OS create scripts...")
7130 # FIXME: pass debug option from opcode to backend
7131 result = self.rpc.call_instance_os_add(inst.primary_node,
7132 (inst, self.os_inst), True,
7133 self.op.debug_level)
7134 result.Raise("Could not install OS for instance %s on node %s" %
7135 (inst.name, inst.primary_node))
7137 _ShutdownInstanceDisks(self, inst)
7140 class LUInstanceRecreateDisks(LogicalUnit):
7141 """Recreate an instance's missing disks.
7144 HPATH = "instance-recreate-disks"
7145 HTYPE = constants.HTYPE_INSTANCE
7148 _MODIFYABLE = frozenset([
7149 constants.IDISK_SIZE,
7150 constants.IDISK_MODE,
7153 # New or changed disk parameters may have different semantics
7154 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7155 constants.IDISK_ADOPT,
7157 # TODO: Implement support changing VG while recreating
7159 constants.IDISK_METAVG,
7162 def _RunAllocator(self):
7163 """Run the allocator based on input opcode.
7166 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7169 # The allocator should actually run in "relocate" mode, but current
7170 # allocators don't support relocating all the nodes of an instance at
7171 # the same time. As a workaround we use "allocate" mode, but this is
7172 # suboptimal for two reasons:
7173 # - The instance name passed to the allocator is present in the list of
7174 # existing instances, so there could be a conflict within the
7175 # internal structures of the allocator. This doesn't happen with the
7176 # current allocators, but it's a liability.
7177 # - The allocator counts the resources used by the instance twice: once
7178 # because the instance exists already, and once because it tries to
7179 # allocate a new instance.
7180 # The allocator could choose some of the nodes on which the instance is
7181 # running, but that's not a problem. If the instance nodes are broken,
7182 # they should already be marked as drained or offline, and hence
7183 # skipped by the allocator. If instance disks have been lost for other
7184 # reasons, then recreating the disks on the same nodes should be fine.
7185 disk_template = self.instance.disk_template
7186 spindle_use = be_full[constants.BE_SPINDLE_USE]
7187 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7188 disk_template=disk_template,
7189 tags=list(self.instance.GetTags()),
7190 os=self.instance.os,
7192 vcpus=be_full[constants.BE_VCPUS],
7193 memory=be_full[constants.BE_MAXMEM],
7194 spindle_use=spindle_use,
7195 disks=[{constants.IDISK_SIZE: d.size,
7196 constants.IDISK_MODE: d.mode}
7197 for d in self.instance.disks],
7198 hypervisor=self.instance.hypervisor)
7199 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7201 ial.Run(self.op.iallocator)
7203 assert req.RequiredNodes() == len(self.instance.all_nodes)
7206 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7207 " %s" % (self.op.iallocator, ial.info),
7210 self.op.nodes = ial.result
7211 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7212 self.op.instance_name, self.op.iallocator,
7213 utils.CommaJoin(ial.result))
7215 def CheckArguments(self):
7216 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7217 # Normalize and convert deprecated list of disk indices
7218 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7220 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7222 raise errors.OpPrereqError("Some disks have been specified more than"
7223 " once: %s" % utils.CommaJoin(duplicates),
7226 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7227 # when neither iallocator nor nodes are specified
7228 if self.op.iallocator or self.op.nodes:
7229 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7231 for (idx, params) in self.op.disks:
7232 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7233 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7235 raise errors.OpPrereqError("Parameters for disk %s try to change"
7236 " unmodifyable parameter(s): %s" %
7237 (idx, utils.CommaJoin(unsupported)),
7240 def ExpandNames(self):
7241 self._ExpandAndLockInstance()
7242 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7244 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7245 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7247 self.needed_locks[locking.LEVEL_NODE] = []
7248 if self.op.iallocator:
7249 # iallocator will select a new node in the same group
7250 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7251 self.needed_locks[locking.LEVEL_NODE_RES] = []
7253 def DeclareLocks(self, level):
7254 if level == locking.LEVEL_NODEGROUP:
7255 assert self.op.iallocator is not None
7256 assert not self.op.nodes
7257 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7258 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7259 # Lock the primary group used by the instance optimistically; this
7260 # requires going via the node before it's locked, requiring
7261 # verification later on
7262 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7263 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7265 elif level == locking.LEVEL_NODE:
7266 # If an allocator is used, then we lock all the nodes in the current
7267 # instance group, as we don't know yet which ones will be selected;
7268 # if we replace the nodes without using an allocator, locks are
7269 # already declared in ExpandNames; otherwise, we need to lock all the
7270 # instance nodes for disk re-creation
7271 if self.op.iallocator:
7272 assert not self.op.nodes
7273 assert not self.needed_locks[locking.LEVEL_NODE]
7274 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7276 # Lock member nodes of the group of the primary node
7277 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7278 self.needed_locks[locking.LEVEL_NODE].extend(
7279 self.cfg.GetNodeGroup(group_uuid).members)
7280 elif not self.op.nodes:
7281 self._LockInstancesNodes(primary_only=False)
7282 elif level == locking.LEVEL_NODE_RES:
7284 self.needed_locks[locking.LEVEL_NODE_RES] = \
7285 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7287 def BuildHooksEnv(self):
7290 This runs on master, primary and secondary nodes of the instance.
7293 return _BuildInstanceHookEnvByObject(self, self.instance)
7295 def BuildHooksNodes(self):
7296 """Build hooks nodes.
7299 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7302 def CheckPrereq(self):
7303 """Check prerequisites.
7305 This checks that the instance is in the cluster and is not running.
7308 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7309 assert instance is not None, \
7310 "Cannot retrieve locked instance %s" % self.op.instance_name
7312 if len(self.op.nodes) != len(instance.all_nodes):
7313 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7314 " %d replacement nodes were specified" %
7315 (instance.name, len(instance.all_nodes),
7316 len(self.op.nodes)),
7318 assert instance.disk_template != constants.DT_DRBD8 or \
7319 len(self.op.nodes) == 2
7320 assert instance.disk_template != constants.DT_PLAIN or \
7321 len(self.op.nodes) == 1
7322 primary_node = self.op.nodes[0]
7324 primary_node = instance.primary_node
7325 if not self.op.iallocator:
7326 _CheckNodeOnline(self, primary_node)
7328 if instance.disk_template == constants.DT_DISKLESS:
7329 raise errors.OpPrereqError("Instance '%s' has no disks" %
7330 self.op.instance_name, errors.ECODE_INVAL)
7332 # Verify if node group locks are still correct
7333 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7335 # Node group locks are acquired only for the primary node (and only
7336 # when the allocator is used)
7337 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7340 # if we replace nodes *and* the old primary is offline, we don't
7341 # check the instance state
7342 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7343 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7344 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7345 msg="cannot recreate disks")
7348 self.disks = dict(self.op.disks)
7350 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7352 maxidx = max(self.disks.keys())
7353 if maxidx >= len(instance.disks):
7354 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7357 if ((self.op.nodes or self.op.iallocator) and
7358 sorted(self.disks.keys()) != range(len(instance.disks))):
7359 raise errors.OpPrereqError("Can't recreate disks partially and"
7360 " change the nodes at the same time",
7363 self.instance = instance
7365 if self.op.iallocator:
7366 self._RunAllocator()
7367 # Release unneeded node and node resource locks
7368 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7369 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7371 def Exec(self, feedback_fn):
7372 """Recreate the disks.
7375 instance = self.instance
7377 assert (self.owned_locks(locking.LEVEL_NODE) ==
7378 self.owned_locks(locking.LEVEL_NODE_RES))
7381 mods = [] # keeps track of needed changes
7383 for idx, disk in enumerate(instance.disks):
7385 changes = self.disks[idx]
7387 # Disk should not be recreated
7391 # update secondaries for disks, if needed
7392 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7393 # need to update the nodes and minors
7394 assert len(self.op.nodes) == 2
7395 assert len(disk.logical_id) == 6 # otherwise disk internals
7397 (_, _, old_port, _, _, old_secret) = disk.logical_id
7398 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7399 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7400 new_minors[0], new_minors[1], old_secret)
7401 assert len(disk.logical_id) == len(new_id)
7405 mods.append((idx, new_id, changes))
7407 # now that we have passed all asserts above, we can apply the mods
7408 # in a single run (to avoid partial changes)
7409 for idx, new_id, changes in mods:
7410 disk = instance.disks[idx]
7411 if new_id is not None:
7412 assert disk.dev_type == constants.LD_DRBD8
7413 disk.logical_id = new_id
7415 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7416 mode=changes.get(constants.IDISK_MODE, None))
7418 # change primary node, if needed
7420 instance.primary_node = self.op.nodes[0]
7421 self.LogWarning("Changing the instance's nodes, you will have to"
7422 " remove any disks left on the older nodes manually")
7425 self.cfg.Update(instance, feedback_fn)
7427 # All touched nodes must be locked
7428 mylocks = self.owned_locks(locking.LEVEL_NODE)
7429 assert mylocks.issuperset(frozenset(instance.all_nodes))
7430 _CreateDisks(self, instance, to_skip=to_skip)
7433 class LUInstanceRename(LogicalUnit):
7434 """Rename an instance.
7437 HPATH = "instance-rename"
7438 HTYPE = constants.HTYPE_INSTANCE
7440 def CheckArguments(self):
7444 if self.op.ip_check and not self.op.name_check:
7445 # TODO: make the ip check more flexible and not depend on the name check
7446 raise errors.OpPrereqError("IP address check requires a name check",
7449 def BuildHooksEnv(self):
7452 This runs on master, primary and secondary nodes of the instance.
7455 env = _BuildInstanceHookEnvByObject(self, self.instance)
7456 env["INSTANCE_NEW_NAME"] = self.op.new_name
7459 def BuildHooksNodes(self):
7460 """Build hooks nodes.
7463 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7466 def CheckPrereq(self):
7467 """Check prerequisites.
7469 This checks that the instance is in the cluster and is not running.
7472 self.op.instance_name = _ExpandInstanceName(self.cfg,
7473 self.op.instance_name)
7474 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7475 assert instance is not None
7476 _CheckNodeOnline(self, instance.primary_node)
7477 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7478 msg="cannot rename")
7479 self.instance = instance
7481 new_name = self.op.new_name
7482 if self.op.name_check:
7483 hostname = _CheckHostnameSane(self, new_name)
7484 new_name = self.op.new_name = hostname.name
7485 if (self.op.ip_check and
7486 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7487 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7488 (hostname.ip, new_name),
7489 errors.ECODE_NOTUNIQUE)
7491 instance_list = self.cfg.GetInstanceList()
7492 if new_name in instance_list and new_name != instance.name:
7493 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7494 new_name, errors.ECODE_EXISTS)
7496 def Exec(self, feedback_fn):
7497 """Rename the instance.
7500 inst = self.instance
7501 old_name = inst.name
7503 rename_file_storage = False
7504 if (inst.disk_template in constants.DTS_FILEBASED and
7505 self.op.new_name != inst.name):
7506 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7507 rename_file_storage = True
7509 self.cfg.RenameInstance(inst.name, self.op.new_name)
7510 # Change the instance lock. This is definitely safe while we hold the BGL.
7511 # Otherwise the new lock would have to be added in acquired mode.
7513 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7514 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7516 # re-read the instance from the configuration after rename
7517 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7519 if rename_file_storage:
7520 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7521 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7522 old_file_storage_dir,
7523 new_file_storage_dir)
7524 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7525 " (but the instance has been renamed in Ganeti)" %
7526 (inst.primary_node, old_file_storage_dir,
7527 new_file_storage_dir))
7529 _StartInstanceDisks(self, inst, None)
7530 # update info on disks
7531 info = _GetInstanceInfoText(inst)
7532 for (idx, disk) in enumerate(inst.disks):
7533 for node in inst.all_nodes:
7534 self.cfg.SetDiskID(disk, node)
7535 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7537 self.LogWarning("Error setting info on node %s for disk %s: %s",
7538 node, idx, result.fail_msg)
7540 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7541 old_name, self.op.debug_level)
7542 msg = result.fail_msg
7544 msg = ("Could not run OS rename script for instance %s on node %s"
7545 " (but the instance has been renamed in Ganeti): %s" %
7546 (inst.name, inst.primary_node, msg))
7547 self.proc.LogWarning(msg)
7549 _ShutdownInstanceDisks(self, inst)
7554 class LUInstanceRemove(LogicalUnit):
7555 """Remove an instance.
7558 HPATH = "instance-remove"
7559 HTYPE = constants.HTYPE_INSTANCE
7562 def ExpandNames(self):
7563 self._ExpandAndLockInstance()
7564 self.needed_locks[locking.LEVEL_NODE] = []
7565 self.needed_locks[locking.LEVEL_NODE_RES] = []
7566 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7568 def DeclareLocks(self, level):
7569 if level == locking.LEVEL_NODE:
7570 self._LockInstancesNodes()
7571 elif level == locking.LEVEL_NODE_RES:
7573 self.needed_locks[locking.LEVEL_NODE_RES] = \
7574 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7576 def BuildHooksEnv(self):
7579 This runs on master, primary and secondary nodes of the instance.
7582 env = _BuildInstanceHookEnvByObject(self, self.instance)
7583 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7586 def BuildHooksNodes(self):
7587 """Build hooks nodes.
7590 nl = [self.cfg.GetMasterNode()]
7591 nl_post = list(self.instance.all_nodes) + nl
7592 return (nl, nl_post)
7594 def CheckPrereq(self):
7595 """Check prerequisites.
7597 This checks that the instance is in the cluster.
7600 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7601 assert self.instance is not None, \
7602 "Cannot retrieve locked instance %s" % self.op.instance_name
7604 def Exec(self, feedback_fn):
7605 """Remove the instance.
7608 instance = self.instance
7609 logging.info("Shutting down instance %s on node %s",
7610 instance.name, instance.primary_node)
7612 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7613 self.op.shutdown_timeout)
7614 msg = result.fail_msg
7616 if self.op.ignore_failures:
7617 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7619 raise errors.OpExecError("Could not shutdown instance %s on"
7621 (instance.name, instance.primary_node, msg))
7623 assert (self.owned_locks(locking.LEVEL_NODE) ==
7624 self.owned_locks(locking.LEVEL_NODE_RES))
7625 assert not (set(instance.all_nodes) -
7626 self.owned_locks(locking.LEVEL_NODE)), \
7627 "Not owning correct locks"
7629 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7632 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7633 """Utility function to remove an instance.
7636 logging.info("Removing block devices for instance %s", instance.name)
7638 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7639 if not ignore_failures:
7640 raise errors.OpExecError("Can't remove instance's disks")
7641 feedback_fn("Warning: can't remove instance's disks")
7643 logging.info("Removing instance %s out of cluster config", instance.name)
7645 lu.cfg.RemoveInstance(instance.name)
7647 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7648 "Instance lock removal conflict"
7650 # Remove lock for the instance
7651 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
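# The lock recorded in remove_locks is dropped (removed, not merely
# released) by the LU processor once this LU has finished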
7654 class LUInstanceQuery(NoHooksLU):
7655 """Logical unit for querying instances.
7658 # pylint: disable=W0142
7661 def CheckArguments(self):
7662 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7663 self.op.output_fields, self.op.use_locking)
7665 def ExpandNames(self):
7666 self.iq.ExpandNames(self)
7668 def DeclareLocks(self, level):
7669 self.iq.DeclareLocks(self, level)
7671 def Exec(self, feedback_fn):
7672 return self.iq.OldStyleQuery(self)
7675 class LUInstanceFailover(LogicalUnit):
7676 """Failover an instance.
7679 HPATH = "instance-failover"
7680 HTYPE = constants.HTYPE_INSTANCE
7683 def CheckArguments(self):
7684 """Check the arguments.
7687 self.iallocator = getattr(self.op, "iallocator", None)
7688 self.target_node = getattr(self.op, "target_node", None)
7690 def ExpandNames(self):
7691 self._ExpandAndLockInstance()
7693 if self.op.target_node is not None:
7694 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7696 self.needed_locks[locking.LEVEL_NODE] = []
7697 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7699 self.needed_locks[locking.LEVEL_NODE_RES] = []
7700 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7702 ignore_consistency = self.op.ignore_consistency
7703 shutdown_timeout = self.op.shutdown_timeout
7704 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7707 ignore_consistency=ignore_consistency,
7708 shutdown_timeout=shutdown_timeout,
7709 ignore_ipolicy=self.op.ignore_ipolicy)
7710 self.tasklets = [self._migrater]
7712 def DeclareLocks(self, level):
7713 if level == locking.LEVEL_NODE:
7714 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7715 if instance.disk_template in constants.DTS_EXT_MIRROR:
7716 if self.op.target_node is None:
7717 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7719 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7720 self.op.target_node]
7721 del self.recalculate_locks[locking.LEVEL_NODE]
7723 self._LockInstancesNodes()
7724 elif level == locking.LEVEL_NODE_RES:
7726 self.needed_locks[locking.LEVEL_NODE_RES] = \
7727 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7729 def BuildHooksEnv(self):
7732 This runs on master, primary and secondary nodes of the instance.
7735 instance = self._migrater.instance
7736 source_node = instance.primary_node
7737 target_node = self.op.target_node
7739 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7740 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7741 "OLD_PRIMARY": source_node,
7742 "NEW_PRIMARY": target_node,
7745 if instance.disk_template in constants.DTS_INT_MIRROR:
7746 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7747 env["NEW_SECONDARY"] = source_node
7749 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7751 env.update(_BuildInstanceHookEnvByObject(self, instance))
7755 def BuildHooksNodes(self):
7756 """Build hooks nodes.
7759 instance = self._migrater.instance
7760 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7761 return (nl, nl + [instance.primary_node])
7764 class LUInstanceMigrate(LogicalUnit):
7765 """Migrate an instance.
7767 This is migration without shutting down, compared to the failover,
7768 which is done with shutdown.
7771 HPATH = "instance-migrate"
7772 HTYPE = constants.HTYPE_INSTANCE
7775 def ExpandNames(self):
7776 self._ExpandAndLockInstance()
7778 if self.op.target_node is not None:
7779 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7781 self.needed_locks[locking.LEVEL_NODE] = []
7782 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7784 self.needed_locks[locking.LEVEL_NODE] = []
7785 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7788 TLMigrateInstance(self, self.op.instance_name,
7789 cleanup=self.op.cleanup,
7791 fallback=self.op.allow_failover,
7792 allow_runtime_changes=self.op.allow_runtime_changes,
7793 ignore_ipolicy=self.op.ignore_ipolicy)
7794 self.tasklets = [self._migrater]
7796 def DeclareLocks(self, level):
7797 if level == locking.LEVEL_NODE:
7798 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7799 if instance.disk_template in constants.DTS_EXT_MIRROR:
7800 if self.op.target_node is None:
7801 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7803 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7804 self.op.target_node]
7805 del self.recalculate_locks[locking.LEVEL_NODE]
7807 self._LockInstancesNodes()
7808 elif level == locking.LEVEL_NODE_RES:
7810 self.needed_locks[locking.LEVEL_NODE_RES] = \
7811 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7813 def BuildHooksEnv(self):
7816 This runs on master, primary and secondary nodes of the instance.
7819 instance = self._migrater.instance
7820 source_node = instance.primary_node
7821 target_node = self.op.target_node
7822 env = _BuildInstanceHookEnvByObject(self, instance)
7824 "MIGRATE_LIVE": self._migrater.live,
7825 "MIGRATE_CLEANUP": self.op.cleanup,
7826 "OLD_PRIMARY": source_node,
7827 "NEW_PRIMARY": target_node,
7828 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7831 if instance.disk_template in constants.DTS_INT_MIRROR:
7832 env["OLD_SECONDARY"] = target_node
7833 env["NEW_SECONDARY"] = source_node
7835 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7839 def BuildHooksNodes(self):
7840 """Build hooks nodes.
7843 instance = self._migrater.instance
7844 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7845 return (nl, nl + [instance.primary_node])
7848 class LUInstanceMove(LogicalUnit):
7849 """Move an instance by data-copying.
7852 HPATH = "instance-move"
7853 HTYPE = constants.HTYPE_INSTANCE
7856 def ExpandNames(self):
7857 self._ExpandAndLockInstance()
7858 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7859 self.op.target_node = target_node
7860 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7861 self.needed_locks[locking.LEVEL_NODE_RES] = []
7862 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7864 def DeclareLocks(self, level):
7865 if level == locking.LEVEL_NODE:
7866 self._LockInstancesNodes(primary_only=True)
7867 elif level == locking.LEVEL_NODE_RES:
7869 self.needed_locks[locking.LEVEL_NODE_RES] = \
7870 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7872 def BuildHooksEnv(self):
7875 This runs on master, primary and secondary nodes of the instance.
7879 "TARGET_NODE": self.op.target_node,
7880 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7882 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7885 def BuildHooksNodes(self):
7886 """Build hooks nodes.
7890 self.cfg.GetMasterNode(),
7891 self.instance.primary_node,
7892 self.op.target_node,
7896 def CheckPrereq(self):
7897 """Check prerequisites.
7899 This checks that the instance is in the cluster.
7902 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7903 assert self.instance is not None, \
7904 "Cannot retrieve locked instance %s" % self.op.instance_name
7906 node = self.cfg.GetNodeInfo(self.op.target_node)
7907 assert node is not None, \
7908 "Cannot retrieve locked node %s" % self.op.target_node
7910 self.target_node = target_node = node.name
7912 if target_node == instance.primary_node:
7913 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7914 (instance.name, target_node),
7917 bep = self.cfg.GetClusterInfo().FillBE(instance)
7919 for idx, dsk in enumerate(instance.disks):
7920 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7921 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7922 " cannot copy" % idx, errors.ECODE_STATE)
7924 _CheckNodeOnline(self, target_node)
7925 _CheckNodeNotDrained(self, target_node)
7926 _CheckNodeVmCapable(self, target_node)
7927 cluster = self.cfg.GetClusterInfo()
7928 group_info = self.cfg.GetNodeGroup(node.group)
7929 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7930 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7931 ignore=self.op.ignore_ipolicy)
7933 if instance.admin_state == constants.ADMINST_UP:
7934 # check memory requirements on the target node
7935 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7936 instance.name, bep[constants.BE_MAXMEM],
7937 instance.hypervisor)
7939 self.LogInfo("Not checking memory on the secondary node as"
7940 " instance will not be started")
7942 # check bridge existence
7943 _CheckInstanceBridgesExist(self, instance, node=target_node)
7945 def Exec(self, feedback_fn):
7946 """Move an instance.
7948 The move is done by shutting it down on its present node, copying
7949 the data over (slow) and starting it on the new node.
7952 instance = self.instance
7954 source_node = instance.primary_node
7955 target_node = self.target_node
7957 self.LogInfo("Shutting down instance %s on source node %s",
7958 instance.name, source_node)
7960 assert (self.owned_locks(locking.LEVEL_NODE) ==
7961 self.owned_locks(locking.LEVEL_NODE_RES))
7963 result = self.rpc.call_instance_shutdown(source_node, instance,
7964 self.op.shutdown_timeout)
7965 msg = result.fail_msg
7967 if self.op.ignore_consistency:
7968 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7969 " Proceeding anyway. Please make sure node"
7970 " %s is down. Error details: %s",
7971 instance.name, source_node, source_node, msg)
7973 raise errors.OpExecError("Could not shutdown instance %s on"
7975 (instance.name, source_node, msg))
7977 # create the target disks
7979 _CreateDisks(self, instance, target_node=target_node)
7980 except errors.OpExecError:
7981 self.LogWarning("Device creation failed, reverting...")
7983 _RemoveDisks(self, instance, target_node=target_node)
7985 self.cfg.ReleaseDRBDMinors(instance.name)
7988 cluster_name = self.cfg.GetClusterInfo().cluster_name
7991 # activate, get path, copy the data over
7992 for idx, disk in enumerate(instance.disks):
7993 self.LogInfo("Copying data for disk %d", idx)
7994 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7995 instance.name, True, idx)
7997 self.LogWarning("Can't assemble newly created disk %d: %s",
7998 idx, result.fail_msg)
7999 errs.append(result.fail_msg)
8001 dev_path = result.payload
8002 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8003 target_node, dev_path,
8006 self.LogWarning("Can't copy data over for disk %d: %s",
8007 idx, result.fail_msg)
8008 errs.append(result.fail_msg)
8012 self.LogWarning("Some disks failed to copy, aborting")
8014 _RemoveDisks(self, instance, target_node=target_node)
8016 self.cfg.ReleaseDRBDMinors(instance.name)
8017 raise errors.OpExecError("Errors during disk copy: %s" %
8020 instance.primary_node = target_node
8021 self.cfg.Update(instance, feedback_fn)
8023 self.LogInfo("Removing the disks on the original node")
8024 _RemoveDisks(self, instance, target_node=source_node)
8026 # Only start the instance if it's marked as up
8027 if instance.admin_state == constants.ADMINST_UP:
8028 self.LogInfo("Starting instance %s on node %s",
8029 instance.name, target_node)
8031 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8032 ignore_secondaries=True)
8034 _ShutdownInstanceDisks(self, instance)
8035 raise errors.OpExecError("Can't activate the instance's disks")
8037 result = self.rpc.call_instance_start(target_node,
8038 (instance, None, None), False)
8039 msg = result.fail_msg
8041 _ShutdownInstanceDisks(self, instance)
8042 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8043 (instance.name, target_node, msg))
8046 class LUNodeMigrate(LogicalUnit):
8047 """Migrate all instances from a node.
8050 HPATH = "node-migrate"
8051 HTYPE = constants.HTYPE_NODE
8054 def CheckArguments(self):
8057 def ExpandNames(self):
8058 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8060 self.share_locks = _ShareAll()
8061 self.needed_locks = {
8062 locking.LEVEL_NODE: [self.op.node_name],
8065 def BuildHooksEnv(self):
8068 This runs on the master, the primary and all the secondaries.
8072 "NODE_NAME": self.op.node_name,
8073 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8076 def BuildHooksNodes(self):
8077 """Build hooks nodes.
8080 nl = [self.cfg.GetMasterNode()]
8083 def CheckPrereq(self):
8086 def Exec(self, feedback_fn):
8087 # Prepare jobs for migration instances
8088 allow_runtime_changes = self.op.allow_runtime_changes
8090 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8093 iallocator=self.op.iallocator,
8094 target_node=self.op.target_node,
8095 allow_runtime_changes=allow_runtime_changes,
8096 ignore_ipolicy=self.op.ignore_ipolicy)]
8097 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8100 # TODO: Run iallocator in this opcode and pass correct placement options to
8101 # OpInstanceMigrate. Since other jobs can modify the cluster between
8102 # running the iallocator and the actual migration, a good consistency model
8103 # will have to be found.
8105 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8106 frozenset([self.op.node_name]))
8108 return ResultWithJobs(jobs)
8111 class TLMigrateInstance(Tasklet):
8112 """Tasklet class for instance migration.
8115 @ivar live: whether the migration will be done live or non-live;
8116 this variable is initialized only after CheckPrereq has run
8117 @type cleanup: boolean
8118 @ivar cleanup: Whether we clean up after a failed migration
8119 @type iallocator: string
8120 @ivar iallocator: The iallocator used to determine target_node
8121 @type target_node: string
8122 @ivar target_node: If given, the target_node to reallocate the instance to
8123 @type failover: boolean
8124 @ivar failover: Whether operation results in failover or migration
8125 @type fallback: boolean
8126 @ivar fallback: Whether fallback to failover is allowed if migration not
8128 @type ignore_consistency: boolean
8129 @ivar ignore_consistency: Whether we should ignore consistency between source
8131 @type shutdown_timeout: int
8132 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
8133 @type ignore_ipolicy: bool
8134 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8139 _MIGRATION_POLL_INTERVAL = 1 # seconds
8140 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
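# These two values drive the status loop in _ExecMigration: poll the
# hypervisor every _MIGRATION_POLL_INTERVAL seconds and emit a progress
# line at most every _MIGRATION_FEEDBACK_INTERVAL seconds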
8142 def __init__(self, lu, instance_name, cleanup=False,
8143 failover=False, fallback=False,
8144 ignore_consistency=False,
8145 allow_runtime_changes=True,
8146 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8147 ignore_ipolicy=False):
8148 """Initializes this class.
8151 Tasklet.__init__(self, lu)
8154 self.instance_name = instance_name
8155 self.cleanup = cleanup
8156 self.live = False # will be overridden later
8157 self.failover = failover
8158 self.fallback = fallback
8159 self.ignore_consistency = ignore_consistency
8160 self.shutdown_timeout = shutdown_timeout
8161 self.ignore_ipolicy = ignore_ipolicy
8162 self.allow_runtime_changes = allow_runtime_changes
8164 def CheckPrereq(self):
8165 """Check prerequisites.
8167 This checks that the instance is in the cluster.
8170 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8171 instance = self.cfg.GetInstanceInfo(instance_name)
8172 assert instance is not None
8173 self.instance = instance
8174 cluster = self.cfg.GetClusterInfo()
8176 if (not self.cleanup and
8177 not instance.admin_state == constants.ADMINST_UP and
8178 not self.failover and self.fallback):
8179 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8180 " switching to failover")
8181 self.failover = True
8183 if instance.disk_template not in constants.DTS_MIRRORED:
8188 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8189 " %s" % (instance.disk_template, text),
8192 if instance.disk_template in constants.DTS_EXT_MIRROR:
8193 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8195 if self.lu.op.iallocator:
8196 self._RunAllocator()
8198 # We set self.target_node as it is required by
8200 self.target_node = self.lu.op.target_node
8202 # Check that the target node is correct in terms of instance policy
8203 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8204 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8205 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8207 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8208 ignore=self.ignore_ipolicy)
8210 # self.target_node is already populated, either directly or by the
8212 target_node = self.target_node
8213 if self.target_node == instance.primary_node:
8214 raise errors.OpPrereqError("Cannot migrate instance %s"
8215 " to its primary (%s)" %
8216 (instance.name, instance.primary_node),
8219 if len(self.lu.tasklets) == 1:
8220 # It is safe to release locks only when we're the only tasklet
8222 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8223 keep=[instance.primary_node, self.target_node])
8226 secondary_nodes = instance.secondary_nodes
8227 if not secondary_nodes:
8228 raise errors.ConfigurationError("No secondary node but using"
8229 " %s disk template" %
8230 instance.disk_template)
8231 target_node = secondary_nodes[0]
8232 if self.lu.op.iallocator or (self.lu.op.target_node and
8233 self.lu.op.target_node != target_node):
8235 text = "failed over"
8238 raise errors.OpPrereqError("Instances with disk template %s cannot"
8239 " be %s to arbitrary nodes"
8240 " (neither an iallocator nor a target"
8241 " node can be passed)" %
8242 (instance.disk_template, text),
8244 nodeinfo = self.cfg.GetNodeInfo(target_node)
8245 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8246 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8248 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8249 ignore=self.ignore_ipolicy)
8251 i_be = cluster.FillBE(instance)
8253 # check memory requirements on the secondary node
8254 if (not self.cleanup and
8255 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8256 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8257 "migrating instance %s" %
8259 i_be[constants.BE_MINMEM],
8260 instance.hypervisor)
8262 self.lu.LogInfo("Not checking memory on the secondary node as"
8263 " instance will not be started")
8265 # check if failover must be forced instead of migration
8266 if (not self.cleanup and not self.failover and
8267 i_be[constants.BE_ALWAYS_FAILOVER]):
8268 self.lu.LogInfo("Instance configured to always failover; fallback"
8270 self.failover = True
8272 # check bridge existence
8273 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8275 if not self.cleanup:
8276 _CheckNodeNotDrained(self.lu, target_node)
8277 if not self.failover:
8278 result = self.rpc.call_instance_migratable(instance.primary_node,
8280 if result.fail_msg and self.fallback:
8281 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8283 self.failover = True
8285 result.Raise("Can't migrate, please use failover",
8286 prereq=True, ecode=errors.ECODE_STATE)
8288 assert not (self.failover and self.cleanup)
8290 if not self.failover:
8291 if self.lu.op.live is not None and self.lu.op.mode is not None:
8292 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8293 " parameters are accepted",
8295 if self.lu.op.live is not None:
8297 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8299 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8300 # reset the 'live' parameter to None so that repeated
8301 # invocations of CheckPrereq do not raise an exception
8302 self.lu.op.live = None
8303 elif self.lu.op.mode is None:
8304 # read the default value from the hypervisor
8305 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8306 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8308 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8310 # Failover is never live
8313 if not (self.failover or self.cleanup):
8314 remote_info = self.rpc.call_instance_info(instance.primary_node,
8316 instance.hypervisor)
8317 remote_info.Raise("Error checking instance on node %s" %
8318 instance.primary_node)
8319 instance_running = bool(remote_info.payload)
8320 if instance_running:
8321 self.current_mem = int(remote_info.payload["memory"])
8323 def _RunAllocator(self):
8324 """Run the allocator based on input opcode.
8327 # FIXME: add a self.ignore_ipolicy option
8328 req = iallocator.IAReqRelocate(name=self.instance_name,
8329 relocate_from=[self.instance.primary_node])
8330 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8332 ial.Run(self.lu.op.iallocator)
8335 raise errors.OpPrereqError("Can't compute nodes using"
8336 " iallocator '%s': %s" %
8337 (self.lu.op.iallocator, ial.info),
8339 self.target_node = ial.result[0]
8340 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8341 self.instance_name, self.lu.op.iallocator,
8342 utils.CommaJoin(ial.result))
8344 def _WaitUntilSync(self):
8345 """Poll with custom rpc for disk sync.
8347 This uses our own step-based rpc call.
8350 self.feedback_fn("* wait until resync is done")
8354 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8356 (self.instance.disks,
8359 for node, nres in result.items():
8360 nres.Raise("Cannot resync disks on node %s" % node)
8361 node_done, node_percent = nres.payload
8362 all_done = all_done and node_done
8363 if node_percent is not None:
8364 min_percent = min(min_percent, node_percent)
8366 if min_percent < 100:
8367 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8370 def _EnsureSecondary(self, node):
8371 """Demote a node to secondary.
8374 self.feedback_fn("* switching node %s to secondary mode" % node)
8376 for dev in self.instance.disks:
8377 self.cfg.SetDiskID(dev, node)
8379 result = self.rpc.call_blockdev_close(node, self.instance.name,
8380 self.instance.disks)
8381 result.Raise("Cannot change disk to secondary on node %s" % node)
8383 def _GoStandalone(self):
8384 """Disconnect from the network.
8387 self.feedback_fn("* changing into standalone mode")
8388 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8389 self.instance.disks)
8390 for node, nres in result.items():
8391 nres.Raise("Cannot disconnect disks node %s" % node)
8393 def _GoReconnect(self, multimaster):
8394 """Reconnect to the network.
8400 msg = "single-master"
8401 self.feedback_fn("* changing disks into %s mode" % msg)
8402 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8403 (self.instance.disks, self.instance),
8404 self.instance.name, multimaster)
8405 for node, nres in result.items():
8406 nres.Raise("Cannot change disks config on node %s" % node)
8408 def _ExecCleanup(self):
8409 """Try to cleanup after a failed migration.
8411 The cleanup is done by:
8412 - check that the instance is running only on one node
8413 (and update the config if needed)
8414 - change disks on its secondary node to secondary
8415 - wait until disks are fully synchronized
8416 - disconnect from the network
8417 - change disks into single-master mode
8418 - wait again until disks are fully synchronized
8421 instance = self.instance
8422 target_node = self.target_node
8423 source_node = self.source_node
8425 # check running on only one node
8426 self.feedback_fn("* checking where the instance actually runs"
8427 " (if this hangs, the hypervisor might be in"
8429 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8430 for node, result in ins_l.items():
8431 result.Raise("Can't contact node %s" % node)
8433 runningon_source = instance.name in ins_l[source_node].payload
8434 runningon_target = instance.name in ins_l[target_node].payload
8436 if runningon_source and runningon_target:
8437 raise errors.OpExecError("Instance seems to be running on two nodes,"
8438 " or the hypervisor is confused; you will have"
8439 " to ensure manually that it runs only on one"
8440 " and restart this operation")
8442 if not (runningon_source or runningon_target):
8443 raise errors.OpExecError("Instance does not seem to be running at all;"
8444 " in this case it's safer to repair by"
8445 " running 'gnt-instance stop' to ensure disk"
8446 " shutdown, and then restarting it")
8448 if runningon_target:
8449 # the migration has actually succeeded, we need to update the config
8450 self.feedback_fn("* instance running on secondary node (%s),"
8451 " updating config" % target_node)
8452 instance.primary_node = target_node
8453 self.cfg.Update(instance, self.feedback_fn)
8454 demoted_node = source_node
8456 self.feedback_fn("* instance confirmed to be running on its"
8457 " primary node (%s)" % source_node)
8458 demoted_node = target_node
8460 if instance.disk_template in constants.DTS_INT_MIRROR:
8461 self._EnsureSecondary(demoted_node)
8463 self._WaitUntilSync()
8464 except errors.OpExecError:
8465 # we ignore errors here, since if the device is standalone, it
8466 # won't be able to sync
8468 self._GoStandalone()
8469 self._GoReconnect(False)
8470 self._WaitUntilSync()
8472 self.feedback_fn("* done")
8474 def _RevertDiskStatus(self):
8475 """Try to revert the disk status after a failed migration.
8478 target_node = self.target_node
8479 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8483 self._EnsureSecondary(target_node)
8484 self._GoStandalone()
8485 self._GoReconnect(False)
8486 self._WaitUntilSync()
8487 except errors.OpExecError, err:
8488 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8489 " please try to recover the instance manually;"
8490 " error '%s'" % str(err))
8492 def _AbortMigration(self):
8493 """Call the hypervisor code to abort a started migration.
8496 instance = self.instance
8497 target_node = self.target_node
8498 source_node = self.source_node
8499 migration_info = self.migration_info
8501 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8505 abort_msg = abort_result.fail_msg
8507 logging.error("Aborting migration failed on target node %s: %s",
8508 target_node, abort_msg)
8509 # Don't raise an exception here, as we still have to try to revert the
8510 # disk status, even if this step failed.
8512 abort_result = self.rpc.call_instance_finalize_migration_src(
8513 source_node, instance, False, self.live)
8514 abort_msg = abort_result.fail_msg
8516 logging.error("Aborting migration failed on source node %s: %s",
8517 source_node, abort_msg)
8519 def _ExecMigration(self):
8520 """Migrate an instance.
8522 The migrate is done by:
8523 - change the disks into dual-master mode
8524 - wait until disks are fully synchronized again
8525 - migrate the instance
8526 - change disks on the new secondary node (the old primary) to secondary
8527 - wait until disks are fully synchronized
8528 - change disks into single-master mode
8531 instance = self.instance
8532 target_node = self.target_node
8533 source_node = self.source_node
8535 # Check for hypervisor version mismatch and warn the user.
8536 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8537 None, [self.instance.hypervisor])
8538 for ninfo in nodeinfo.values():
8539 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8541 (_, _, (src_info, )) = nodeinfo[source_node].payload
8542 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8544 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8545 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8546 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8547 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8548 if src_version != dst_version:
8549 self.feedback_fn("* warning: hypervisor version mismatch between"
8550 " source (%s) and target (%s) node" %
8551 (src_version, dst_version))
8553 self.feedback_fn("* checking disk consistency between source and target")
8554 for (idx, dev) in enumerate(instance.disks):
8555 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8556 raise errors.OpExecError("Disk %s is degraded or not fully"
8557 " synchronized on target node,"
8558 " aborting migration" % idx)
8560 if self.current_mem > self.tgt_free_mem:
8561 if not self.allow_runtime_changes:
8562 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8563 " free memory to fit instance %s on target"
8564 " node %s (have %dMB, need %dMB)" %
8565 (instance.name, target_node,
8566 self.tgt_free_mem, self.current_mem))
8567 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8568 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8571 rpcres.Raise("Cannot modify instance runtime memory")
8573 # First get the migration information from the remote node
8574 result = self.rpc.call_migration_info(source_node, instance)
8575 msg = result.fail_msg
8577 log_err = ("Failed fetching source migration information from %s: %s" %
8579 logging.error(log_err)
8580 raise errors.OpExecError(log_err)
8582 self.migration_info = migration_info = result.payload
8584 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8585 # Then switch the disks to master/master mode
8586 self._EnsureSecondary(target_node)
8587 self._GoStandalone()
8588 self._GoReconnect(True)
8589 self._WaitUntilSync()
8591 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8592 result = self.rpc.call_accept_instance(target_node,
8595 self.nodes_ip[target_node])
8597 msg = result.fail_msg
8599 logging.error("Instance pre-migration failed, trying to revert"
8600 " disk status: %s", msg)
8601 self.feedback_fn("Pre-migration failed, aborting")
8602 self._AbortMigration()
8603 self._RevertDiskStatus()
8604 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8605 (instance.name, msg))
8607 self.feedback_fn("* migrating instance to %s" % target_node)
8608 result = self.rpc.call_instance_migrate(source_node, instance,
8609 self.nodes_ip[target_node],
8611 msg = result.fail_msg
8613 logging.error("Instance migration failed, trying to revert"
8614 " disk status: %s", msg)
8615 self.feedback_fn("Migration failed, aborting")
8616 self._AbortMigration()
8617 self._RevertDiskStatus()
8618 raise errors.OpExecError("Could not migrate instance %s: %s" %
8619 (instance.name, msg))
8621 self.feedback_fn("* starting memory transfer")
8622 last_feedback = time.time()
8624 result = self.rpc.call_instance_get_migration_status(source_node,
8626 msg = result.fail_msg
8627 ms = result.payload # MigrationStatus instance
8628 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8629 logging.error("Instance migration failed, trying to revert"
8630 " disk status: %s", msg)
8631 self.feedback_fn("Migration failed, aborting")
8632 self._AbortMigration()
8633 self._RevertDiskStatus()
8635 msg = "hypervisor returned failure"
8636 raise errors.OpExecError("Could not migrate instance %s: %s" %
8637 (instance.name, msg))
8639 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8640 self.feedback_fn("* memory transfer complete")
8643 if (utils.TimeoutExpired(last_feedback,
8644 self._MIGRATION_FEEDBACK_INTERVAL) and
8645 ms.transferred_ram is not None):
8646 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8647 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8648 last_feedback = time.time()
8650 time.sleep(self._MIGRATION_POLL_INTERVAL)
8652 result = self.rpc.call_instance_finalize_migration_src(source_node,
8656 msg = result.fail_msg
8658 logging.error("Instance migration succeeded, but finalization failed"
8659 " on the source node: %s", msg)
8660 raise errors.OpExecError("Could not finalize instance migration: %s" %
8663 instance.primary_node = target_node
8665 # distribute new instance config to the other nodes
8666 self.cfg.Update(instance, self.feedback_fn)
8668 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8672 msg = result.fail_msg
8674 logging.error("Instance migration succeeded, but finalization failed"
8675 " on the target node: %s", msg)
8676 raise errors.OpExecError("Could not finalize instance migration: %s" %
8679 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8680 self._EnsureSecondary(source_node)
8681 self._WaitUntilSync()
8682 self._GoStandalone()
8683 self._GoReconnect(False)
8684 self._WaitUntilSync()
8686 # If the instance's disk template is `rbd' and there was a successful
8687 # migration, unmap the device from the source node.
8688 if self.instance.disk_template == constants.DT_RBD:
8689 disks = _ExpandCheckDisks(instance, instance.disks)
8690 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8692 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8693 msg = result.fail_msg
8695 logging.error("Migration was successful, but couldn't unmap the"
8696 " block device %s on source node %s: %s",
8697 disk.iv_name, source_node, msg)
8698 logging.error("You need to unmap the device %s manually on %s",
8699 disk.iv_name, source_node)
8701 self.feedback_fn("* done")
8703 def _ExecFailover(self):
8704 """Failover an instance.
8706 The failover is done by shutting it down on its present node and
8707 starting it on the secondary.
8710 instance = self.instance
8711 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8713 source_node = instance.primary_node
8714 target_node = self.target_node
8716 if instance.admin_state == constants.ADMINST_UP:
8717 self.feedback_fn("* checking disk consistency between source and target")
8718 for (idx, dev) in enumerate(instance.disks):
8719 # for drbd, these are drbd over lvm
8720 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8722 if primary_node.offline:
8723 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8725 (primary_node.name, idx, target_node))
8726 elif not self.ignore_consistency:
8727 raise errors.OpExecError("Disk %s is degraded on target node,"
8728 " aborting failover" % idx)
8730 self.feedback_fn("* not checking disk consistency as instance is not"
8733 self.feedback_fn("* shutting down instance on source node")
8734 logging.info("Shutting down instance %s on node %s",
8735 instance.name, source_node)
8737 result = self.rpc.call_instance_shutdown(source_node, instance,
8738 self.shutdown_timeout)
8739 msg = result.fail_msg
8741 if self.ignore_consistency or primary_node.offline:
8742 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8743 " proceeding anyway; please make sure node"
8744 " %s is down; error details: %s",
8745 instance.name, source_node, source_node, msg)
8747 raise errors.OpExecError("Could not shutdown instance %s on"
8749 (instance.name, source_node, msg))
8751 self.feedback_fn("* deactivating the instance's disks on source node")
8752 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8753 raise errors.OpExecError("Can't shut down the instance's disks")
8755 instance.primary_node = target_node
8756 # distribute new instance config to the other nodes
8757 self.cfg.Update(instance, self.feedback_fn)
8759 # Only start the instance if it's marked as up
8760 if instance.admin_state == constants.ADMINST_UP:
8761 self.feedback_fn("* activating the instance's disks on target node %s" %
8763 logging.info("Starting instance %s on node %s",
8764 instance.name, target_node)
8766 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8767 ignore_secondaries=True)
8769 _ShutdownInstanceDisks(self.lu, instance)
8770 raise errors.OpExecError("Can't activate the instance's disks")
8772 self.feedback_fn("* starting the instance on the target node %s" %
8774 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8776 msg = result.fail_msg
8778 _ShutdownInstanceDisks(self.lu, instance)
8779 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8780 (instance.name, target_node, msg))
8782 def Exec(self, feedback_fn):
8783 """Perform the migration.
8786 self.feedback_fn = feedback_fn
8787 self.source_node = self.instance.primary_node
8789 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8790 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8791 self.target_node = self.instance.secondary_nodes[0]
8792 # Otherwise self.target_node has been populated either
8793 # directly, or through an iallocator.
8795 self.all_nodes = [self.source_node, self.target_node]
8796 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8797 in self.cfg.GetMultiNodeInfo(self.all_nodes))
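# nodes_ip maps node name -> secondary IP; these are the addresses used by
# the DRBD disconnect/attach RPCs and by the migration RPC itself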
8800 feedback_fn("Failover instance %s" % self.instance.name)
8801 self._ExecFailover()
8803 feedback_fn("Migrating instance %s" % self.instance.name)
8806 return self._ExecCleanup()
8808 return self._ExecMigration()
8811 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8813 """Wrapper around L{_CreateBlockDevInner}.
8815 This method annotates the root device first.
8818 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8819 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8823 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8825 """Create a tree of block devices on a given node.
8827 If this device type has to be created on secondaries, create it and
8830 If not, just recurse to children keeping the same 'force' value.
8832 @attention: The device has to be annotated already.
8834 @param lu: the lu on whose behalf we execute
8835 @param node: the node on which to create the device
8836 @type instance: L{objects.Instance}
8837 @param instance: the instance which owns the device
8838 @type device: L{objects.Disk}
8839 @param device: the device to create
8840 @type force_create: boolean
8841 @param force_create: whether to force creation of this device; this
8842 will be changed to True whenever we find a device which has
8843 CreateOnSecondary() attribute
8844 @param info: the extra 'metadata' we should attach to the device
8845 (this will be represented as a LVM tag)
8846 @type force_open: boolean
8847 @param force_open: this parameter will be passed to the
8848 L{backend.BlockdevCreate} function where it specifies
8849 whether we run on primary or not, and it affects both
8850 the child assembly and the device's own Open() execution
8853 if device.CreateOnSecondary():
8857 for child in device.children:
8858 _CreateBlockDevInner(lu, node, instance, child, force_create,
8861 if not force_create:
8864 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8867 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8868 """Create a single block device on a given node.
8870 This will not recurse over children of the device, so they must be
8873 @param lu: the lu on whose behalf we execute
8874 @param node: the node on which to create the device
8875 @type instance: L{objects.Instance}
8876 @param instance: the instance which owns the device
8877 @type device: L{objects.Disk}
8878 @param device: the device to create
8879 @param info: the extra 'metadata' we should attach to the device
8880 (this will be represented as a LVM tag)
8881 @type force_open: boolean
8882 @param force_open: this parameter will be passed to the
8883 L{backend.BlockdevCreate} function where it specifies
8884 whether we run on primary or not, and it affects both
8885 the child assembly and the device's own Open() execution
8888 lu.cfg.SetDiskID(device, node)
8889 result = lu.rpc.call_blockdev_create(node, device, device.size,
8890 instance.name, force_open, info)
8891 result.Raise("Can't create block device %s on"
8892 " node %s for instance %s" % (device, node, instance.name))
8893 if device.physical_id is None:
8894 device.physical_id = result.payload
8897 def _GenerateUniqueNames(lu, exts):
8898 """Generate a suitable LV name.
8900 This will generate a logical volume name for the given instance.
8905 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8906 results.append("%s%s" % (new_id, val))
8910 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8911 iv_name, p_minor, s_minor):
8912 """Generate a drbd8 device complete with its children.
8915 assert len(vgnames) == len(names) == 2
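# Builds one LD_DRBD8 device whose two LV children hold the data (the
# requested size) and the DRBD metadata (DRBD_META_SIZE)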
8916 port = lu.cfg.AllocatePort()
8917 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8919 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8920 logical_id=(vgnames[0], names[0]),
8922 dev_meta = objects.Disk(dev_type=constants.LD_LV,
8923 size=constants.DRBD_META_SIZE,
8924 logical_id=(vgnames[1], names[1]),
8926 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8927 logical_id=(primary, secondary, port,
8930 children=[dev_data, dev_meta],
8931 iv_name=iv_name, params={})
8935 _DISK_TEMPLATE_NAME_PREFIX = {
8936 constants.DT_PLAIN: "",
8937 constants.DT_RBD: ".rbd",
8941 _DISK_TEMPLATE_DEVICE_TYPE = {
8942 constants.DT_PLAIN: constants.LD_LV,
8943 constants.DT_FILE: constants.LD_FILE,
8944 constants.DT_SHARED_FILE: constants.LD_FILE,
8945 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8946 constants.DT_RBD: constants.LD_RBD,
8950 def _GenerateDiskTemplate(
8951 lu, template_name, instance_name, primary_node, secondary_nodes,
8952 disk_info, file_storage_dir, file_driver, base_index,
8953 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8954 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8955 """Generate the entire disk layout for a given template type.
8958 #TODO: compute space requirements
8960 vgname = lu.cfg.GetVGName()
8961 disk_count = len(disk_info)
8964 if template_name == constants.DT_DISKLESS:
8966 elif template_name == constants.DT_DRBD8:
8967 if len(secondary_nodes) != 1:
8968 raise errors.ProgrammerError("Wrong template configuration")
8969 remote_node = secondary_nodes[0]
8970 minors = lu.cfg.AllocateDRBDMinor(
8971 [primary_node, remote_node] * len(disk_info), instance_name)
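# AllocateDRBDMinor returns one minor per listed node, i.e. two consecutive
# minors per disk, consumed below as minors[idx * 2] and minors[idx * 2 + 1]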
8973 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8975 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8978 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8979 for i in range(disk_count)]):
8980 names.append(lv_prefix + "_data")
8981 names.append(lv_prefix + "_meta")
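# one prefix per disk; each disk gets a '<prefix>_data' and a '<prefix>_meta'
# LV name, consumed pairwise below as names[idx * 2:idx * 2 + 2]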
8982 for idx, disk in enumerate(disk_info):
8983 disk_index = idx + base_index
8984 data_vg = disk.get(constants.IDISK_VG, vgname)
8985 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8986 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8987 disk[constants.IDISK_SIZE],
8989 names[idx * 2:idx * 2 + 2],
8990 "disk/%d" % disk_index,
8991 minors[idx * 2], minors[idx * 2 + 1])
8992 disk_dev.mode = disk[constants.IDISK_MODE]
8993 disks.append(disk_dev)
8996 raise errors.ProgrammerError("Wrong template configuration")
8998 if template_name == constants.DT_FILE:
9000 elif template_name == constants.DT_SHARED_FILE:
9001 _req_shr_file_storage()
9003 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9004 if name_prefix is None:
9007 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9008 (name_prefix, base_index + i)
9009 for i in range(disk_count)])
9011 if template_name == constants.DT_PLAIN:
9012 def logical_id_fn(idx, _, disk):
9013 vg = disk.get(constants.IDISK_VG, vgname)
9014 return (vg, names[idx])
9015 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9017 lambda _, disk_index, disk: (file_driver,
9018 "%s/disk%d" % (file_storage_dir,
9020 elif template_name == constants.DT_BLOCK:
9022 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9023 disk[constants.IDISK_ADOPT])
9024 elif template_name == constants.DT_RBD:
9025 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9027 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9029 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9031 for idx, disk in enumerate(disk_info):
9032 disk_index = idx + base_index
9033 size = disk[constants.IDISK_SIZE]
9034 feedback_fn("* disk %s, size %s" %
9035 (disk_index, utils.FormatUnit(size, "h")))
9036 disks.append(objects.Disk(dev_type=dev_type, size=size,
9037 logical_id=logical_id_fn(idx, disk_index, disk),
9038 iv_name="disk/%d" % disk_index,
9039 mode=disk[constants.IDISK_MODE],
9045 def _GetInstanceInfoText(instance):
9046 """Compute that text that should be added to the disk's metadata.
9049 return "originstname+%s" % instance.name
9052 def _CalcEta(time_taken, written, total_size):
9053 """Calculates the ETA based on size written and total size.
9055 @param time_taken: The time taken so far
9056 @param written: amount written so far
9057 @param total_size: The total size of data to be written
9058 @return: The remaining time in seconds
9061 avg_time = time_taken / float(written)
9062 return (total_size - written) * avg_time
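# Example: 100 MiB written in 50 seconds out of 400 MiB total gives
# avg_time = 0.5 s/MiB and an ETA of (400 - 100) * 0.5 = 150 seconds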
9065 def _WipeDisks(lu, instance, disks=None):
9066 """Wipes instance disks.
9068 @type lu: L{LogicalUnit}
9069 @param lu: the logical unit on whose behalf we execute
9070 @type instance: L{objects.Instance}
9071 @param instance: the instance whose disks we should wipe
9072 @return: the success of the wipe
9075 node = instance.primary_node
9078 disks = [(idx, disk, 0)
9079 for (idx, disk) in enumerate(instance.disks)]
9081 for (_, device, _) in disks:
9082 lu.cfg.SetDiskID(device, node)
9084 logging.info("Pausing synchronization of disks of instance '%s'",
9086 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9087 (map(compat.snd, disks),
9090 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9092 for idx, success in enumerate(result.payload):
9094 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9095 " failed", idx, instance.name)
9098 for (idx, device, offset) in disks:
9099 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9100 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9102 int(min(constants.MAX_WIPE_CHUNK,
9103 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
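# For illustration only (assuming, hypothetically, MIN_WIPE_CHUNK_PERCENT=10
# and MAX_WIPE_CHUNK=1024 MiB): a 100 GiB disk would be wiped in 1024 MiB
# chunks, a 5 GiB disk in 512 MiB chunks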
9107 start_time = time.time()
9112 info_text = (" (from %s to %s)" %
9113 (utils.FormatUnit(offset, "h"),
9114 utils.FormatUnit(size, "h")))
9116 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9118 logging.info("Wiping disk %d for instance %s on node %s using"
9119 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9121 while offset < size:
9122 wipe_size = min(wipe_chunk_size, size - offset)
9124 logging.debug("Wiping disk %d, offset %s, chunk %s",
9125 idx, offset, wipe_size)
9127 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9129 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9130 (idx, offset, wipe_size))
9134 if now - last_output >= 60:
9135 eta = _CalcEta(now - start_time, offset, size)
9136 lu.LogInfo(" - done: %.1f%% ETA: %s",
9137 offset / float(size) * 100, utils.FormatSeconds(eta))
9140 logging.info("Resuming synchronization of disks for instance '%s'",
9143 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9144 (map(compat.snd, disks),
9149 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9150 node, result.fail_msg)
9152 for idx, success in enumerate(result.payload):
9154 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9155 " failed", idx, instance.name)
9158 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9159 """Create all disks for an instance.
9161 This abstracts away some work from AddInstance.
9163 @type lu: L{LogicalUnit}
9164 @param lu: the logical unit on whose behalf we execute
9165 @type instance: L{objects.Instance}
9166 @param instance: the instance whose disks we should create
9168 @param to_skip: list of indices to skip
9169 @type target_node: string
9170 @param target_node: if passed, overrides the target node for creation
9172 @return: the success of the creation
9175 info = _GetInstanceInfoText(instance)
9176 if target_node is None:
9177 pnode = instance.primary_node
9178 all_nodes = instance.all_nodes
9183 if instance.disk_template in constants.DTS_FILEBASED:
9184 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9185 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9187 result.Raise("Failed to create directory '%s' on"
9188 " node %s" % (file_storage_dir, pnode))
9190 # Note: this needs to be kept in sync with adding of disks in
9191 # LUInstanceSetParams
9192 for idx, device in enumerate(instance.disks):
9193 if to_skip and idx in to_skip:
9195 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9197 for node in all_nodes:
9198 f_create = node == pnode
9199 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9202 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9203 """Remove all disks for an instance.
9205 This abstracts away some work from `AddInstance()` and
9206 `RemoveInstance()`. Note that in case some of the devices couldn't
9207 be removed, the removal will continue with the other ones (compare
9208 with `_CreateDisks()`).
9210 @type lu: L{LogicalUnit}
9211 @param lu: the logical unit on whose behalf we execute
9212 @type instance: L{objects.Instance}
9213 @param instance: the instance whose disks we should remove
9214 @type target_node: string
9215 @param target_node: used to override the node on which to remove the disks
9217 @return: the success of the removal
9220 logging.info("Removing block devices for instance %s", instance.name)
9223 ports_to_release = set()
9224 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9225 for (idx, device) in enumerate(anno_disks):
9227 edata = [(target_node, device)]
9229 edata = device.ComputeNodeTree(instance.primary_node)
9230 for node, disk in edata:
9231 lu.cfg.SetDiskID(disk, node)
9232 result = lu.rpc.call_blockdev_remove(node, disk)
9234 lu.LogWarning("Could not remove disk %s on node %s,"
9235 " continuing anyway: %s", idx, node, result.fail_msg)
9236 if not (result.offline and node != instance.primary_node):
9239 # if this is a DRBD disk, return its port to the pool
9240 if device.dev_type in constants.LDS_DRBD:
9241 ports_to_release.add(device.logical_id[2])
9243 if all_result or ignore_failures:
9244 for port in ports_to_release:
9245 lu.cfg.AddTcpUdpPort(port)
9247 if instance.disk_template in constants.DTS_FILEBASED:
9248 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9252 tgt = instance.primary_node
9253 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9255 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9256 file_storage_dir, instance.primary_node, result.fail_msg)
9262 def _ComputeDiskSizePerVG(disk_template, disks):
9263 """Compute disk size requirements in the volume group
9266 def _compute(disks, payload):
9267 """Universal algorithm.
9270 vgs = {}
9271 for disk in disks:
9272 vgs[disk[constants.IDISK_VG]] = \
9273 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9275 return vgs
9277 # Required free disk space as a function of disk and swap space
9278 req_size_dict = {
9279 constants.DT_DISKLESS: {},
9280 constants.DT_PLAIN: _compute(disks, 0),
9281 # 128 MB are added for drbd metadata for each disk
9282 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9283 constants.DT_FILE: {},
9284 constants.DT_SHARED_FILE: {},
9285 }
9287 if disk_template not in req_size_dict:
9288 raise errors.ProgrammerError("Disk template '%s' size requirement"
9289 " is unknown" % disk_template)
9291 return req_size_dict[disk_template]
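# Illustrative example (editor's addition): for two 10 GiB disks on a volume
# group named e.g. "xenvg", the helper returns the required space per VG in
# MiB, roughly:
#
#   _ComputeDiskSizePerVG(constants.DT_PLAIN, disks) == {"xenvg": 20480}
#   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) == {"xenvg": 20480 + 2 * constants.DRBD_META_SIZE}
#
# while file-based and diskless templates need no VG space and yield {}.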
9294 def _FilterVmNodes(lu, nodenames):
9295 """Filters out non-vm_capable nodes from a list.
9297 @type lu: L{LogicalUnit}
9298 @param lu: the logical unit for which we check
9299 @type nodenames: list
9300 @param nodenames: the list of nodes on which we should check
9302 @return: the list of vm-capable nodes
9305 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9306 return [name for name in nodenames if name not in vm_nodes]
9309 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9310 """Hypervisor parameter validation.
9312 This function abstract the hypervisor parameter validation to be
9313 used in both instance create and instance modify.
9315 @type lu: L{LogicalUnit}
9316 @param lu: the logical unit for which we check
9317 @type nodenames: list
9318 @param nodenames: the list of nodes on which we should check
9319 @type hvname: string
9320 @param hvname: the name of the hypervisor we should use
9321 @type hvparams: dict
9322 @param hvparams: the parameters which we need to check
9323 @raise errors.OpPrereqError: if the parameters are not valid
9326 nodenames = _FilterVmNodes(lu, nodenames)
9328 cluster = lu.cfg.GetClusterInfo()
9329 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9331 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9332 for node in nodenames:
9333 info = hvinfo[node]
9334 if info.offline:
9335 continue
9336 info.Raise("Hypervisor parameter validation failed on node %s" % node)
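# Illustrative sketch (editor's addition): the dict sent to the nodes is the
# cluster-level hvparams for the chosen hypervisor overlaid with the per-call
# overrides; with parameter names picked for illustration only:
#
#   cluster.hvparams["kvm"] = {"acpi": True, "boot_order": "disk"}
#   hvparams = {"boot_order": "network"}
#   hvfull == {"acpi": True, "boot_order": "network"}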
9339 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9340 """OS parameters validation.
9342 @type lu: L{LogicalUnit}
9343 @param lu: the logical unit for which we check
9344 @type required: boolean
9345 @param required: whether the validation should fail if the OS is not found
9347 @type nodenames: list
9348 @param nodenames: the list of nodes on which we should check
9349 @type osname: string
9350 @param osname: the name of the OS we should use
9351 @type osparams: dict
9352 @param osparams: the parameters which we need to check
9353 @raise errors.OpPrereqError: if the parameters are not valid
9356 nodenames = _FilterVmNodes(lu, nodenames)
9357 result = lu.rpc.call_os_validate(nodenames, required, osname,
9358 [constants.OS_VALIDATE_PARAMETERS],
9359 osparams)
9360 for node, nres in result.items():
9361 # we don't check for offline cases since this should be run only
9362 # against the master node and/or an instance's nodes
9363 nres.Raise("OS Parameters validation failed on node %s" % node)
9364 if not nres.payload:
9365 lu.LogInfo("OS %s not found on node %s, validation skipped",
9366 osname, node)
9369 def _CreateInstanceAllocRequest(op, disks, nics, beparams):
9370 """Wrapper around IAReqInstanceAlloc.
9372 @param op: The instance opcode
9373 @param disks: The computed disks
9374 @param nics: The computed nics
9375 @param beparams: The full filled beparams
9377 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9380 spindle_use = beparams[constants.BE_SPINDLE_USE]
9381 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9382 disk_template=op.disk_template,
9385 vcpus=beparams[constants.BE_VCPUS],
9386 memory=beparams[constants.BE_MAXMEM],
9387 spindle_use=spindle_use,
9388 disks=disks,
9389 nics=[n.ToDict() for n in nics],
9390 hypervisor=op.hypervisor)
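# Editor's note: the request above merely bundles the pieces computed
# elsewhere (instance name, disk template, the disk dicts, the NICs serialized
# via ToDict(), vcpus/memory/spindle_use taken from the filled beparams and
# the hypervisor) so that iallocator.IAllocator can hand them to the
# allocation script; see LUInstanceCreate._RunAllocator and
# LUInstanceMultiAlloc.CheckPrereq below for the two callers.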
9393 def _ComputeNics(op, cluster, default_ip, cfg, proc):
9394 """Computes the nics.
9396 @param op: The instance opcode
9397 @param cluster: Cluster configuration object
9398 @param default_ip: The default ip to assign
9399 @param cfg: An instance of the configuration object
9400 @param proc: The executor instance
9402 @returns: the list of built up NICs
9405 nics = []
9406 for idx, nic in enumerate(op.nics):
9407 nic_mode_req = nic.get(constants.INIC_MODE, None)
9408 nic_mode = nic_mode_req
9409 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9410 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9412 # in routed mode, for the first nic, the default ip is 'auto'
9413 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9414 default_ip_mode = constants.VALUE_AUTO
9416 default_ip_mode = constants.VALUE_NONE
9418 # ip validity checks
9419 ip = nic.get(constants.INIC_IP, default_ip_mode)
9420 if ip is None or ip.lower() == constants.VALUE_NONE:
9421 nic_ip = None
9422 elif ip.lower() == constants.VALUE_AUTO:
9423 if not op.name_check:
9424 raise errors.OpPrereqError("IP address set to auto but name checks"
9425 " have been skipped",
9426 errors.ECODE_INVAL)
9427 nic_ip = default_ip
9428 else:
9429 if not netutils.IPAddress.IsValid(ip):
9430 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9431 errors.ECODE_INVAL)
9432 nic_ip = ip
9434 # TODO: check the ip address for uniqueness
9435 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9436 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9439 # MAC address verification
9440 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9441 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9442 mac = utils.NormalizeAndValidateMac(mac)
9444 try:
9445 # TODO: We need to factor this out
9446 cfg.ReserveMAC(mac, proc.GetECId())
9447 except errors.ReservationError:
9448 raise errors.OpPrereqError("MAC address %s already in use"
9449 " in cluster" % mac,
9450 errors.ECODE_NOTUNIQUE)
9452 # Build nic parameters
9453 link = nic.get(constants.INIC_LINK, None)
9454 if link == constants.VALUE_AUTO:
9455 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9456 nicparams = {}
9457 if nic_mode_req:
9458 nicparams[constants.NIC_MODE] = nic_mode
9459 if link:
9460 nicparams[constants.NIC_LINK] = link
9462 check_params = cluster.SimpleFillNIC(nicparams)
9463 objects.NIC.CheckParameterSyntax(check_params)
9464 nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9466 return nics
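# Illustrative example (editor's addition): an empty NIC spec ({}) on a
# cluster whose default NIC mode is "bridged" ends up as
#
#   objects.NIC(mac="auto", ip=None, nicparams={})
#
# i.e. mode/link are only recorded in nicparams when they were requested
# explicitly, so the NIC keeps following the cluster defaults; the "auto" MAC
# is replaced by a generated one later (see LUInstanceCreate.CheckPrereq).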
9469 def _ComputeDisks(op, default_vg):
9470 """Computes the instance disks.
9472 @param op: The instance opcode
9473 @param default_vg: The default_vg to assume
9475 @return: the computed disks
9478 disks = []
9479 for disk in op.disks:
9480 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9481 if mode not in constants.DISK_ACCESS_SET:
9482 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9483 mode, errors.ECODE_INVAL)
9484 size = disk.get(constants.IDISK_SIZE, None)
9485 if size is None:
9486 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9487 try:
9488 size = int(size)
9489 except (TypeError, ValueError):
9490 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9491 errors.ECODE_INVAL)
9493 data_vg = disk.get(constants.IDISK_VG, default_vg)
9494 new_disk = {
9495 constants.IDISK_SIZE: size,
9496 constants.IDISK_MODE: mode,
9497 constants.IDISK_VG: data_vg,
9498 }
9499 if constants.IDISK_METAVG in disk:
9500 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9501 if constants.IDISK_ADOPT in disk:
9502 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9503 disks.append(new_disk)
9505 return disks
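# Illustrative example (editor's addition, key names shown by their string
# values): with default_vg == "xenvg", an opcode carrying
#
#   op.disks = [{"size": "10240"}, {"size": 512, "mode": "ro", "vg": "fast"}]
#
# is normalised by the loop above into
#
#   [{"size": 10240, "mode": "rw", "vg": "xenvg"},
#    {"size": 512, "mode": "ro", "vg": "fast"}]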
9508 def _ComputeFullBeParams(op, cluster):
9509 """Computes the full beparams.
9511 @param op: The instance opcode
9512 @param cluster: The cluster config object
9514 @return: The fully filled beparams
9517 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9518 for param, value in op.beparams.iteritems():
9519 if value == constants.VALUE_AUTO:
9520 op.beparams[param] = default_beparams[param]
9521 objects.UpgradeBeParams(op.beparams)
9522 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9523 return cluster.SimpleFillBE(op.beparams)
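# Illustrative example (editor's addition): "auto" values are first replaced
# by the cluster defaults, then UpgradeBeParams translates the legacy "memory"
# setting into maxmem/minmem before the dict is type-checked and filled, so
#
#   op.beparams = {"vcpus": "auto", "memory": 512}
#
# roughly becomes a fully filled dict with the cluster's default vcpus and
# maxmem == minmem == 512.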
9526 class LUInstanceCreate(LogicalUnit):
9527 """Create an instance.
9530 HPATH = "instance-add"
9531 HTYPE = constants.HTYPE_INSTANCE
9534 def CheckArguments(self):
9538 # do not require name_check to ease forward/backward compatibility
9540 if self.op.no_install and self.op.start:
9541 self.LogInfo("No-installation mode selected, disabling startup")
9542 self.op.start = False
9543 # validate/normalize the instance name
9544 self.op.instance_name = \
9545 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9547 if self.op.ip_check and not self.op.name_check:
9548 # TODO: make the ip check more flexible and not depend on the name check
9549 raise errors.OpPrereqError("Cannot do IP address check without a name"
9550 " check", errors.ECODE_INVAL)
9552 # check nics' parameter names
9553 for nic in self.op.nics:
9554 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9556 # check disks. parameter names and consistent adopt/no-adopt strategy
9557 has_adopt = has_no_adopt = False
9558 for disk in self.op.disks:
9559 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9560 if constants.IDISK_ADOPT in disk:
9561 has_adopt = True
9562 else:
9563 has_no_adopt = True
9564 if has_adopt and has_no_adopt:
9565 raise errors.OpPrereqError("Either all disks are adopted or none is",
9566 errors.ECODE_INVAL)
9567 if has_adopt:
9568 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9569 raise errors.OpPrereqError("Disk adoption is not supported for the"
9570 " '%s' disk template" %
9571 self.op.disk_template,
9573 if self.op.iallocator is not None:
9574 raise errors.OpPrereqError("Disk adoption not allowed with an"
9575 " iallocator script", errors.ECODE_INVAL)
9576 if self.op.mode == constants.INSTANCE_IMPORT:
9577 raise errors.OpPrereqError("Disk adoption not allowed for"
9578 " instance import", errors.ECODE_INVAL)
9580 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9581 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9582 " but no 'adopt' parameter given" %
9583 self.op.disk_template,
9586 self.adopt_disks = has_adopt
9588 # instance name verification
9589 if self.op.name_check:
9590 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9591 self.op.instance_name = self.hostname1.name
9592 # used in CheckPrereq for ip ping check
9593 self.check_ip = self.hostname1.ip
9595 self.check_ip = None
9597 # file storage checks
9598 if (self.op.file_driver and
9599 not self.op.file_driver in constants.FILE_DRIVER):
9600 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9601 self.op.file_driver, errors.ECODE_INVAL)
9603 if self.op.disk_template == constants.DT_FILE:
9604 opcodes.RequireFileStorage()
9605 elif self.op.disk_template == constants.DT_SHARED_FILE:
9606 opcodes.RequireSharedFileStorage()
9608 ### Node/iallocator related checks
9609 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9611 if self.op.pnode is not None:
9612 if self.op.disk_template in constants.DTS_INT_MIRROR:
9613 if self.op.snode is None:
9614 raise errors.OpPrereqError("The networked disk templates need"
9615 " a mirror node", errors.ECODE_INVAL)
9616 elif self.op.snode:
9617 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9618 " template")
9619 self.op.snode = None
9621 self._cds = _GetClusterDomainSecret()
9623 if self.op.mode == constants.INSTANCE_IMPORT:
9624 # On import force_variant must be True, because if we forced it at
9625 # initial install, our only chance when importing it back is that it
9627 self.op.force_variant = True
9629 if self.op.no_install:
9630 self.LogInfo("No-installation mode has no effect during import")
9632 elif self.op.mode == constants.INSTANCE_CREATE:
9633 if self.op.os_type is None:
9634 raise errors.OpPrereqError("No guest OS specified",
9636 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9637 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9638 " installation" % self.op.os_type,
9640 if self.op.disk_template is None:
9641 raise errors.OpPrereqError("No disk template specified",
9644 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9645 # Check handshake to ensure both clusters have the same domain secret
9646 src_handshake = self.op.source_handshake
9647 if not src_handshake:
9648 raise errors.OpPrereqError("Missing source handshake",
9651 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9652 src_handshake)
9653 if errmsg:
9654 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9655 errors.ECODE_INVAL)
9657 # Load and check source CA
9658 self.source_x509_ca_pem = self.op.source_x509_ca
9659 if not self.source_x509_ca_pem:
9660 raise errors.OpPrereqError("Missing source X509 CA",
9661 errors.ECODE_INVAL)
9663 try:
9664 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9665 self._cds)
9666 except OpenSSL.crypto.Error, err:
9667 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9668 (err, ), errors.ECODE_INVAL)
9670 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9671 if errcode is not None:
9672 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9675 self.source_x509_ca = cert
9677 src_instance_name = self.op.source_instance_name
9678 if not src_instance_name:
9679 raise errors.OpPrereqError("Missing source instance name",
9682 self.source_instance_name = \
9683 netutils.GetHostname(name=src_instance_name).name
9686 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9687 self.op.mode, errors.ECODE_INVAL)
9689 def ExpandNames(self):
9690 """ExpandNames for CreateInstance.
9692 Figure out the right locks for instance creation.
9695 self.needed_locks = {}
9697 instance_name = self.op.instance_name
9698 # this is just a preventive check, but someone might still add this
9699 # instance in the meantime, and creation will fail at lock-add time
9700 if instance_name in self.cfg.GetInstanceList():
9701 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9702 instance_name, errors.ECODE_EXISTS)
9704 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9706 if self.op.iallocator:
9707 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9708 # specifying a group on instance creation and then selecting nodes from
9710 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9711 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9713 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9714 nodelist = [self.op.pnode]
9715 if self.op.snode is not None:
9716 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9717 nodelist.append(self.op.snode)
9718 self.needed_locks[locking.LEVEL_NODE] = nodelist
9719 # Lock resources of instance's primary and secondary nodes (copy to
9720 # prevent accidental modification)
9721 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9723 # in case of import lock the source node too
9724 if self.op.mode == constants.INSTANCE_IMPORT:
9725 src_node = self.op.src_node
9726 src_path = self.op.src_path
9728 if src_path is None:
9729 self.op.src_path = src_path = self.op.instance_name
9731 if src_node is None:
9732 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9733 self.op.src_node = None
9734 if os.path.isabs(src_path):
9735 raise errors.OpPrereqError("Importing an instance from a path"
9736 " requires a source node option",
9739 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9740 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9741 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9742 if not os.path.isabs(src_path):
9743 self.op.src_path = src_path = \
9744 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9746 def _RunAllocator(self):
9747 """Run the allocator based on input opcode.
9750 req = _CreateInstanceAllocRequest(self.op, self.disks,
9751 self.nics, self.be_full)
9752 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9754 ial.Run(self.op.iallocator)
9757 raise errors.OpPrereqError("Can't compute nodes using"
9758 " iallocator '%s': %s" %
9759 (self.op.iallocator, ial.info),
9761 self.op.pnode = ial.result[0]
9762 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9763 self.op.instance_name, self.op.iallocator,
9764 utils.CommaJoin(ial.result))
9766 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9768 if req.RequiredNodes() == 2:
9769 self.op.snode = ial.result[1]
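# Editor's note: ial.result is a list of node names chosen by the allocator;
# it has one entry for non-mirrored disk templates and two entries
# (primary, secondary) for DRBD-style templates, which is exactly what the
# RequiredNodes() assertion above encodes.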
9771 def BuildHooksEnv(self):
9774 This runs on master, primary and secondary nodes of the instance.
9778 "ADD_MODE": self.op.mode,
9780 if self.op.mode == constants.INSTANCE_IMPORT:
9781 env["SRC_NODE"] = self.op.src_node
9782 env["SRC_PATH"] = self.op.src_path
9783 env["SRC_IMAGES"] = self.src_images
9785 env.update(_BuildInstanceHookEnv(
9786 name=self.op.instance_name,
9787 primary_node=self.op.pnode,
9788 secondary_nodes=self.secondaries,
9789 status=self.op.start,
9790 os_type=self.op.os_type,
9791 minmem=self.be_full[constants.BE_MINMEM],
9792 maxmem=self.be_full[constants.BE_MAXMEM],
9793 vcpus=self.be_full[constants.BE_VCPUS],
9794 nics=_NICListToTuple(self, self.nics),
9795 disk_template=self.op.disk_template,
9796 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9797 for d in self.disks],
9800 hypervisor_name=self.op.hypervisor,
9806 def BuildHooksNodes(self):
9807 """Build hooks nodes.
9810 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9813 def _ReadExportInfo(self):
9814 """Reads the export information from disk.
9816 It will override the opcode source node and path with the actual
9817 information, if these two were not specified before.
9819 @return: the export information
9822 assert self.op.mode == constants.INSTANCE_IMPORT
9824 src_node = self.op.src_node
9825 src_path = self.op.src_path
9827 if src_node is None:
9828 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9829 exp_list = self.rpc.call_export_list(locked_nodes)
9831 for node in exp_list:
9832 if exp_list[node].fail_msg:
9834 if src_path in exp_list[node].payload:
9836 self.op.src_node = src_node = node
9837 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9841 raise errors.OpPrereqError("No export found for relative path %s" %
9842 src_path, errors.ECODE_INVAL)
9844 _CheckNodeOnline(self, src_node)
9845 result = self.rpc.call_export_info(src_node, src_path)
9846 result.Raise("No export or invalid export found in dir %s" % src_path)
9848 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9849 if not export_info.has_section(constants.INISECT_EXP):
9850 raise errors.ProgrammerError("Corrupted export config",
9851 errors.ECODE_ENVIRON)
9853 ei_version = export_info.get(constants.INISECT_EXP, "version")
9854 if (int(ei_version) != constants.EXPORT_VERSION):
9855 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9856 (ei_version, constants.EXPORT_VERSION),
9857 errors.ECODE_ENVIRON)
9860 def _ReadExportParams(self, einfo):
9861 """Use export parameters as defaults.
9863 In case the opcode doesn't specify (as in override) some instance
9864 parameters, then try to use them from the export information, if
9868 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9870 if self.op.disk_template is None:
9871 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9872 self.op.disk_template = einfo.get(constants.INISECT_INS,
9874 if self.op.disk_template not in constants.DISK_TEMPLATES:
9875 raise errors.OpPrereqError("Disk template specified in configuration"
9876 " file is not one of the allowed values:"
9878 " ".join(constants.DISK_TEMPLATES),
9881 raise errors.OpPrereqError("No disk template specified and the export"
9882 " is missing the disk_template information",
9885 if not self.op.disks:
9887 # TODO: import the disk iv_name too
9888 for idx in range(constants.MAX_DISKS):
9889 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9890 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9891 disks.append({constants.IDISK_SIZE: disk_sz})
9892 self.op.disks = disks
9893 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9894 raise errors.OpPrereqError("No disk info specified and the export"
9895 " is missing the disk information",
9898 if not self.op.nics:
9900 for idx in range(constants.MAX_NICS):
9901 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9903 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9904 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9911 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9912 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9914 if (self.op.hypervisor is None and
9915 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9916 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9918 if einfo.has_section(constants.INISECT_HYP):
9919 # use the export parameters but do not override the ones
9920 # specified by the user
9921 for name, value in einfo.items(constants.INISECT_HYP):
9922 if name not in self.op.hvparams:
9923 self.op.hvparams[name] = value
9925 if einfo.has_section(constants.INISECT_BEP):
9926 # use the parameters, without overriding
9927 for name, value in einfo.items(constants.INISECT_BEP):
9928 if name not in self.op.beparams:
9929 self.op.beparams[name] = value
9930 # Compatibility for the old "memory" be param
9931 if name == constants.BE_MEMORY:
9932 if constants.BE_MAXMEM not in self.op.beparams:
9933 self.op.beparams[constants.BE_MAXMEM] = value
9934 if constants.BE_MINMEM not in self.op.beparams:
9935 self.op.beparams[constants.BE_MINMEM] = value
9937 # try to read the parameters old style, from the main section
9938 for name in constants.BES_PARAMETERS:
9939 if (name not in self.op.beparams and
9940 einfo.has_option(constants.INISECT_INS, name)):
9941 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9943 if einfo.has_section(constants.INISECT_OSP):
9944 # use the parameters, without overriding
9945 for name, value in einfo.items(constants.INISECT_OSP):
9946 if name not in self.op.osparams:
9947 self.op.osparams[name] = value
9949 def _RevertToDefaults(self, cluster):
9950 """Revert the instance parameters to the default values.
9954 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9955 for name in self.op.hvparams.keys():
9956 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9957 del self.op.hvparams[name]
9959 be_defs = cluster.SimpleFillBE({})
9960 for name in self.op.beparams.keys():
9961 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9962 del self.op.beparams[name]
9964 nic_defs = cluster.SimpleFillNIC({})
9965 for nic in self.op.nics:
9966 for name in constants.NICS_PARAMETERS:
9967 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9970 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9971 for name in self.op.osparams.keys():
9972 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9973 del self.op.osparams[name]
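# Editor's note (illustrative): parameters whose value merely repeats the
# current default are dropped from the opcode so the instance keeps inheriting
# that default; e.g. if the cluster default for a hypervisor parameter is
# kernel_path=/boot/vmlinuz and the user passed the same value, the entry is
# removed and a later change of the cluster default will also apply to this
# instance.  This pass only runs when identify_defaults is set (see
# CheckPrereq below).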
9975 def _CalculateFileStorageDir(self):
9976 """Calculate final instance file storage dir.
9979 # file storage dir calculation/check
9980 self.instance_file_storage_dir = None
9981 if self.op.disk_template in constants.DTS_FILEBASED:
9982 # build the full file storage dir path
9985 if self.op.disk_template == constants.DT_SHARED_FILE:
9986 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9988 get_fsd_fn = self.cfg.GetFileStorageDir
9990 cfg_storagedir = get_fsd_fn()
9991 if not cfg_storagedir:
9992 raise errors.OpPrereqError("Cluster file storage dir not defined",
9994 joinargs.append(cfg_storagedir)
9996 if self.op.file_storage_dir is not None:
9997 joinargs.append(self.op.file_storage_dir)
9999 joinargs.append(self.op.instance_name)
10001 # pylint: disable=W0142
10002 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
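# Illustrative example (editor's addition, paths are made up): for a DT_FILE
# instance "inst1.example.com" on a cluster whose file storage dir is
# "/srv/ganeti/file-storage" and with no per-instance file_storage_dir given,
# joinargs ends up as ["/srv/ganeti/file-storage", "inst1.example.com"] and
# the result is "/srv/ganeti/file-storage/inst1.example.com"; an explicit
# file_storage_dir, when given, is inserted between the two components.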
10004 def CheckPrereq(self): # pylint: disable=R0914
10005 """Check prerequisites.
10008 self._CalculateFileStorageDir()
10010 if self.op.mode == constants.INSTANCE_IMPORT:
10011 export_info = self._ReadExportInfo()
10012 self._ReadExportParams(export_info)
10013 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10015 self._old_instance_name = None
10017 if (not self.cfg.GetVGName() and
10018 self.op.disk_template not in constants.DTS_NOT_LVM):
10019 raise errors.OpPrereqError("Cluster does not support lvm-based"
10020 " instances", errors.ECODE_STATE)
10022 if (self.op.hypervisor is None or
10023 self.op.hypervisor == constants.VALUE_AUTO):
10024 self.op.hypervisor = self.cfg.GetHypervisorType()
10026 cluster = self.cfg.GetClusterInfo()
10027 enabled_hvs = cluster.enabled_hypervisors
10028 if self.op.hypervisor not in enabled_hvs:
10029 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10031 (self.op.hypervisor, ",".join(enabled_hvs)),
10032 errors.ECODE_STATE)
10034 # Check tag validity
10035 for tag in self.op.tags:
10036 objects.TaggableObject.ValidateTag(tag)
10038 # check hypervisor parameter syntax (locally)
10039 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10040 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10042 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10043 hv_type.CheckParameterSyntax(filled_hvp)
10044 self.hv_full = filled_hvp
10045 # check that we don't specify global parameters on an instance
10046 _CheckGlobalHvParams(self.op.hvparams)
10048 # fill and remember the beparams dict
10049 self.be_full = _ComputeFullBeParams(self.op, cluster)
10051 # build os parameters
10052 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10054 # now that hvp/bep are in final format, let's reset to defaults,
10056 if self.op.identify_defaults:
10057 self._RevertToDefaults(cluster)
10060 self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10063 # disk checks/pre-build
10064 default_vg = self.cfg.GetVGName()
10065 self.disks = _ComputeDisks(self.op, default_vg)
10067 if self.op.mode == constants.INSTANCE_IMPORT:
10069 for idx in range(len(self.disks)):
10070 option = "disk%d_dump" % idx
10071 if export_info.has_option(constants.INISECT_INS, option):
10072 # FIXME: are the old os-es, disk sizes, etc. useful?
10073 export_name = export_info.get(constants.INISECT_INS, option)
10074 image = utils.PathJoin(self.op.src_path, export_name)
10075 disk_images.append(image)
10077 disk_images.append(False)
10079 self.src_images = disk_images
10081 if self.op.instance_name == self._old_instance_name:
10082 for idx, nic in enumerate(self.nics):
10083 if nic.mac == constants.VALUE_AUTO:
10084 nic_mac_ini = "nic%d_mac" % idx
10085 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10087 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10089 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10090 if self.op.ip_check:
10091 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10092 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10093 (self.check_ip, self.op.instance_name),
10094 errors.ECODE_NOTUNIQUE)
10096 #### mac address generation
10097 # By generating here the mac address both the allocator and the hooks get
10098 # the real final mac address rather than the 'auto' or 'generate' value.
10099 # There is a race condition between the generation and the instance object
10100 # creation, which means that we know the mac is valid now, but we're not
10101 # sure it will be when we actually add the instance. If things go bad
10102 # adding the instance will abort because of a duplicate mac, and the
10103 # creation job will fail.
10104 for nic in self.nics:
10105 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10106 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
10110 if self.op.iallocator is not None:
10111 self._RunAllocator()
10113 # Release all unneeded node locks
10114 _ReleaseLocks(self, locking.LEVEL_NODE,
10115 keep=filter(None, [self.op.pnode, self.op.snode,
10116 self.op.src_node]))
10117 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10118 keep=filter(None, [self.op.pnode, self.op.snode,
10119 self.op.src_node]))
10121 #### node related checks
10123 # check primary node
10124 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10125 assert self.pnode is not None, \
10126 "Cannot retrieve locked node %s" % self.op.pnode
10127 if pnode.offline:
10128 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10129 pnode.name, errors.ECODE_STATE)
10130 if pnode.drained:
10131 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10132 pnode.name, errors.ECODE_STATE)
10133 if not pnode.vm_capable:
10134 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10135 " '%s'" % pnode.name, errors.ECODE_STATE)
10137 self.secondaries = []
10139 # mirror node verification
10140 if self.op.disk_template in constants.DTS_INT_MIRROR:
10141 if self.op.snode == pnode.name:
10142 raise errors.OpPrereqError("The secondary node cannot be the"
10143 " primary node", errors.ECODE_INVAL)
10144 _CheckNodeOnline(self, self.op.snode)
10145 _CheckNodeNotDrained(self, self.op.snode)
10146 _CheckNodeVmCapable(self, self.op.snode)
10147 self.secondaries.append(self.op.snode)
10149 snode = self.cfg.GetNodeInfo(self.op.snode)
10150 if pnode.group != snode.group:
10151 self.LogWarning("The primary and secondary nodes are in two"
10152 " different node groups; the disk parameters"
10153 " from the first disk's node group will be"
10156 nodenames = [pnode.name] + self.secondaries
10158 # Verify instance specs
10159 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10161 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10162 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10163 constants.ISPEC_DISK_COUNT: len(self.disks),
10164 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10165 constants.ISPEC_NIC_COUNT: len(self.nics),
10166 constants.ISPEC_SPINDLE_USE: spindle_use,
10169 group_info = self.cfg.GetNodeGroup(pnode.group)
10170 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10171 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10172 if not self.op.ignore_ipolicy and res:
10173 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10174 (pnode.group, group_info.name, utils.CommaJoin(res)))
10175 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10177 if not self.adopt_disks:
10178 if self.op.disk_template == constants.DT_RBD:
10179 # _CheckRADOSFreeSpace() is just a placeholder.
10180 # Any function that checks prerequisites can be placed here.
10181 # Check if there is enough space on the RADOS cluster.
10182 _CheckRADOSFreeSpace()
10184 # Check lv size requirements, if not adopting
10185 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10186 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10188 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10189 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10190 disk[constants.IDISK_ADOPT])
10191 for disk in self.disks])
10192 if len(all_lvs) != len(self.disks):
10193 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10194 errors.ECODE_INVAL)
10195 for lv_name in all_lvs:
10196 try:
10197 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10198 # to ReserveLV uses the same syntax
10199 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10200 except errors.ReservationError:
10201 raise errors.OpPrereqError("LV named %s used by another instance" %
10202 lv_name, errors.ECODE_NOTUNIQUE)
10204 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10205 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10207 node_lvs = self.rpc.call_lv_list([pnode.name],
10208 vg_names.payload.keys())[pnode.name]
10209 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10210 node_lvs = node_lvs.payload
10212 delta = all_lvs.difference(node_lvs.keys())
10213 if delta:
10214 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10215 utils.CommaJoin(delta),
10216 errors.ECODE_INVAL)
10217 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10218 if online_lvs:
10219 raise errors.OpPrereqError("Online logical volumes found, cannot"
10220 " adopt: %s" % utils.CommaJoin(online_lvs),
10221 errors.ECODE_STATE)
10222 # update the size of disk based on what is found
10223 for dsk in self.disks:
10224 dsk[constants.IDISK_SIZE] = \
10225 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10226 dsk[constants.IDISK_ADOPT])][0]))
10228 elif self.op.disk_template == constants.DT_BLOCK:
10229 # Normalize and de-duplicate device paths
10230 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10231 for disk in self.disks])
10232 if len(all_disks) != len(self.disks):
10233 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10234 errors.ECODE_INVAL)
10235 baddisks = [d for d in all_disks
10236 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10237 if baddisks:
10238 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10239 " cannot be adopted" %
10240 (", ".join(baddisks),
10241 constants.ADOPTABLE_BLOCKDEV_ROOT),
10242 errors.ECODE_INVAL)
10244 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10245 list(all_disks))[pnode.name]
10246 node_disks.Raise("Cannot get block device information from node %s" %
10248 node_disks = node_disks.payload
10249 delta = all_disks.difference(node_disks.keys())
10250 if delta:
10251 raise errors.OpPrereqError("Missing block device(s): %s" %
10252 utils.CommaJoin(delta),
10253 errors.ECODE_INVAL)
10254 for dsk in self.disks:
10255 dsk[constants.IDISK_SIZE] = \
10256 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10258 # Verify instance specs
10259 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10261 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10262 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10263 constants.ISPEC_DISK_COUNT: len(self.disks),
10264 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10265 for disk in self.disks],
10266 constants.ISPEC_NIC_COUNT: len(self.nics),
10267 constants.ISPEC_SPINDLE_USE: spindle_use,
10270 group_info = self.cfg.GetNodeGroup(pnode.group)
10271 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10272 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10273 if not self.op.ignore_ipolicy and res:
10274 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10275 " policy: %s") % (pnode.group,
10276 utils.CommaJoin(res)),
10277 errors.ECODE_INVAL)
10279 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10281 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10282 # check OS parameters (remotely)
10283 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10285 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10287 # memory check on primary node
10288 #TODO(dynmem): use MINMEM for checking
10290 _CheckNodeFreeMemory(self, self.pnode.name,
10291 "creating instance %s" % self.op.instance_name,
10292 self.be_full[constants.BE_MAXMEM],
10293 self.op.hypervisor)
10295 self.dry_run_result = list(nodenames)
10297 def Exec(self, feedback_fn):
10298 """Create and add the instance to the cluster.
10301 instance = self.op.instance_name
10302 pnode_name = self.pnode.name
10304 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10305 self.owned_locks(locking.LEVEL_NODE)), \
10306 "Node locks differ from node resource locks"
10308 ht_kind = self.op.hypervisor
10309 if ht_kind in constants.HTS_REQ_PORT:
10310 network_port = self.cfg.AllocatePort()
10312 network_port = None
10314 # This is ugly but we got a chicken-egg problem here
10315 # We can only take the group disk parameters, as the instance
10316 # has no disks yet (we are generating them right here).
10317 node = self.cfg.GetNodeInfo(pnode_name)
10318 nodegroup = self.cfg.GetNodeGroup(node.group)
10319 disks = _GenerateDiskTemplate(self,
10320 self.op.disk_template,
10321 instance, pnode_name,
10324 self.instance_file_storage_dir,
10325 self.op.file_driver,
10328 self.cfg.GetGroupDiskParams(nodegroup))
10330 iobj = objects.Instance(name=instance, os=self.op.os_type,
10331 primary_node=pnode_name,
10332 nics=self.nics, disks=disks,
10333 disk_template=self.op.disk_template,
10334 admin_state=constants.ADMINST_DOWN,
10335 network_port=network_port,
10336 beparams=self.op.beparams,
10337 hvparams=self.op.hvparams,
10338 hypervisor=self.op.hypervisor,
10339 osparams=self.op.osparams,
10343 for tag in self.op.tags:
10346 if self.adopt_disks:
10347 if self.op.disk_template == constants.DT_PLAIN:
10348 # rename LVs to the newly-generated names; we need to construct
10349 # 'fake' LV disks with the old data, plus the new unique_id
10350 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10352 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10353 rename_to.append(t_dsk.logical_id)
10354 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10355 self.cfg.SetDiskID(t_dsk, pnode_name)
10356 result = self.rpc.call_blockdev_rename(pnode_name,
10357 zip(tmp_disks, rename_to))
10358 result.Raise("Failed to rename adopted LVs")
10360 feedback_fn("* creating instance disks...")
10361 try:
10362 _CreateDisks(self, iobj)
10363 except errors.OpExecError:
10364 self.LogWarning("Device creation failed, reverting...")
10365 try:
10366 _RemoveDisks(self, iobj)
10367 finally:
10368 self.cfg.ReleaseDRBDMinors(instance)
10369 raise
10371 feedback_fn("adding instance %s to cluster config" % instance)
10373 self.cfg.AddInstance(iobj, self.proc.GetECId())
10375 # Declare that we don't want to remove the instance lock anymore, as we've
10376 # added the instance to the config
10377 del self.remove_locks[locking.LEVEL_INSTANCE]
10379 if self.op.mode == constants.INSTANCE_IMPORT:
10380 # Release unused nodes
10381 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10383 # Release all nodes
10384 _ReleaseLocks(self, locking.LEVEL_NODE)
10386 disk_abort = False
10387 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10388 feedback_fn("* wiping instance disks...")
10389 try:
10390 _WipeDisks(self, iobj)
10391 except errors.OpExecError, err:
10392 logging.exception("Wiping disks failed")
10393 self.LogWarning("Wiping instance disks failed (%s)", err)
10394 disk_abort = True
10396 if disk_abort:
10397 # Something is already wrong with the disks, don't do anything else
10398 pass
10399 elif self.op.wait_for_sync:
10400 disk_abort = not _WaitForSync(self, iobj)
10401 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10402 # make sure the disks are not degraded (still sync-ing is ok)
10403 feedback_fn("* checking mirrors status")
10404 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10408 if disk_abort:
10409 _RemoveDisks(self, iobj)
10410 self.cfg.RemoveInstance(iobj.name)
10411 # Make sure the instance lock gets removed
10412 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10413 raise errors.OpExecError("There are some degraded disks for"
10414 " this instance")
10416 # Release all node resource locks
10417 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10419 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10420 # we need to set the disks ID to the primary node, since the
10421 # preceding code might or might have not done it, depending on
10422 # disk template and other options
10423 for disk in iobj.disks:
10424 self.cfg.SetDiskID(disk, pnode_name)
10425 if self.op.mode == constants.INSTANCE_CREATE:
10426 if not self.op.no_install:
10427 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10428 not self.op.wait_for_sync)
10430 feedback_fn("* pausing disk sync to install instance OS")
10431 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10434 for idx, success in enumerate(result.payload):
10436 logging.warn("pause-sync of instance %s for disk %d failed",
10439 feedback_fn("* running the instance OS create scripts...")
10440 # FIXME: pass debug option from opcode to backend
10441 os_add_result = \
10442 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10443 self.op.debug_level)
10445 feedback_fn("* resuming disk sync")
10446 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10449 for idx, success in enumerate(result.payload):
10451 logging.warn("resume-sync of instance %s for disk %d failed",
10454 os_add_result.Raise("Could not add os for instance %s"
10455 " on node %s" % (instance, pnode_name))
10458 if self.op.mode == constants.INSTANCE_IMPORT:
10459 feedback_fn("* running the instance OS import scripts...")
10463 for idx, image in enumerate(self.src_images):
10467 # FIXME: pass debug option from opcode to backend
10468 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10469 constants.IEIO_FILE, (image, ),
10470 constants.IEIO_SCRIPT,
10471 (iobj.disks[idx], idx),
10473 transfers.append(dt)
10476 masterd.instance.TransferInstanceData(self, feedback_fn,
10477 self.op.src_node, pnode_name,
10478 self.pnode.secondary_ip,
10480 if not compat.all(import_result):
10481 self.LogWarning("Some disks for instance %s on node %s were not"
10482 " imported successfully" % (instance, pnode_name))
10484 rename_from = self._old_instance_name
10486 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10487 feedback_fn("* preparing remote import...")
10488 # The source cluster will stop the instance before attempting to make
10489 # a connection. In some cases stopping an instance can take a long
10490 # time, hence the shutdown timeout is added to the connection
10492 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10493 self.op.source_shutdown_timeout)
10494 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10496 assert iobj.primary_node == self.pnode.name
10497 disk_results = \
10498 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10499 self.source_x509_ca,
10500 self._cds, timeouts)
10501 if not compat.all(disk_results):
10502 # TODO: Should the instance still be started, even if some disks
10503 # failed to import (valid for local imports, too)?
10504 self.LogWarning("Some disks for instance %s on node %s were not"
10505 " imported successfully" % (instance, pnode_name))
10507 rename_from = self.source_instance_name
10510 # also checked in the prereq part
10511 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10514 # Run rename script on newly imported instance
10515 assert iobj.name == instance
10516 feedback_fn("Running rename script for %s" % instance)
10517 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10519 self.op.debug_level)
10520 if result.fail_msg:
10521 self.LogWarning("Failed to run rename script for %s on node"
10522 " %s: %s" % (instance, pnode_name, result.fail_msg))
10524 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10526 if self.op.start:
10527 iobj.admin_state = constants.ADMINST_UP
10528 self.cfg.Update(iobj, feedback_fn)
10529 logging.info("Starting instance %s on node %s", instance, pnode_name)
10530 feedback_fn("* starting instance...")
10531 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10533 result.Raise("Could not start instance")
10535 return list(iobj.all_nodes)
10538 class LUInstanceMultiAlloc(NoHooksLU):
10539 """Allocates multiple instances at the same time.
10544 def CheckArguments(self):
10545 """Check arguments.
10548 nodes = []
10549 for inst in self.op.instances:
10550 if inst.iallocator is not None:
10551 raise errors.OpPrereqError("iallocator are not allowed to be set on"
10552 " instance objects", errors.ECODE_INVAL)
10553 nodes.append(bool(inst.pnode))
10554 if inst.disk_template in constants.DTS_INT_MIRROR:
10555 nodes.append(bool(inst.snode))
10557 has_nodes = compat.any(nodes)
10558 if compat.all(nodes) ^ has_nodes:
10559 raise errors.OpPrereqError("There are instance objects providing"
10560 " pnode/snode while others do not",
10561 errors.ECODE_INVAL)
10563 if self.op.iallocator is None:
10564 default_iallocator = self.cfg.GetDefaultIAllocator()
10565 if default_iallocator and has_nodes:
10566 self.op.iallocator = default_iallocator
10568 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10569 " given and no cluster-wide default"
10570 " iallocator found; please specify either"
10571 " an iallocator or nodes on the instances"
10572 " or set a cluster-wide default iallocator",
10573 errors.ECODE_INVAL)
10575 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10576 if dups:
10577 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10578 utils.CommaJoin(dups), errors.ECODE_INVAL)
10580 def ExpandNames(self):
10581 """Calculate the locks.
10584 self.share_locks = _ShareAll()
10585 self.needed_locks = {}
10587 if self.op.iallocator:
10588 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10589 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10590 else:
10591 nodeslist = []
10592 for inst in self.op.instances:
10593 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10594 nodeslist.append(inst.pnode)
10595 if inst.snode is not None:
10596 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10597 nodeslist.append(inst.snode)
10599 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10600 # Lock resources of instance's primary and secondary nodes (copy to
10601 # prevent accidental modification)
10602 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10604 def CheckPrereq(self):
10605 """Check prerequisite.
10608 cluster = self.cfg.GetClusterInfo()
10609 default_vg = self.cfg.GetVGName()
10610 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10611 _ComputeNics(op, cluster, None,
10612 self.cfg, self.proc),
10613 _ComputeFullBeParams(op, cluster))
10614 for op in self.op.instances]
10615 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10616 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10618 ial.Run(self.op.iallocator)
10620 if not ial.success:
10621 raise errors.OpPrereqError("Can't compute nodes using"
10622 " iallocator '%s': %s" %
10623 (self.op.iallocator, ial.info),
10624 errors.ECODE_NORES)
10626 self.ia_result = ial.result
10628 if self.op.dry_run:
10629 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10630 constants.JOB_IDS_KEY: [],
10631 })
10633 def _ConstructPartialResult(self):
10634 """Constructs the partial result.
10637 (allocatable, failed) = self.ia_result
10638 return {
10639 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10640 map(compat.fst, allocatable),
10641 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10642 }
10644 def Exec(self, feedback_fn):
10645 """Executes the opcode.
10648 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10649 (allocatable, failed) = self.ia_result
10651 jobs = []
10652 for (name, nodes) in allocatable:
10653 op = op2inst.pop(name)
10655 if len(nodes) > 1:
10656 (op.pnode, op.snode) = nodes
10657 else:
10658 (op.pnode,) = nodes
10660 jobs.append([op])
10662 missing = set(op2inst.keys()) - set(failed)
10663 assert not missing, \
10664 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
10666 return ResultWithJobs(jobs, **self._ConstructPartialResult())
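# Editor's note: every allocatable instance is wrapped into its own
# single-opcode job ([op]) above, so the ResultWithJobs returned here makes
# mcpu submit one creation job per instance and report the resulting job IDs
# together with the allocatable/failed split from _ConstructPartialResult().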
10669 def _CheckRADOSFreeSpace():
10670 """Compute disk size requirements inside the RADOS cluster.
10673 # For the RADOS cluster we assume there is always enough space.
10677 class LUInstanceConsole(NoHooksLU):
10678 """Connect to an instance's console.
10680 This is somewhat special in that it returns the command line that
10681 you need to run on the master node in order to connect to the
10687 def ExpandNames(self):
10688 self.share_locks = _ShareAll()
10689 self._ExpandAndLockInstance()
10691 def CheckPrereq(self):
10692 """Check prerequisites.
10694 This checks that the instance is in the cluster.
10697 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10698 assert self.instance is not None, \
10699 "Cannot retrieve locked instance %s" % self.op.instance_name
10700 _CheckNodeOnline(self, self.instance.primary_node)
10702 def Exec(self, feedback_fn):
10703 """Connect to the console of an instance
10706 instance = self.instance
10707 node = instance.primary_node
10709 node_insts = self.rpc.call_instance_list([node],
10710 [instance.hypervisor])[node]
10711 node_insts.Raise("Can't get node information from %s" % node)
10713 if instance.name not in node_insts.payload:
10714 if instance.admin_state == constants.ADMINST_UP:
10715 state = constants.INSTST_ERRORDOWN
10716 elif instance.admin_state == constants.ADMINST_DOWN:
10717 state = constants.INSTST_ADMINDOWN
10719 state = constants.INSTST_ADMINOFFLINE
10720 raise errors.OpExecError("Instance %s is not running (state %s)" %
10721 (instance.name, state))
10723 logging.debug("Connecting to console of %s on %s", instance.name, node)
10725 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10728 def _GetInstanceConsole(cluster, instance):
10729 """Returns console information for an instance.
10731 @type cluster: L{objects.Cluster}
10732 @type instance: L{objects.Instance}
10736 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10737 # beparams and hvparams are passed separately, to avoid editing the
10738 # instance and then saving the defaults in the instance itself.
10739 hvparams = cluster.FillHV(instance)
10740 beparams = cluster.FillBE(instance)
10741 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10743 assert console.instance == instance.name
10744 assert console.Validate()
10746 return console.ToDict()
10749 class LUInstanceReplaceDisks(LogicalUnit):
10750 """Replace the disks of an instance.
10753 HPATH = "mirrors-replace"
10754 HTYPE = constants.HTYPE_INSTANCE
10757 def CheckArguments(self):
10758 """Check arguments.
10761 remote_node = self.op.remote_node
10762 ialloc = self.op.iallocator
10763 if self.op.mode == constants.REPLACE_DISK_CHG:
10764 if remote_node is None and ialloc is None:
10765 raise errors.OpPrereqError("When changing the secondary either an"
10766 " iallocator script must be used or the"
10767 " new node given", errors.ECODE_INVAL)
10769 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10771 elif remote_node is not None or ialloc is not None:
10772 # Not replacing the secondary
10773 raise errors.OpPrereqError("The iallocator and new node options can"
10774 " only be used when changing the"
10775 " secondary node", errors.ECODE_INVAL)
10777 def ExpandNames(self):
10778 self._ExpandAndLockInstance()
10780 assert locking.LEVEL_NODE not in self.needed_locks
10781 assert locking.LEVEL_NODE_RES not in self.needed_locks
10782 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10784 assert self.op.iallocator is None or self.op.remote_node is None, \
10785 "Conflicting options"
10787 if self.op.remote_node is not None:
10788 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10790 # Warning: do not remove the locking of the new secondary here
10791 # unless DRBD8.AddChildren is changed to work in parallel;
10792 # currently it doesn't since parallel invocations of
10793 # FindUnusedMinor will conflict
10794 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10795 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10797 self.needed_locks[locking.LEVEL_NODE] = []
10798 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10800 if self.op.iallocator is not None:
10801 # iallocator will select a new node in the same group
10802 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10804 self.needed_locks[locking.LEVEL_NODE_RES] = []
10806 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10807 self.op.iallocator, self.op.remote_node,
10808 self.op.disks, False, self.op.early_release,
10809 self.op.ignore_ipolicy)
10811 self.tasklets = [self.replacer]
10813 def DeclareLocks(self, level):
10814 if level == locking.LEVEL_NODEGROUP:
10815 assert self.op.remote_node is None
10816 assert self.op.iallocator is not None
10817 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10819 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10820 # Lock all groups used by instance optimistically; this requires going
10821 # via the node before it's locked, requiring verification later on
10822 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10823 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10825 elif level == locking.LEVEL_NODE:
10826 if self.op.iallocator is not None:
10827 assert self.op.remote_node is None
10828 assert not self.needed_locks[locking.LEVEL_NODE]
10830 # Lock member nodes of all locked groups
10831 self.needed_locks[locking.LEVEL_NODE] = \
10832 [node_name
10833 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10834 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10836 self._LockInstancesNodes()
10837 elif level == locking.LEVEL_NODE_RES:
10839 self.needed_locks[locking.LEVEL_NODE_RES] = \
10840 self.needed_locks[locking.LEVEL_NODE]
10842 def BuildHooksEnv(self):
10843 """Build hooks env.
10845 This runs on the master, the primary and all the secondaries.
10848 instance = self.replacer.instance
10849 env = {
10850 "MODE": self.op.mode,
10851 "NEW_SECONDARY": self.op.remote_node,
10852 "OLD_SECONDARY": instance.secondary_nodes[0],
10853 }
10854 env.update(_BuildInstanceHookEnvByObject(self, instance))
10856 return env
10857 def BuildHooksNodes(self):
10858 """Build hooks nodes.
10861 instance = self.replacer.instance
10862 nl = [
10863 self.cfg.GetMasterNode(),
10864 instance.primary_node,
10865 ]
10866 if self.op.remote_node is not None:
10867 nl.append(self.op.remote_node)
10869 return nl, nl
10870 def CheckPrereq(self):
10871 """Check prerequisites.
10874 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10875 self.op.iallocator is None)
10877 # Verify if node group locks are still correct
10878 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10880 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10882 return LogicalUnit.CheckPrereq(self)
10885 class TLReplaceDisks(Tasklet):
10886 """Replaces disks for an instance.
10888 Note: Locking is not within the scope of this class.
10891 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10892 disks, delay_iallocator, early_release, ignore_ipolicy):
10893 """Initializes this class.
10896 Tasklet.__init__(self, lu)
10899 self.instance_name = instance_name
10900 self.mode = mode
10901 self.iallocator_name = iallocator_name
10902 self.remote_node = remote_node
10903 self.disks = disks
10904 self.delay_iallocator = delay_iallocator
10905 self.early_release = early_release
10906 self.ignore_ipolicy = ignore_ipolicy
10909 self.instance = None
10910 self.new_node = None
10911 self.target_node = None
10912 self.other_node = None
10913 self.remote_node_info = None
10914 self.node_secondary_ip = None
10917 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10918 """Compute a new secondary node using an IAllocator.
10921 req = iallocator.IAReqRelocate(name=instance_name,
10922 relocate_from=list(relocate_from))
10923 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10925 ial.Run(iallocator_name)
10927 if not ial.success:
10928 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10929 " %s" % (iallocator_name, ial.info),
10930 errors.ECODE_NORES)
10932 remote_node_name = ial.result[0]
10934 lu.LogInfo("Selected new secondary for instance '%s': %s",
10935 instance_name, remote_node_name)
10937 return remote_node_name
10939 def _FindFaultyDisks(self, node_name):
10940 """Wrapper for L{_FindFaultyInstanceDisks}.
10943 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10944 node_name)
10946 def _CheckDisksActivated(self, instance):
10947 """Checks if the instance disks are activated.
10949 @param instance: The instance to check disks
10950 @return: True if they are activated, False otherwise
10953 nodes = instance.all_nodes
10955 for idx, dev in enumerate(instance.disks):
10956 for node in nodes:
10957 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10958 self.cfg.SetDiskID(dev, node)
10960 result = _BlockdevFind(self, node, dev, instance)
10962 if result.offline:
10963 continue
10964 elif result.fail_msg or not result.payload:
10965 return False
10968 return True
10969 def CheckPrereq(self):
10970 """Check prerequisites.
10972 This checks that the instance is in the cluster.
10975 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10976 assert instance is not None, \
10977 "Cannot retrieve locked instance %s" % self.instance_name
10979 if instance.disk_template != constants.DT_DRBD8:
10980 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10981 " instances", errors.ECODE_INVAL)
10983 if len(instance.secondary_nodes) != 1:
10984 raise errors.OpPrereqError("The instance has a strange layout,"
10985 " expected one secondary but found %d" %
10986 len(instance.secondary_nodes),
10987 errors.ECODE_FAULT)
10989 if not self.delay_iallocator:
10990 self._CheckPrereq2()
10992 def _CheckPrereq2(self):
10993 """Check prerequisites, second part.
10995 This function should always be part of CheckPrereq. It was separated and is
10996 now called from Exec because during node evacuation iallocator was only
10997 called with an unmodified cluster model, not taking planned changes into
11001 instance = self.instance
11002 secondary_node = instance.secondary_nodes[0]
11004 if self.iallocator_name is None:
11005 remote_node = self.remote_node
11006 else:
11007 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11008 instance.name, instance.secondary_nodes)
11010 if remote_node is None:
11011 self.remote_node_info = None
11012 else:
11013 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11014 "Remote node '%s' is not locked" % remote_node
11016 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11017 assert self.remote_node_info is not None, \
11018 "Cannot retrieve locked node %s" % remote_node
11020 if remote_node == self.instance.primary_node:
11021 raise errors.OpPrereqError("The specified node is the primary node of"
11022 " the instance", errors.ECODE_INVAL)
11024 if remote_node == secondary_node:
11025 raise errors.OpPrereqError("The specified node is already the"
11026 " secondary node of the instance",
11027 errors.ECODE_INVAL)
11029 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11030 constants.REPLACE_DISK_CHG):
11031 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11032 errors.ECODE_INVAL)
11034 if self.mode == constants.REPLACE_DISK_AUTO:
11035 if not self._CheckDisksActivated(instance):
11036 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11037 " first" % self.instance_name,
11038 errors.ECODE_STATE)
11039 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11040 faulty_secondary = self._FindFaultyDisks(secondary_node)
11042 if faulty_primary and faulty_secondary:
11043 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11044 " one node and can not be repaired"
11045 " automatically" % self.instance_name,
11046 errors.ECODE_STATE)
11049 self.disks = faulty_primary
11050 self.target_node = instance.primary_node
11051 self.other_node = secondary_node
11052 check_nodes = [self.target_node, self.other_node]
11053 elif faulty_secondary:
11054 self.disks = faulty_secondary
11055 self.target_node = secondary_node
11056 self.other_node = instance.primary_node
11057 check_nodes = [self.target_node, self.other_node]
11063 # Non-automatic modes
11064 if self.mode == constants.REPLACE_DISK_PRI:
11065 self.target_node = instance.primary_node
11066 self.other_node = secondary_node
11067 check_nodes = [self.target_node, self.other_node]
11069 elif self.mode == constants.REPLACE_DISK_SEC:
11070 self.target_node = secondary_node
11071 self.other_node = instance.primary_node
11072 check_nodes = [self.target_node, self.other_node]
11074 elif self.mode == constants.REPLACE_DISK_CHG:
11075 self.new_node = remote_node
11076 self.other_node = instance.primary_node
11077 self.target_node = secondary_node
11078 check_nodes = [self.new_node, self.other_node]
11080 _CheckNodeNotDrained(self.lu, remote_node)
11081 _CheckNodeVmCapable(self.lu, remote_node)
11083 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11084 assert old_node_info is not None
11085 if old_node_info.offline and not self.early_release:
11086 # doesn't make sense to delay the release
11087 self.early_release = True
11088 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11089 " early-release mode", secondary_node)
11092 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11095 # If not specified all disks should be replaced
11097 self.disks = range(len(self.instance.disks))
11099 # TODO: This is ugly, but right now we can't distinguish between an
11100 # internally submitted opcode and an external one. We should fix that.
11101 if self.remote_node_info:
11102 # We change the node, let's verify it still meets instance policy
11103 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11104 cluster = self.cfg.GetClusterInfo()
11105 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11107 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11108 ignore=self.ignore_ipolicy)
11110 for node in check_nodes:
11111 _CheckNodeOnline(self.lu, node)
11113 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11116 if node_name is not None)
11118 # Release unneeded node and node resource locks
11119 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11120 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11122 # Release any owned node group
11123 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11124 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11126 # Check whether disks are valid
11127 for disk_idx in self.disks:
11128 instance.FindDisk(disk_idx)
11130 # Get secondary node IP addresses
11131 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11132 in self.cfg.GetMultiNodeInfo(touched_nodes))
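# Summary of the state computed above (descriptive comment, added): for
# REPLACE_DISK_PRI the target node is the primary and other_node the secondary,
# for REPLACE_DISK_SEC the roles are swapped, for REPLACE_DISK_CHG the old
# secondary becomes target_node while new_node holds the freshly selected
# secondary, and for REPLACE_DISK_AUTO the node that actually has faulty disks
# is chosen as the target; node_secondary_ip maps every touched node name to
# its secondary IP for the later DRBD network operations.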
11134 def Exec(self, feedback_fn):
11135 """Execute disk replacement.
11137 This dispatches the disk replacement to the appropriate handler.
11140 if self.delay_iallocator:
11141 self._CheckPrereq2()
11144 # Verify owned locks before starting operation
11145 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11146 assert set(owned_nodes) == set(self.node_secondary_ip), \
11147 ("Incorrect node locks, owning %s, expected %s" %
11148 (owned_nodes, self.node_secondary_ip.keys()))
11149 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11150 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11152 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11153 assert list(owned_instances) == [self.instance_name], \
11154 "Instance '%s' not locked" % self.instance_name
11156 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11157 "Should not own any node group lock at this point"
11160 feedback_fn("No disks need replacement for instance '%s'" %
11161 self.instance.name)
11164 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11165 (utils.CommaJoin(self.disks), self.instance.name))
11166 feedback_fn("Current primary node: %s", self.instance.primary_node)
11167 feedback_fn("Current seconary node: %s",
11168 utils.CommaJoin(self.instance.secondary_nodes))
11170 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11172 # Activate the instance disks if we're replacing them on a down instance
11174 _StartInstanceDisks(self.lu, self.instance, True)
11177 # Should we replace the secondary node?
11178 if self.new_node is not None:
11179 fn = self._ExecDrbd8Secondary
11181 fn = self._ExecDrbd8DiskOnly
11183 result = fn(feedback_fn)
11185 # Deactivate the instance disks if we're replacing them on a
11188 _SafeShutdownInstanceDisks(self.lu, self.instance)
11190 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11193 # Verify owned locks
11194 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11195 nodes = frozenset(self.node_secondary_ip)
11196 assert ((self.early_release and not owned_nodes) or
11197 (not self.early_release and not (set(owned_nodes) - nodes))), \
11198 ("Not owning the correct locks, early_release=%s, owned=%r,"
11199 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11203 def _CheckVolumeGroup(self, nodes):
11204 self.lu.LogInfo("Checking volume groups")
11206 vgname = self.cfg.GetVGName()
11208 # Make sure volume group exists on all involved nodes
11209 results = self.rpc.call_vg_list(nodes)
11211 raise errors.OpExecError("Can't list volume groups on the nodes")
11214 res = results[node]
11215 res.Raise("Error checking node %s" % node)
11216 if vgname not in res.payload:
11217 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11220 def _CheckDisksExistence(self, nodes):
11221 # Check disk existence
11222 for idx, dev in enumerate(self.instance.disks):
11223 if idx not in self.disks:
11227 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11228 self.cfg.SetDiskID(dev, node)
11230 result = _BlockdevFind(self, node, dev, self.instance)
11232 msg = result.fail_msg
11233 if msg or not result.payload:
11235 msg = "disk not found"
11236 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11239 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11240 for idx, dev in enumerate(self.instance.disks):
11241 if idx not in self.disks:
11244 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11247 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11248 on_primary, ldisk=ldisk):
11249 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11250 " replace disks for instance %s" %
11251 (node_name, self.instance.name))
11253 def _CreateNewStorage(self, node_name):
11254 """Create new storage on the primary or secondary node.
11256 This is only used for same-node replaces, not for changing the
11257 secondary node, hence we don't want to modify the existing disk.
11262 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11263 for idx, dev in enumerate(disks):
11264 if idx not in self.disks:
11267 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11269 self.cfg.SetDiskID(dev, node_name)
11271 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11272 names = _GenerateUniqueNames(self.lu, lv_names)
11274 (data_disk, meta_disk) = dev.children
11275 vg_data = data_disk.logical_id[0]
11276 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11277 logical_id=(vg_data, names[0]),
11278 params=data_disk.params)
11279 vg_meta = meta_disk.logical_id[0]
11280 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11281 size=constants.DRBD_META_SIZE,
11282 logical_id=(vg_meta, names[1]),
11283 params=meta_disk.params)
11285 new_lvs = [lv_data, lv_meta]
11286 old_lvs = [child.Copy() for child in dev.children]
11287 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11289 # we pass force_create=True to force the LVM creation
11290 for new_lv in new_lvs:
11291 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11292 _GetInstanceInfoText(self.instance), False)
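# Descriptive note (added): iv_names built above maps each device's iv_name to a
# (dev, old_lvs, new_lvs) tuple; old_lvs are copies of the current children and
# new_lvs the freshly created data/meta volumes, and this mapping later drives
# the detach/rename/attach sequence as well as the removal of the old storage.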
11296 def _CheckDevices(self, node_name, iv_names):
11297 for name, (dev, _, _) in iv_names.iteritems():
11298 self.cfg.SetDiskID(dev, node_name)
11300 result = _BlockdevFind(self, node_name, dev, self.instance)
11302 msg = result.fail_msg
11303 if msg or not result.payload:
11305 msg = "disk not found"
11306 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11309 if result.payload.is_degraded:
11310 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11312 def _RemoveOldStorage(self, node_name, iv_names):
11313 for name, (_, old_lvs, _) in iv_names.iteritems():
11314 self.lu.LogInfo("Remove logical volumes for %s" % name)
11317 self.cfg.SetDiskID(lv, node_name)
11319 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11321 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11322 hint="remove unused LVs manually")
11324 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11325 """Replace a disk on the primary or secondary for DRBD 8.
11327 The algorithm for replace is quite complicated:
11329 1. for each disk to be replaced:
11331 1. create new LVs on the target node with unique names
11332 1. detach old LVs from the drbd device
11333 1. rename old LVs to name_replaced.<time_t>
11334 1. rename new LVs to old LVs
11335 1. attach the new LVs (with the old names now) to the drbd device
11337 1. wait for sync across all devices
11339 1. for each modified disk:
11341 1. remove old LVs (which have the name name_replaced.<time_t>)
11343 Failures are not very well handled.
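# Rough sketch of the per-disk rename dance performed below (added comment; the
# concrete LV names are generated at runtime):
#   1. the existing data/meta LVs are renamed with a "_replaced-<time_t>" suffix,
#   2. the newly created LVs are renamed to the original LV names,
#   3. the renamed new LVs are re-attached to the DRBD device,
# so after a successful sync only the "_replaced" LVs remain to be removed.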
11348 # Step: check device activation
11349 self.lu.LogStep(1, steps_total, "Check device existence")
11350 self._CheckDisksExistence([self.other_node, self.target_node])
11351 self._CheckVolumeGroup([self.target_node, self.other_node])
11353 # Step: check other node consistency
11354 self.lu.LogStep(2, steps_total, "Check peer consistency")
11355 self._CheckDisksConsistency(self.other_node,
11356 self.other_node == self.instance.primary_node,
11359 # Step: create new storage
11360 self.lu.LogStep(3, steps_total, "Allocate new storage")
11361 iv_names = self._CreateNewStorage(self.target_node)
11363 # Step: for each lv, detach+rename*2+attach
11364 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11365 for dev, old_lvs, new_lvs in iv_names.itervalues():
11366 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11368 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11370 result.Raise("Can't detach drbd from local storage on node"
11371 " %s for device %s" % (self.target_node, dev.iv_name))
11373 #cfg.Update(instance)
11375 # ok, we created the new LVs, so now we know we have the needed
11376 # storage; as such, we proceed on the target node to rename
11377 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11378 # using the assumption that logical_id == physical_id (which in
11379 # turn is the unique_id on that node)
11381 # FIXME(iustin): use a better name for the replaced LVs
11382 temp_suffix = int(time.time())
11383 ren_fn = lambda d, suff: (d.physical_id[0],
11384 d.physical_id[1] + "_replaced-%s" % suff)
11386 # Build the rename list based on what LVs exist on the node
11387 rename_old_to_new = []
11388 for to_ren in old_lvs:
11389 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11390 if not result.fail_msg and result.payload:
11392 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11394 self.lu.LogInfo("Renaming the old LVs on the target node")
11395 result = self.rpc.call_blockdev_rename(self.target_node,
11397 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11399 # Now we rename the new LVs to the old LVs
11400 self.lu.LogInfo("Renaming the new LVs on the target node")
11401 rename_new_to_old = [(new, old.physical_id)
11402 for old, new in zip(old_lvs, new_lvs)]
11403 result = self.rpc.call_blockdev_rename(self.target_node,
11405 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11407 # Intermediate steps of in memory modifications
11408 for old, new in zip(old_lvs, new_lvs):
11409 new.logical_id = old.logical_id
11410 self.cfg.SetDiskID(new, self.target_node)
11412 # We need to modify old_lvs so that removal later removes the
11413 # right LVs, not the newly added ones; note that old_lvs is a copy here
11415 for disk in old_lvs:
11416 disk.logical_id = ren_fn(disk, temp_suffix)
11417 self.cfg.SetDiskID(disk, self.target_node)
11419 # Now that the new lvs have the old name, we can add them to the device
11420 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11421 result = self.rpc.call_blockdev_addchildren(self.target_node,
11422 (dev, self.instance), new_lvs)
11423 msg = result.fail_msg
11425 for new_lv in new_lvs:
11426 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11429 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11430 hint=("cleanup manually the unused logical"
11432 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11434 cstep = itertools.count(5)
11436 if self.early_release:
11437 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11438 self._RemoveOldStorage(self.target_node, iv_names)
11439 # TODO: Check if releasing locks early still makes sense
11440 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11442 # Release all resource locks except those used by the instance
11443 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11444 keep=self.node_secondary_ip.keys())
11446 # Release all node locks while waiting for sync
11447 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11449 # TODO: Can the instance lock be downgraded here? Take the optional disk
11450 # shutdown in the caller into consideration.
11453 # This can fail as the old devices are degraded and _WaitForSync
11454 # does a combined result over all disks, so we don't check its return value
11455 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11456 _WaitForSync(self.lu, self.instance)
11458 # Check all devices manually
11459 self._CheckDevices(self.instance.primary_node, iv_names)
11461 # Step: remove old storage
11462 if not self.early_release:
11463 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11464 self._RemoveOldStorage(self.target_node, iv_names)
11466 def _ExecDrbd8Secondary(self, feedback_fn):
11467 """Replace the secondary node for DRBD 8.
11469 The algorithm for replace is quite complicated:
11470 - for all disks of the instance:
11471 - create new LVs on the new node with same names
11472 - shutdown the drbd device on the old secondary
11473 - disconnect the drbd network on the primary
11474 - create the drbd device on the new secondary
11475 - network attach the drbd on the primary, using an artifice:
11476 the drbd code for Attach() will connect to the network if it
11477 finds a device which is connected to the good local disks but
11478 not network enabled
11479 - wait for sync across all devices
11480 - remove all disks from the old secondary
11482 Failures are not very well handled.
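# Descriptive note (added): the key trick below is that the DRBD devices on the
# new secondary are first created with a logical_id lacking the network port
# (new_alone_id), so they come up standalone; only after the primary has been
# disconnected from the old peer and the configuration updated are both sides
# re-attached using the full new_net_id, letting drbd_attach_net reconnect them
# to the correct new peer.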
11487 pnode = self.instance.primary_node
11489 # Step: check device activation
11490 self.lu.LogStep(1, steps_total, "Check device existence")
11491 self._CheckDisksExistence([self.instance.primary_node])
11492 self._CheckVolumeGroup([self.instance.primary_node])
11494 # Step: check other node consistency
11495 self.lu.LogStep(2, steps_total, "Check peer consistency")
11496 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11498 # Step: create new storage
11499 self.lu.LogStep(3, steps_total, "Allocate new storage")
11500 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11501 for idx, dev in enumerate(disks):
11502 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11503 (self.new_node, idx))
11504 # we pass force_create=True to force LVM creation
11505 for new_lv in dev.children:
11506 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11507 True, _GetInstanceInfoText(self.instance), False)
11509 # Step 4: drbd minors and drbd setup changes
11510 # after this, we must manually remove the drbd minors on both the
11511 # error and the success paths
11512 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11513 minors = self.cfg.AllocateDRBDMinor([self.new_node
11514 for dev in self.instance.disks],
11515 self.instance.name)
11516 logging.debug("Allocated minors %r", minors)
11519 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11520 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11521 (self.new_node, idx))
11522 # create new devices on new_node; note that we create two IDs:
11523 # one without port, so the drbd will be activated without
11524 # networking information on the new node at this stage, and one
11525 # with network, for the later activation in step 4
11526 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11527 if self.instance.primary_node == o_node1:
11530 assert self.instance.primary_node == o_node2, "Three-node instance?"
11533 new_alone_id = (self.instance.primary_node, self.new_node, None,
11534 p_minor, new_minor, o_secret)
11535 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11536 p_minor, new_minor, o_secret)
11538 iv_names[idx] = (dev, dev.children, new_net_id)
11539 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11541 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11542 logical_id=new_alone_id,
11543 children=dev.children,
11546 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11549 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11551 _GetInstanceInfoText(self.instance), False)
11552 except errors.GenericError:
11553 self.cfg.ReleaseDRBDMinors(self.instance.name)
11556 # We have new devices, shutdown the drbd on the old secondary
11557 for idx, dev in enumerate(self.instance.disks):
11558 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11559 self.cfg.SetDiskID(dev, self.target_node)
11560 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11561 (dev, self.instance)).fail_msg
11563 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11564 "node: %s" % (idx, msg),
11565 hint=("Please cleanup this device manually as"
11566 " soon as possible"))
11568 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11569 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11570 self.instance.disks)[pnode]
11572 msg = result.fail_msg
11574 # detaches didn't succeed (unlikely)
11575 self.cfg.ReleaseDRBDMinors(self.instance.name)
11576 raise errors.OpExecError("Can't detach the disks from the network on"
11577 " old node: %s" % (msg,))
11579 # if we managed to detach at least one, we update all the disks of
11580 # the instance to point to the new secondary
11581 self.lu.LogInfo("Updating instance configuration")
11582 for dev, _, new_logical_id in iv_names.itervalues():
11583 dev.logical_id = new_logical_id
11584 self.cfg.SetDiskID(dev, self.instance.primary_node)
11586 self.cfg.Update(self.instance, feedback_fn)
11588 # Release all node locks (the configuration has been updated)
11589 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11591 # and now perform the drbd attach
11592 self.lu.LogInfo("Attaching primary drbds to new secondary"
11593 " (standalone => connected)")
11594 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11596 self.node_secondary_ip,
11597 (self.instance.disks, self.instance),
11598 self.instance.name,
11600 for to_node, to_result in result.items():
11601 msg = to_result.fail_msg
11603 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11605 hint=("please do a gnt-instance info to see the"
11606 " status of disks"))
11608 cstep = itertools.count(5)
11610 if self.early_release:
11611 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11612 self._RemoveOldStorage(self.target_node, iv_names)
11613 # TODO: Check if releasing locks early still makes sense
11614 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11616 # Release all resource locks except those used by the instance
11617 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11618 keep=self.node_secondary_ip.keys())
11620 # TODO: Can the instance lock be downgraded here? Take the optional disk
11621 # shutdown in the caller into consideration.
11624 # This can fail as the old devices are degraded and _WaitForSync
11625 # does a combined result over all disks, so we don't check its return value
11626 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11627 _WaitForSync(self.lu, self.instance)
11629 # Check all devices manually
11630 self._CheckDevices(self.instance.primary_node, iv_names)
11632 # Step: remove old storage
11633 if not self.early_release:
11634 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11635 self._RemoveOldStorage(self.target_node, iv_names)
11638 class LURepairNodeStorage(NoHooksLU):
11639 """Repairs the volume group on a node.
11644 def CheckArguments(self):
11645 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11647 storage_type = self.op.storage_type
11649 if (constants.SO_FIX_CONSISTENCY not in
11650 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11651 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11652 " repaired" % storage_type,
11653 errors.ECODE_INVAL)
11655 def ExpandNames(self):
11656 self.needed_locks = {
11657 locking.LEVEL_NODE: [self.op.node_name],
11660 def _CheckFaultyDisks(self, instance, node_name):
11661 """Ensure faulty disks abort the opcode or at least warn."""
11663 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11665 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11666 " node '%s'" % (instance.name, node_name),
11667 errors.ECODE_STATE)
11668 except errors.OpPrereqError, err:
11669 if self.op.ignore_consistency:
11670 self.proc.LogWarning(str(err.args[0]))
11674 def CheckPrereq(self):
11675 """Check prerequisites.
11678 # Check whether any instance on this node has faulty disks
11679 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11680 if inst.admin_state != constants.ADMINST_UP:
11682 check_nodes = set(inst.all_nodes)
11683 check_nodes.discard(self.op.node_name)
11684 for inst_node_name in check_nodes:
11685 self._CheckFaultyDisks(inst, inst_node_name)
11687 def Exec(self, feedback_fn):
11688 feedback_fn("Repairing storage unit '%s' on %s ..." %
11689 (self.op.name, self.op.node_name))
11691 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11692 result = self.rpc.call_storage_execute(self.op.node_name,
11693 self.op.storage_type, st_args,
11695 constants.SO_FIX_CONSISTENCY)
11696 result.Raise("Failed to repair storage unit '%s' on %s" %
11697 (self.op.name, self.op.node_name))
11700 class LUNodeEvacuate(NoHooksLU):
11701 """Evacuates instances off a list of nodes.
11706 _MODE2IALLOCATOR = {
11707 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11708 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11709 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11711 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11712 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11713 constants.IALLOCATOR_NEVAC_MODES)
11715 def CheckArguments(self):
11716 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11718 def ExpandNames(self):
11719 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11721 if self.op.remote_node is not None:
11722 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11723 assert self.op.remote_node
11725 if self.op.remote_node == self.op.node_name:
11726 raise errors.OpPrereqError("Can not use evacuated node as a new"
11727 " secondary node", errors.ECODE_INVAL)
11729 if self.op.mode != constants.NODE_EVAC_SEC:
11730 raise errors.OpPrereqError("Without the use of an iallocator only"
11731 " secondary instances can be evacuated",
11732 errors.ECODE_INVAL)
11735 self.share_locks = _ShareAll()
11736 self.needed_locks = {
11737 locking.LEVEL_INSTANCE: [],
11738 locking.LEVEL_NODEGROUP: [],
11739 locking.LEVEL_NODE: [],
11742 # Determine nodes (via group) optimistically, needs verification once locks
11743 # have been acquired
11744 self.lock_nodes = self._DetermineNodes()
11746 def _DetermineNodes(self):
11747 """Gets the list of nodes to operate on.
11750 if self.op.remote_node is None:
11751 # Iallocator will choose any node(s) in the same group
11752 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11754 group_nodes = frozenset([self.op.remote_node])
11756 # Determine nodes to be locked
11757 return set([self.op.node_name]) | group_nodes
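# Example (added, hypothetical node names): evacuating "node1" without an
# explicit remote node returns node1 plus every member of node1's node group(s),
# since the iallocator may pick any of them as a target; with
# remote_node="node9" only {"node1", "node9"} needs to be locked.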
11759 def _DetermineInstances(self):
11760 """Builds list of instances to operate on.
11763 assert self.op.mode in constants.NODE_EVAC_MODES
11765 if self.op.mode == constants.NODE_EVAC_PRI:
11766 # Primary instances only
11767 inst_fn = _GetNodePrimaryInstances
11768 assert self.op.remote_node is None, \
11769 "Evacuating primary instances requires iallocator"
11770 elif self.op.mode == constants.NODE_EVAC_SEC:
11771 # Secondary instances only
11772 inst_fn = _GetNodeSecondaryInstances
11775 assert self.op.mode == constants.NODE_EVAC_ALL
11776 inst_fn = _GetNodeInstances
11777 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11779 raise errors.OpPrereqError("Due to an issue with the iallocator"
11780 " interface it is not possible to evacuate"
11781 " all instances at once; specify explicitly"
11782 " whether to evacuate primary or secondary"
11784 errors.ECODE_INVAL)
11786 return inst_fn(self.cfg, self.op.node_name)
11788 def DeclareLocks(self, level):
11789 if level == locking.LEVEL_INSTANCE:
11790 # Lock instances optimistically, needs verification once node and group
11791 # locks have been acquired
11792 self.needed_locks[locking.LEVEL_INSTANCE] = \
11793 set(i.name for i in self._DetermineInstances())
11795 elif level == locking.LEVEL_NODEGROUP:
11796 # Lock node groups for all potential target nodes optimistically, needs
11797 # verification once nodes have been acquired
11798 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11799 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11801 elif level == locking.LEVEL_NODE:
11802 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11804 def CheckPrereq(self):
11806 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11807 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11808 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11810 need_nodes = self._DetermineNodes()
11812 if not owned_nodes.issuperset(need_nodes):
11813 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11814 " locks were acquired, current nodes are"
11815 " are '%s', used to be '%s'; retry the"
11817 (self.op.node_name,
11818 utils.CommaJoin(need_nodes),
11819 utils.CommaJoin(owned_nodes)),
11820 errors.ECODE_STATE)
11822 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11823 if owned_groups != wanted_groups:
11824 raise errors.OpExecError("Node groups changed since locks were acquired,"
11825 " current groups are '%s', used to be '%s';"
11826 " retry the operation" %
11827 (utils.CommaJoin(wanted_groups),
11828 utils.CommaJoin(owned_groups)))
11830 # Determine affected instances
11831 self.instances = self._DetermineInstances()
11832 self.instance_names = [i.name for i in self.instances]
11834 if set(self.instance_names) != owned_instances:
11835 raise errors.OpExecError("Instances on node '%s' changed since locks"
11836 " were acquired, current instances are '%s',"
11837 " used to be '%s'; retry the operation" %
11838 (self.op.node_name,
11839 utils.CommaJoin(self.instance_names),
11840 utils.CommaJoin(owned_instances)))
11842 if self.instance_names:
11843 self.LogInfo("Evacuating instances from node '%s': %s",
11845 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11847 self.LogInfo("No instances to evacuate from node '%s'",
11850 if self.op.remote_node is not None:
11851 for i in self.instances:
11852 if i.primary_node == self.op.remote_node:
11853 raise errors.OpPrereqError("Node %s is the primary node of"
11854 " instance %s, cannot use it as"
11856 (self.op.remote_node, i.name),
11857 errors.ECODE_INVAL)
11859 def Exec(self, feedback_fn):
11860 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11862 if not self.instance_names:
11863 # No instances to evacuate
11866 elif self.op.iallocator is not None:
11867 # TODO: Implement relocation to other group
11868 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11869 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11870 instances=list(self.instance_names))
11871 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11873 ial.Run(self.op.iallocator)
11875 if not ial.success:
11876 raise errors.OpPrereqError("Can't compute node evacuation using"
11877 " iallocator '%s': %s" %
11878 (self.op.iallocator, ial.info),
11879 errors.ECODE_NORES)
11881 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11883 elif self.op.remote_node is not None:
11884 assert self.op.mode == constants.NODE_EVAC_SEC
11886 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11887 remote_node=self.op.remote_node,
11889 mode=constants.REPLACE_DISK_CHG,
11890 early_release=self.op.early_release)]
11891 for instance_name in self.instance_names
11895 raise errors.ProgrammerError("No iallocator or remote node")
11897 return ResultWithJobs(jobs)
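# Descriptive note (added): in the remote_node branch above, "jobs" is one
# single-opcode job per evacuated instance, i.e. a list of lists such as
# (hypothetically) [[OpInstanceReplaceDisks(instance_name="inst1", ...)],
# [OpInstanceReplaceDisks(instance_name="inst2", ...)]]; the iallocator branch
# obtains the same shape from _LoadNodeEvacResult instead.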
11900 def _SetOpEarlyRelease(early_release, op):
11901 """Sets C{early_release} flag on opcodes if available.
11905 op.early_release = early_release
11906 except AttributeError:
11907 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11912 def _NodeEvacDest(use_nodes, group, nodes):
11913 """Returns group or nodes depending on caller's choice.
11917 return utils.CommaJoin(nodes)
11922 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11923 """Unpacks the result of change-group and node-evacuate iallocator requests.
11925 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11926 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11928 @type lu: L{LogicalUnit}
11929 @param lu: Logical unit instance
11930 @type alloc_result: tuple/list
11931 @param alloc_result: Result from iallocator
11932 @type early_release: bool
11933 @param early_release: Whether to release locks early if possible
11934 @type use_nodes: bool
11935 @param use_nodes: Whether to display node names instead of groups
11938 (moved, failed, jobs) = alloc_result
11941 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11942 for (name, reason) in failed)
11943 lu.LogWarning("Unable to evacuate instances %s", failreason)
11944 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11947 lu.LogInfo("Instances to be moved: %s",
11948 utils.CommaJoin("%s (to %s)" %
11949 (name, _NodeEvacDest(use_nodes, group, nodes))
11950 for (name, group, nodes) in moved))
11952 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11953 map(opcodes.OpCode.LoadOpCode, ops))
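# Worked example (added, hypothetical data): with
#   alloc_result = ([("inst1", "group1", ["nodeA"])], [], [[serialized_op]])
# nothing has failed, "inst1 (to nodeA)" is logged (use_nodes=True), and the
# return value is a single job containing the opcode rebuilt via
# opcodes.OpCode.LoadOpCode with the requested early_release flag applied.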
11957 def _DiskSizeInBytesToMebibytes(lu, size):
11958 """Converts a disk size in bytes to mebibytes.
11960 Warns and rounds up if the size isn't an even multiple of 1 MiB.
11963 (mib, remainder) = divmod(size, 1024 * 1024)
11966 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
11967 " to not overwrite existing data (%s bytes will not be"
11968 " wiped)", (1024 * 1024) - remainder)
11974 class LUInstanceGrowDisk(LogicalUnit):
11975 """Grow a disk of an instance.
11978 HPATH = "disk-grow"
11979 HTYPE = constants.HTYPE_INSTANCE
11982 def ExpandNames(self):
11983 self._ExpandAndLockInstance()
11984 self.needed_locks[locking.LEVEL_NODE] = []
11985 self.needed_locks[locking.LEVEL_NODE_RES] = []
11986 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11987 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11989 def DeclareLocks(self, level):
11990 if level == locking.LEVEL_NODE:
11991 self._LockInstancesNodes()
11992 elif level == locking.LEVEL_NODE_RES:
11994 self.needed_locks[locking.LEVEL_NODE_RES] = \
11995 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
11997 def BuildHooksEnv(self):
11998 """Build hooks env.
12000 This runs on the master, the primary and all the secondaries.
12004 "DISK": self.op.disk,
12005 "AMOUNT": self.op.amount,
12006 "ABSOLUTE": self.op.absolute,
12008 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12011 def BuildHooksNodes(self):
12012 """Build hooks nodes.
12015 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12018 def CheckPrereq(self):
12019 """Check prerequisites.
12021 This checks that the instance is in the cluster.
12024 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12025 assert instance is not None, \
12026 "Cannot retrieve locked instance %s" % self.op.instance_name
12027 nodenames = list(instance.all_nodes)
12028 for node in nodenames:
12029 _CheckNodeOnline(self, node)
12031 self.instance = instance
12033 if instance.disk_template not in constants.DTS_GROWABLE:
12034 raise errors.OpPrereqError("Instance's disk layout does not support"
12035 " growing", errors.ECODE_INVAL)
12037 self.disk = instance.FindDisk(self.op.disk)
12039 if self.op.absolute:
12040 self.target = self.op.amount
12041 self.delta = self.target - self.disk.size
12043 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12044 "current disk size (%s)" %
12045 (utils.FormatUnit(self.target, "h"),
12046 utils.FormatUnit(self.disk.size, "h")),
12047 errors.ECODE_STATE)
12049 self.delta = self.op.amount
12050 self.target = self.disk.size + self.delta
12052 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12053 utils.FormatUnit(self.delta, "h"),
12054 errors.ECODE_INVAL)
12056 if instance.disk_template not in (constants.DT_FILE,
12057 constants.DT_SHARED_FILE,
12059 # TODO: check the free disk space for file, when that feature will be implemented
12061 _CheckNodesFreeDiskPerVG(self, nodenames,
12062 self.disk.ComputeGrowth(self.delta))
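# Example of the two growth modes handled above (added, hypothetical numbers):
# for a 10240 MiB disk, an absolute request of amount=12288 yields delta=2048
# and target=12288, while a relative request of amount=2048 yields the same
# delta and target; a shrink (absolute target below the current size) or a
# negative increment is rejected with an OpPrereqError.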
12064 def Exec(self, feedback_fn):
12065 """Execute disk grow.
12068 instance = self.instance
12071 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12072 assert (self.owned_locks(locking.LEVEL_NODE) ==
12073 self.owned_locks(locking.LEVEL_NODE_RES))
12075 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12077 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12079 raise errors.OpExecError("Cannot activate block device to grow")
12081 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12082 (self.op.disk, instance.name,
12083 utils.FormatUnit(self.delta, "h"),
12084 utils.FormatUnit(self.target, "h")))
12086 # First run all grow ops in dry-run mode
12087 for node in instance.all_nodes:
12088 self.cfg.SetDiskID(disk, node)
12089 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12091 result.Raise("Dry-run grow request failed to node %s" % node)
12094 # Get disk size from primary node for wiping
12095 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12096 result.Raise("Failed to retrieve disk size from node '%s'" %
12097 instance.primary_node)
12099 (disk_size_in_bytes, ) = result.payload
12101 if disk_size_in_bytes is None:
12102 raise errors.OpExecError("Failed to retrieve disk size from primary"
12103 " node '%s'" % instance.primary_node)
12105 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12107 assert old_disk_size >= disk.size, \
12108 ("Retrieved disk size too small (got %s, should be at least %s)" %
12109 (old_disk_size, disk.size))
12111 old_disk_size = None
12113 # We know that (as far as we can test) operations across different
12114 # nodes will succeed, time to run it for real on the backing storage
12115 for node in instance.all_nodes:
12116 self.cfg.SetDiskID(disk, node)
12117 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12119 result.Raise("Grow request failed to node %s" % node)
12121 # And now execute it for logical storage, on the primary node
12122 node = instance.primary_node
12123 self.cfg.SetDiskID(disk, node)
12124 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12126 result.Raise("Grow request failed to node %s" % node)
12128 disk.RecordGrow(self.delta)
12129 self.cfg.Update(instance, feedback_fn)
12131 # Changes have been recorded, release node lock
12132 _ReleaseLocks(self, locking.LEVEL_NODE)
12134 # Downgrade lock while waiting for sync
12135 self.glm.downgrade(locking.LEVEL_INSTANCE)
12137 assert wipe_disks ^ (old_disk_size is None)
12140 assert instance.disks[self.op.disk] == disk
12142 # Wipe newly added disk space
12143 _WipeDisks(self, instance,
12144 disks=[(self.op.disk, disk, old_disk_size)])
12146 if self.op.wait_for_sync:
12147 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12149 self.proc.LogWarning("Disk sync-ing has not returned a good"
12150 " status; please check the instance")
12151 if instance.admin_state != constants.ADMINST_UP:
12152 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12153 elif instance.admin_state != constants.ADMINST_UP:
12154 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12155 " not supposed to be running because no wait for"
12156 " sync mode was requested")
12158 assert self.owned_locks(locking.LEVEL_NODE_RES)
12159 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12162 class LUInstanceQueryData(NoHooksLU):
12163 """Query runtime instance data.
12168 def ExpandNames(self):
12169 self.needed_locks = {}
12171 # Use locking if requested or when non-static information is wanted
12172 if not (self.op.static or self.op.use_locking):
12173 self.LogWarning("Non-static data requested, locks need to be acquired")
12174 self.op.use_locking = True
12176 if self.op.instances or not self.op.use_locking:
12177 # Expand instance names right here
12178 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12180 # Will use acquired locks
12181 self.wanted_names = None
12183 if self.op.use_locking:
12184 self.share_locks = _ShareAll()
12186 if self.wanted_names is None:
12187 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12189 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12191 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12192 self.needed_locks[locking.LEVEL_NODE] = []
12193 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12195 def DeclareLocks(self, level):
12196 if self.op.use_locking:
12197 if level == locking.LEVEL_NODEGROUP:
12198 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12200 # Lock all groups used by instances optimistically; this requires going
12201 # via the node before it's locked, requiring verification later on
12202 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12203 frozenset(group_uuid
12204 for instance_name in owned_instances
12206 self.cfg.GetInstanceNodeGroups(instance_name))
12208 elif level == locking.LEVEL_NODE:
12209 self._LockInstancesNodes()
12211 def CheckPrereq(self):
12212 """Check prerequisites.
12214 This only checks the optional instance list against the existing names.
12217 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12218 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12219 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12221 if self.wanted_names is None:
12222 assert self.op.use_locking, "Locking was not used"
12223 self.wanted_names = owned_instances
12225 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12227 if self.op.use_locking:
12228 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12231 assert not (owned_instances or owned_groups or owned_nodes)
12233 self.wanted_instances = instances.values()
12235 def _ComputeBlockdevStatus(self, node, instance, dev):
12236 """Returns the status of a block device
12239 if self.op.static or not node:
12242 self.cfg.SetDiskID(dev, node)
12244 result = self.rpc.call_blockdev_find(node, dev)
12248 result.Raise("Can't compute disk status for %s" % instance.name)
12250 status = result.payload
12254 return (status.dev_path, status.major, status.minor,
12255 status.sync_percent, status.estimated_time,
12256 status.is_degraded, status.ldisk_status)
12258 def _ComputeDiskStatus(self, instance, snode, dev):
12259 """Compute block device status.
12262 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12264 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12266 def _ComputeDiskStatusInner(self, instance, snode, dev):
12267 """Compute block device status.
12269 @attention: The device has to be annotated already.
12272 if dev.dev_type in constants.LDS_DRBD:
12273 # we change the snode then (otherwise we use the one passed in)
12274 if dev.logical_id[0] == instance.primary_node:
12275 snode = dev.logical_id[1]
12277 snode = dev.logical_id[0]
12279 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12281 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12284 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12291 "iv_name": dev.iv_name,
12292 "dev_type": dev.dev_type,
12293 "logical_id": dev.logical_id,
12294 "physical_id": dev.physical_id,
12295 "pstatus": dev_pstatus,
12296 "sstatus": dev_sstatus,
12297 "children": dev_children,
12302 def Exec(self, feedback_fn):
12303 """Gather and return data"""
12306 cluster = self.cfg.GetClusterInfo()
12308 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12309 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12311 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12312 for node in nodes.values()))
12314 group2name_fn = lambda uuid: groups[uuid].name
12316 for instance in self.wanted_instances:
12317 pnode = nodes[instance.primary_node]
12319 if self.op.static or pnode.offline:
12320 remote_state = None
12322 self.LogWarning("Primary node %s is marked offline, returning static"
12323 " information only for instance %s" %
12324 (pnode.name, instance.name))
12326 remote_info = self.rpc.call_instance_info(instance.primary_node,
12328 instance.hypervisor)
12329 remote_info.Raise("Error checking node %s" % instance.primary_node)
12330 remote_info = remote_info.payload
12331 if remote_info and "state" in remote_info:
12332 remote_state = "up"
12334 if instance.admin_state == constants.ADMINST_UP:
12335 remote_state = "down"
12337 remote_state = instance.admin_state
12339 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12342 snodes_group_uuids = [nodes[snode_name].group
12343 for snode_name in instance.secondary_nodes]
12345 result[instance.name] = {
12346 "name": instance.name,
12347 "config_state": instance.admin_state,
12348 "run_state": remote_state,
12349 "pnode": instance.primary_node,
12350 "pnode_group_uuid": pnode.group,
12351 "pnode_group_name": group2name_fn(pnode.group),
12352 "snodes": instance.secondary_nodes,
12353 "snodes_group_uuids": snodes_group_uuids,
12354 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12356 # this happens to be the same format used for hooks
12357 "nics": _NICListToTuple(self, instance.nics),
12358 "disk_template": instance.disk_template,
12360 "hypervisor": instance.hypervisor,
12361 "network_port": instance.network_port,
12362 "hv_instance": instance.hvparams,
12363 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12364 "be_instance": instance.beparams,
12365 "be_actual": cluster.FillBE(instance),
12366 "os_instance": instance.osparams,
12367 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12368 "serial_no": instance.serial_no,
12369 "mtime": instance.mtime,
12370 "ctime": instance.ctime,
12371 "uuid": instance.uuid,
12377 def PrepareContainerMods(mods, private_fn):
12378 """Prepares a list of container modifications by adding a private data field.
12380 @type mods: list of tuples; (operation, index, parameters)
12381 @param mods: List of modifications
12382 @type private_fn: callable or None
12383 @param private_fn: Callable for constructing a private data field for a
12388 if private_fn is None:
12393 return [(op, idx, params, fn()) for (op, idx, params) in mods]
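# Example (added, hypothetical values): with
#   mods = [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})]
# and private_fn=None, each tuple simply gains a trailing private slot
# (presumably None in that case), giving
#   [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024}, None)];
# LUInstanceSetParams passes _InstNicModPrivate for NIC modifications so every
# NIC entry carries its own private data object instead.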
12396 #: Type description for changes as returned by L{ApplyContainerMods}'s
12398 _TApplyContModsCbChanges = \
12399 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12400 ht.TNonEmptyString,
12405 def ApplyContainerMods(kind, container, chgdesc, mods,
12406 create_fn, modify_fn, remove_fn):
12407 """Applies descriptions in C{mods} to C{container}.
12410 @param kind: One-word item description
12411 @type container: list
12412 @param container: Container to modify
12413 @type chgdesc: None or list
12414 @param chgdesc: List of applied changes
12416 @param mods: Modifications as returned by L{PrepareContainerMods}
12417 @type create_fn: callable
12418 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12419 receives absolute item index, parameters and private data object as added
12420 by L{PrepareContainerMods}, returns tuple containing new item and changes
12422 @type modify_fn: callable
12423 @param modify_fn: Callback for modifying an existing item
12424 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12425 and private data object as added by L{PrepareContainerMods}, returns
12427 @type remove_fn: callable
12428 @param remove_fn: Callback on removing item; receives absolute item index,
12429 item and private data object as added by L{PrepareContainerMods}
12432 for (op, idx, params, private) in mods:
12435 absidx = len(container) - 1
12437 raise IndexError("Not accepting negative indices other than -1")
12438 elif idx > len(container):
12439 raise IndexError("Got %s index %s, but there are only %s" %
12440 (kind, idx, len(container)))
12446 if op == constants.DDM_ADD:
12447 # Calculate where item will be added
12449 addidx = len(container)
12453 if create_fn is None:
12456 (item, changes) = create_fn(addidx, params, private)
12459 container.append(item)
12462 assert idx <= len(container)
12463 # list.insert does so before the specified index
12464 container.insert(idx, item)
12466 # Retrieve existing item
12468 item = container[absidx]
12470 raise IndexError("Invalid %s index %s" % (kind, idx))
12472 if op == constants.DDM_REMOVE:
12475 if remove_fn is not None:
12476 remove_fn(absidx, item, private)
12478 changes = [("%s/%s" % (kind, absidx), "remove")]
12480 assert container[absidx] == item
12481 del container[absidx]
12482 elif op == constants.DDM_MODIFY:
12483 if modify_fn is not None:
12484 changes = modify_fn(absidx, item, params, private)
12486 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12488 assert _TApplyContModsCbChanges(changes)
12490 if not (chgdesc is None or changes is None):
12491 chgdesc.extend(changes)
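# Example flow (added, hypothetical modifications): a
#   (constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})
# entry appends the item returned by create_fn and records its changes, while a
#   (constants.DDM_REMOVE, 0, {})
# entry calls remove_fn for the item at index 0, deletes it from the container
# and appends ("disk/0", "remove") to chgdesc (for kind == "disk").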
12494 def _UpdateIvNames(base_index, disks):
12495 """Updates the C{iv_name} attribute of disks.
12497 @type disks: list of L{objects.Disk}
12500 for (idx, disk) in enumerate(disks):
12501 disk.iv_name = "disk/%s" % (base_index + idx, )
12504 class _InstNicModPrivate:
12505 """Data structure for network interface modifications.
12507 Used by L{LUInstanceSetParams}.
12510 def __init__(self):
12515 class LUInstanceSetParams(LogicalUnit):
12516 """Modifies an instances's parameters.
12519 HPATH = "instance-modify"
12520 HTYPE = constants.HTYPE_INSTANCE
12524 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12525 assert ht.TList(mods)
12526 assert not mods or len(mods[0]) in (2, 3)
12528 if mods and len(mods[0]) == 2:
12532 for op, params in mods:
12533 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12534 result.append((op, -1, params))
12538 raise errors.OpPrereqError("Only one %s add or remove operation is"
12539 " supported at a time" % kind,
12540 errors.ECODE_INVAL)
12542 result.append((constants.DDM_MODIFY, op, params))
12544 assert verify_fn(result)
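# Example of the legacy-format upgrade above (added, hypothetical input): an old
# two-element entry (constants.DDM_ADD, {constants.IDISK_SIZE: 1024}) becomes
# (constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024}), while an old
# (index, params) pair such as (0, {constants.IDISK_MODE: "ro"}) is turned into
# (constants.DDM_MODIFY, 0, {constants.IDISK_MODE: "ro"}).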
12551 def _CheckMods(kind, mods, key_types, item_fn):
12552 """Ensures requested disk/NIC modifications are valid.
12555 for (op, _, params) in mods:
12556 assert ht.TDict(params)
12558 utils.ForceDictType(params, key_types)
12560 if op == constants.DDM_REMOVE:
12562 raise errors.OpPrereqError("No settings should be passed when"
12563 " removing a %s" % kind,
12564 errors.ECODE_INVAL)
12565 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12566 item_fn(op, params)
12568 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12571 def _VerifyDiskModification(op, params):
12572 """Verifies a disk modification.
12575 if op == constants.DDM_ADD:
12576 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12577 if mode not in constants.DISK_ACCESS_SET:
12578 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12579 errors.ECODE_INVAL)
12581 size = params.get(constants.IDISK_SIZE, None)
12583 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12584 constants.IDISK_SIZE, errors.ECODE_INVAL)
12588 except (TypeError, ValueError), err:
12589 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12590 errors.ECODE_INVAL)
12592 params[constants.IDISK_SIZE] = size
12594 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12595 raise errors.OpPrereqError("Disk size change not possible, use"
12596 " grow-disk", errors.ECODE_INVAL)
12599 def _VerifyNicModification(op, params):
12600 """Verifies a network interface modification.
12603 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12604 ip = params.get(constants.INIC_IP, None)
12607 elif ip.lower() == constants.VALUE_NONE:
12608 params[constants.INIC_IP] = None
12609 elif not netutils.IPAddress.IsValid(ip):
12610 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12611 errors.ECODE_INVAL)
12613 bridge = params.get("bridge", None)
12614 link = params.get(constants.INIC_LINK, None)
12615 if bridge and link:
12616 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12617 " at the same time", errors.ECODE_INVAL)
12618 elif bridge and bridge.lower() == constants.VALUE_NONE:
12619 params["bridge"] = None
12620 elif link and link.lower() == constants.VALUE_NONE:
12621 params[constants.INIC_LINK] = None
12623 if op == constants.DDM_ADD:
12624 macaddr = params.get(constants.INIC_MAC, None)
12625 if macaddr is None:
12626 params[constants.INIC_MAC] = constants.VALUE_AUTO
12628 if constants.INIC_MAC in params:
12629 macaddr = params[constants.INIC_MAC]
12630 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12631 macaddr = utils.NormalizeAndValidateMac(macaddr)
12633 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12634 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12635 " modifying an existing NIC",
12636 errors.ECODE_INVAL)
12638 def CheckArguments(self):
12639 if not (self.op.nics or self.op.disks or self.op.disk_template or
12640 self.op.hvparams or self.op.beparams or self.op.os_name or
12641 self.op.offline is not None or self.op.runtime_mem):
12642 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12644 if self.op.hvparams:
12645 _CheckGlobalHvParams(self.op.hvparams)
12647 self.op.disks = self._UpgradeDiskNicMods(
12648 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12649 self.op.nics = self._UpgradeDiskNicMods(
12650 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12652 # Check disk modifications
12653 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12654 self._VerifyDiskModification)
12656 if self.op.disks and self.op.disk_template is not None:
12657 raise errors.OpPrereqError("Disk template conversion and other disk"
12658 " changes not supported at the same time",
12659 errors.ECODE_INVAL)
12661 if (self.op.disk_template and
12662 self.op.disk_template in constants.DTS_INT_MIRROR and
12663 self.op.remote_node is None):
12664 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12665 " one requires specifying a secondary node",
12666 errors.ECODE_INVAL)
12668 # Check NIC modifications
12669 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12670 self._VerifyNicModification)
12672 def ExpandNames(self):
12673 self._ExpandAndLockInstance()
12674 # Can't even acquire node locks in shared mode as upcoming changes in
12675 # Ganeti 2.6 will start to modify the node object on disk conversion
12676 self.needed_locks[locking.LEVEL_NODE] = []
12677 self.needed_locks[locking.LEVEL_NODE_RES] = []
12678 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12680 def DeclareLocks(self, level):
12681 # TODO: Acquire group lock in shared mode (disk parameters)
12682 if level == locking.LEVEL_NODE:
12683 self._LockInstancesNodes()
12684 if self.op.disk_template and self.op.remote_node:
12685 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12686 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12687 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12689 self.needed_locks[locking.LEVEL_NODE_RES] = \
12690 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12692 def BuildHooksEnv(self):
12693 """Build hooks env.
12695 This runs on the master, primary and secondaries.
12699 if constants.BE_MINMEM in self.be_new:
12700 args["minmem"] = self.be_new[constants.BE_MINMEM]
12701 if constants.BE_MAXMEM in self.be_new:
12702 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12703 if constants.BE_VCPUS in self.be_new:
12704 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12705 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12706 # information at all.
12708 if self._new_nics is not None:
12711 for nic in self._new_nics:
12712 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12713 mode = nicparams[constants.NIC_MODE]
12714 link = nicparams[constants.NIC_LINK]
12715 nics.append((nic.ip, nic.mac, mode, link))
12717 args["nics"] = nics
12719 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12720 if self.op.disk_template:
12721 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12722 if self.op.runtime_mem:
12723 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12727 def BuildHooksNodes(self):
12728 """Build hooks nodes.
12731 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12734 def _PrepareNicModification(self, params, private, old_ip, old_params,
12736 update_params_dict = dict([(key, params[key])
12737 for key in constants.NICS_PARAMETERS
12740 if "bridge" in params:
12741 update_params_dict[constants.NIC_LINK] = params["bridge"]
12743 new_params = _GetUpdatedParams(old_params, update_params_dict)
12744 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12746 new_filled_params = cluster.SimpleFillNIC(new_params)
12747 objects.NIC.CheckParameterSyntax(new_filled_params)
12749 new_mode = new_filled_params[constants.NIC_MODE]
12750 if new_mode == constants.NIC_MODE_BRIDGED:
12751 bridge = new_filled_params[constants.NIC_LINK]
12752 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12754 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12756 self.warn.append(msg)
12758 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12760 elif new_mode == constants.NIC_MODE_ROUTED:
12761 ip = params.get(constants.INIC_IP, old_ip)
12763 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12764 " on a routed NIC", errors.ECODE_INVAL)
12766 if constants.INIC_MAC in params:
12767 mac = params[constants.INIC_MAC]
12769 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12770 errors.ECODE_INVAL)
12771 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12772 # otherwise generate the MAC address
12773 params[constants.INIC_MAC] = \
12774 self.cfg.GenerateMAC(self.proc.GetECId())
12776 # or validate/reserve the current one
12778 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12779 except errors.ReservationError:
12780 raise errors.OpPrereqError("MAC address '%s' already in use"
12781 " in cluster" % mac,
12782 errors.ECODE_NOTUNIQUE)
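# Remember the validated parameters on the private container; they are
# picked up later by _CreateNewNic/_ApplyNicMods when the NIC objects are
# actually built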
12784 private.params = new_params
12785 private.filled = new_filled_params
12787 def CheckPrereq(self):
12788 """Check prerequisites.
12790 This only checks the instance list against the existing names.
12793 # checking the new params on the primary/secondary nodes
12795 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12796 cluster = self.cluster = self.cfg.GetClusterInfo()
12797 assert self.instance is not None, \
12798 "Cannot retrieve locked instance %s" % self.op.instance_name
12799 pnode = instance.primary_node
12800 nodelist = list(instance.all_nodes)
12801 pnode_info = self.cfg.GetNodeInfo(pnode)
12802 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12804 # Prepare disk/NIC modifications
12805 self.diskmod = PrepareContainerMods(self.op.disks, None)
12806 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12809 if self.op.os_name and not self.op.force:
12810 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12811 self.op.force_variant)
12812 instance_os = self.op.os_name
12814 instance_os = instance.os
12816 assert not (self.op.disk_template and self.op.disks), \
12817 "Can't modify disk template and apply disk changes at the same time"
12819 if self.op.disk_template:
12820 if instance.disk_template == self.op.disk_template:
12821 raise errors.OpPrereqError("Instance already has disk template %s" %
12822 instance.disk_template, errors.ECODE_INVAL)
12824 if (instance.disk_template,
12825 self.op.disk_template) not in self._DISK_CONVERSIONS:
12826 raise errors.OpPrereqError("Unsupported disk template conversion from"
12827 " %s to %s" % (instance.disk_template,
12828 self.op.disk_template),
12829 errors.ECODE_INVAL)
12830 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12831 msg="cannot change disk template")
12832 if self.op.disk_template in constants.DTS_INT_MIRROR:
12833 if self.op.remote_node == pnode:
12834 raise errors.OpPrereqError("Given new secondary node %s is the same"
12835 " as the primary node of the instance" %
12836 self.op.remote_node, errors.ECODE_STATE)
12837 _CheckNodeOnline(self, self.op.remote_node)
12838 _CheckNodeNotDrained(self, self.op.remote_node)
12839 # FIXME: here we assume that the old disk template is DT_PLAIN
12840 assert instance.disk_template == constants.DT_PLAIN
12841 disks = [{constants.IDISK_SIZE: d.size,
12842 constants.IDISK_VG: d.logical_id[0]}
12843 for d in instance.disks]
12844 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12845 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12847 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12848 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12849 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12851 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12852 ignore=self.op.ignore_ipolicy)
12853 if pnode_info.group != snode_info.group:
12854 self.LogWarning("The primary and secondary nodes are in two"
12855 " different node groups; the disk parameters"
12856 " from the first disk's node group will be"
12859 # hvparams processing
12860 if self.op.hvparams:
12861 hv_type = instance.hypervisor
12862 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12863 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12864 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12867 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12868 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12869 self.hv_proposed = self.hv_new = hv_new # the new actual values
12870 self.hv_inst = i_hvdict # the new dict (without defaults)
12872 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12874 self.hv_new = self.hv_inst = {}
12876 # beparams processing
12877 if self.op.beparams:
12878 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12880 objects.UpgradeBeParams(i_bedict)
12881 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12882 be_new = cluster.SimpleFillBE(i_bedict)
12883 self.be_proposed = self.be_new = be_new # the new actual values
12884 self.be_inst = i_bedict # the new dict (without defaults)
12886 self.be_new = self.be_inst = {}
12887 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12888 be_old = cluster.FillBE(instance)
12890 # CPU param validation -- checking every time a parameter is
12891 # changed to cover all cases where either CPU mask or vcpus have
12893 if (constants.BE_VCPUS in self.be_proposed and
12894 constants.HV_CPU_MASK in self.hv_proposed):
12896 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12897 # Verify mask is consistent with number of vCPUs. Can skip this
12898 # test if only 1 entry in the CPU mask, which means same mask
12899 # is applied to all vCPUs.
12900 if (len(cpu_list) > 1 and
12901 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12902 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12904 (self.be_proposed[constants.BE_VCPUS],
12905 self.hv_proposed[constants.HV_CPU_MASK]),
12906 errors.ECODE_INVAL)
12908 # Only perform this test if a new CPU mask is given
12909 if constants.HV_CPU_MASK in self.hv_new:
12910 # Calculate the largest CPU number requested
12911 max_requested_cpu = max(map(max, cpu_list))
12912 # Check that all of the instance's nodes have enough physical CPUs to
12913 # satisfy the requested CPU mask
12914 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12915 max_requested_cpu + 1, instance.hypervisor)
12917 # osparams processing
12918 if self.op.osparams:
12919 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12920 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12921 self.os_inst = i_osdict # the new dict (without defaults)
12927 #TODO(dynmem): do the appropriate check involving MINMEM
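# Increasing the configured maximum memory can make the instance
# unstartable, so check free memory on the primary node (and, when
# auto_balance is set, on the secondaries) unless the operation is forced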
12928 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12929 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12930 mem_check_list = [pnode]
12931 if be_new[constants.BE_AUTO_BALANCE]:
12932 # either we changed auto_balance to yes or it was from before
12933 mem_check_list.extend(instance.secondary_nodes)
12934 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12935 instance.hypervisor)
12936 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12937 [instance.hypervisor])
12938 pninfo = nodeinfo[pnode]
12939 msg = pninfo.fail_msg
12941 # Assume the primary node is unreachable and go ahead
12942 self.warn.append("Can't get info from primary node %s: %s" %
12945 (_, _, (pnhvinfo, )) = pninfo.payload
12946 if not isinstance(pnhvinfo.get("memory_free", None), int):
12947 self.warn.append("Node data from primary node %s doesn't contain"
12948 " free memory information" % pnode)
12949 elif instance_info.fail_msg:
12950 self.warn.append("Can't get instance runtime information: %s" %
12951 instance_info.fail_msg)
12953 if instance_info.payload:
12954 current_mem = int(instance_info.payload["memory"])
12956 # Assume instance not running
12957 # (there is a slight race condition here, but it's not very
12958 # probable, and we have no other way to check)
12959 # TODO: Describe race condition
12961 #TODO(dynmem): do the appropriate check involving MINMEM
12962 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12963 pnhvinfo["memory_free"])
12965 raise errors.OpPrereqError("This change will prevent the instance"
12966 " from starting, due to %d MB of memory"
12967 " missing on its primary node" %
12968 miss_mem, errors.ECODE_NORES)
12970 if be_new[constants.BE_AUTO_BALANCE]:
12971 for node, nres in nodeinfo.items():
12972 if node not in instance.secondary_nodes:
12974 nres.Raise("Can't get info from secondary node %s" % node,
12975 prereq=True, ecode=errors.ECODE_STATE)
12976 (_, _, (nhvinfo, )) = nres.payload
12977 if not isinstance(nhvinfo.get("memory_free", None), int):
12978 raise errors.OpPrereqError("Secondary node %s didn't return free"
12979 " memory information" % node,
12980 errors.ECODE_STATE)
12981 #TODO(dynmem): do the appropriate check involving MINMEM
12982 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12983 raise errors.OpPrereqError("This change will prevent the instance"
12984 " from failover to its secondary node"
12985 " %s, due to not enough memory" % node,
12986 errors.ECODE_STATE)
12988 if self.op.runtime_mem:
12989 remote_info = self.rpc.call_instance_info(instance.primary_node,
12991 instance.hypervisor)
12992 remote_info.Raise("Error checking node %s" % instance.primary_node)
12993 if not remote_info.payload: # not running already
12994 raise errors.OpPrereqError("Instance %s is not running" %
12995 instance.name, errors.ECODE_STATE)
12997 current_memory = remote_info.payload["memory"]
12998 if (not self.op.force and
12999 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13000 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13001 raise errors.OpPrereqError("Instance %s must have memory between %d"
13002 " and %d MB of memory unless --force is"
13005 self.be_proposed[constants.BE_MINMEM],
13006 self.be_proposed[constants.BE_MAXMEM]),
13007 errors.ECODE_INVAL)
13009 delta = self.op.runtime_mem - current_memory
13011 _CheckNodeFreeMemory(self, instance.primary_node,
13012 "ballooning memory for instance %s" %
13013 instance.name, delta, instance.hypervisor)
13015 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13016 raise errors.OpPrereqError("Disk operations not supported for"
13017 " diskless instances", errors.ECODE_INVAL)
13019 def _PrepareNicCreate(_, params, private):
13020 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
13021 return (None, None)
13023 def _PrepareNicMod(_, nic, params, private):
13024 self._PrepareNicModification(params, private, nic.ip,
13025 nic.nicparams, cluster, pnode)
13028 # Verify NIC changes (operating on copy)
13029 nics = instance.nics[:]
13030 ApplyContainerMods("NIC", nics, None, self.nicmod,
13031 _PrepareNicCreate, _PrepareNicMod, None)
13032 if len(nics) > constants.MAX_NICS:
13033 raise errors.OpPrereqError("Instance has too many network interfaces"
13034 " (%d), cannot add more" % constants.MAX_NICS,
13035 errors.ECODE_STATE)
13037 # Verify disk changes (operating on a copy)
13038 disks = instance.disks[:]
13039 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13040 if len(disks) > constants.MAX_DISKS:
13041 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13042 " more" % constants.MAX_DISKS,
13043 errors.ECODE_STATE)
13045 if self.op.offline is not None:
13046 if self.op.offline:
13047 msg = "can't change to offline"
13049 msg = "can't change to online"
13050 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13052 # Pre-compute NIC changes (necessary to use result in hooks)
13053 self._nic_chgdesc = []
13055 # Operate on copies as this is still in prereq
13056 nics = [nic.Copy() for nic in instance.nics]
13057 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13058 self._CreateNewNic, self._ApplyNicMods, None)
13059 self._new_nics = nics
13061 self._new_nics = None
13063 def _ConvertPlainToDrbd(self, feedback_fn):
13064 """Converts an instance from plain to drbd.
13067 feedback_fn("Converting template to drbd")
13068 instance = self.instance
13069 pnode = instance.primary_node
13070 snode = self.op.remote_node
13072 assert instance.disk_template == constants.DT_PLAIN
13074 # create a fake disk info for _GenerateDiskTemplate
13075 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13076 constants.IDISK_VG: d.logical_id[0]}
13077 for d in instance.disks]
13078 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13079 instance.name, pnode, [snode],
13080 disk_info, None, None, 0, feedback_fn,
13082 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13084 info = _GetInstanceInfoText(instance)
13085 feedback_fn("Creating additional volumes...")
13086 # first, create the missing data and meta devices
13087 for disk in anno_disks:
13088 # unfortunately this is... not too nice
13089 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13091 for child in disk.children:
13092 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13093 # at this stage, all new LVs have been created, we can rename the
13095 feedback_fn("Renaming original volumes...")
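# The original plain LVs become the data children (children[0]) of the new
# DRBD disks, so rename them to the logical IDs generated for those children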
13096 rename_list = [(o, n.children[0].logical_id)
13097 for (o, n) in zip(instance.disks, new_disks)]
13098 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13099 result.Raise("Failed to rename original LVs")
13101 feedback_fn("Initializing DRBD devices...")
13102 # all child devices are in place, we can now create the DRBD devices
13103 for disk in anno_disks:
13104 for node in [pnode, snode]:
13105 f_create = node == pnode
13106 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13108 # at this point, the instance has been modified
13109 instance.disk_template = constants.DT_DRBD8
13110 instance.disks = new_disks
13111 self.cfg.Update(instance, feedback_fn)
13113 # Release node locks while waiting for sync
13114 _ReleaseLocks(self, locking.LEVEL_NODE)
13116 # disks are created, waiting for sync
13117 disk_abort = not _WaitForSync(self, instance,
13118 oneshot=not self.op.wait_for_sync)
13120 raise errors.OpExecError("There are some degraded disks for"
13121 " this instance, please cleanup manually")
13123 # Node resource locks will be released by caller
13125 def _ConvertDrbdToPlain(self, feedback_fn):
13126 """Converts an instance from drbd to plain.
13129 instance = self.instance
13131 assert len(instance.secondary_nodes) == 1
13132 assert instance.disk_template == constants.DT_DRBD8
13134 pnode = instance.primary_node
13135 snode = instance.secondary_nodes[0]
13136 feedback_fn("Converting template to plain")
13138 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
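# For DRBD8 the first child is the data LV, which simply becomes the plain
# disk; the meta LVs (children[1]) are removed further down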
13139 new_disks = [d.children[0] for d in instance.disks]
13141 # copy over size and mode
13142 for parent, child in zip(old_disks, new_disks):
13143 child.size = parent.size
13144 child.mode = parent.mode
13146 # this is a DRBD disk, return its port to the pool
13147 # NOTE: this must be done right before the call to cfg.Update!
13148 for disk in old_disks:
13149 tcp_port = disk.logical_id[2]
13150 self.cfg.AddTcpUdpPort(tcp_port)
13152 # update instance structure
13153 instance.disks = new_disks
13154 instance.disk_template = constants.DT_PLAIN
13155 self.cfg.Update(instance, feedback_fn)
13157 # Release locks in case removing disks takes a while
13158 _ReleaseLocks(self, locking.LEVEL_NODE)
13160 feedback_fn("Removing volumes on the secondary node...")
13161 for disk in old_disks:
13162 self.cfg.SetDiskID(disk, snode)
13163 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13165 self.LogWarning("Could not remove block device %s on node %s,"
13166 " continuing anyway: %s", disk.iv_name, snode, msg)
13168 feedback_fn("Removing unneeded volumes on the primary node...")
13169 for idx, disk in enumerate(old_disks):
13170 meta = disk.children[1]
13171 self.cfg.SetDiskID(meta, pnode)
13172 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13174 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13175 " continuing anyway: %s", idx, pnode, msg)
13177 def _CreateNewDisk(self, idx, params, _):
13178 """Creates a new disk.
13181 instance = self.instance
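# File-based disks must live in the same directory as the instance's
# existing disks, so reuse the driver and directory of disk 0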
13184 if instance.disk_template in constants.DTS_FILEBASED:
13185 (file_driver, file_path) = instance.disks[0].logical_id
13186 file_path = os.path.dirname(file_path)
13188 file_driver = file_path = None
13191 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13192 instance.primary_node, instance.secondary_nodes,
13193 [params], file_path, file_driver, idx,
13194 self.Log, self.diskparams)[0]
13196 info = _GetInstanceInfoText(instance)
13198 logging.info("Creating volume %s for instance %s",
13199 disk.iv_name, instance.name)
13200 # Note: this needs to be kept in sync with _CreateDisks
13202 for node in instance.all_nodes:
13203 f_create = (node == instance.primary_node)
13205 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13206 except errors.OpExecError, err:
13207 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13208 disk.iv_name, disk, node, err)
13211 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13215 def _ModifyDisk(idx, disk, params, _):
13216 """Modifies a disk.
13219 disk.mode = params[constants.IDISK_MODE]
13222 ("disk.mode/%d" % idx, disk.mode),
13225 def _RemoveDisk(self, idx, root, _):
13229 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
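# ComputeNodeTree yields every (node, device) pair that makes up this disk,
# so all of its components (e.g. DRBD data and meta LVs) are removed on
# every node involved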
13230 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13231 self.cfg.SetDiskID(disk, node)
13232 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13234 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13235 " continuing anyway", idx, node, msg)
13237 # if this is a DRBD disk, return its port to the pool
13238 if root.dev_type in constants.LDS_DRBD:
13239 self.cfg.AddTcpUdpPort(root.logical_id[2])
13242 def _CreateNewNic(idx, params, private):
13243 """Creates data structure for a new network interface.
13246 mac = params[constants.INIC_MAC]
13247 ip = params.get(constants.INIC_IP, None)
13248 nicparams = private.params
13250 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
13252 "add:mac=%s,ip=%s,mode=%s,link=%s" %
13253 (mac, ip, private.filled[constants.NIC_MODE],
13254 private.filled[constants.NIC_LINK])),
13258 def _ApplyNicMods(idx, nic, params, private):
13259 """Modifies a network interface.
13264 for key in [constants.INIC_MAC, constants.INIC_IP]:
13266 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13267 setattr(nic, key, params[key])
13270 nic.nicparams = private.params
13272 for (key, val) in params.items():
13273 changes.append(("nic.%s/%d" % (key, idx), val))
13277 def Exec(self, feedback_fn):
13278 """Modifies an instance.
13280 All parameters take effect only at the next restart of the instance.
13283 # Process here the warnings from CheckPrereq, as we don't have a
13284 # feedback_fn there.
13285 # TODO: Replace with self.LogWarning
13286 for warn in self.warn:
13287 feedback_fn("WARNING: %s" % warn)
13289 assert ((self.op.disk_template is None) ^
13290 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13291 "Not owning any node resource locks"
13294 instance = self.instance
13297 if self.op.runtime_mem:
13298 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13300 self.op.runtime_mem)
13301 rpcres.Raise("Cannot modify instance runtime memory")
13302 result.append(("runtime_memory", self.op.runtime_mem))
13304 # Apply disk changes
13305 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13306 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
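# Additions and removals may have changed the disk list, so renumber the
# iv_names ("disk/0", "disk/1", ...) to match the new order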
13307 _UpdateIvNames(0, instance.disks)
13309 if self.op.disk_template:
13311 check_nodes = set(instance.all_nodes)
13312 if self.op.remote_node:
13313 check_nodes.add(self.op.remote_node)
13314 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13315 owned = self.owned_locks(level)
13316 assert not (check_nodes - owned), \
13317 ("Not owning the correct locks, owning %r, expected at least %r" %
13318 (owned, check_nodes))
13320 r_shut = _ShutdownInstanceDisks(self, instance)
13322 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13323 " proceed with disk template conversion")
13324 mode = (instance.disk_template, self.op.disk_template)
13326 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13328 self.cfg.ReleaseDRBDMinors(instance.name)
13330 result.append(("disk_template", self.op.disk_template))
13332 assert instance.disk_template == self.op.disk_template, \
13333 ("Expected disk template '%s', found '%s'" %
13334 (self.op.disk_template, instance.disk_template))
13336 # Release node and resource locks if there are any (they might already have
13337 # been released during disk conversion)
13338 _ReleaseLocks(self, locking.LEVEL_NODE)
13339 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13341 # Apply NIC changes
13342 if self._new_nics is not None:
13343 instance.nics = self._new_nics
13344 result.extend(self._nic_chgdesc)
13347 if self.op.hvparams:
13348 instance.hvparams = self.hv_inst
13349 for key, val in self.op.hvparams.iteritems():
13350 result.append(("hv/%s" % key, val))
13353 if self.op.beparams:
13354 instance.beparams = self.be_inst
13355 for key, val in self.op.beparams.iteritems():
13356 result.append(("be/%s" % key, val))
13359 if self.op.os_name:
13360 instance.os = self.op.os_name
13363 if self.op.osparams:
13364 instance.osparams = self.os_inst
13365 for key, val in self.op.osparams.iteritems():
13366 result.append(("os/%s" % key, val))
13368 if self.op.offline is None:
13371 elif self.op.offline:
13372 # Mark instance as offline
13373 self.cfg.MarkInstanceOffline(instance.name)
13374 result.append(("admin_state", constants.ADMINST_OFFLINE))
13376 # Mark instance as online, but stopped
13377 self.cfg.MarkInstanceDown(instance.name)
13378 result.append(("admin_state", constants.ADMINST_DOWN))
13380 self.cfg.Update(instance, feedback_fn)
13382 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13383 self.owned_locks(locking.LEVEL_NODE)), \
13384 "All node locks should have been released by now"
13388 _DISK_CONVERSIONS = {
13389 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13390 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13394 class LUInstanceChangeGroup(LogicalUnit):
13395 HPATH = "instance-change-group"
13396 HTYPE = constants.HTYPE_INSTANCE
13399 def ExpandNames(self):
13400 self.share_locks = _ShareAll()
13401 self.needed_locks = {
13402 locking.LEVEL_NODEGROUP: [],
13403 locking.LEVEL_NODE: [],
13406 self._ExpandAndLockInstance()
13408 if self.op.target_groups:
13409 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13410 self.op.target_groups)
13412 self.req_target_uuids = None
13414 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13416 def DeclareLocks(self, level):
13417 if level == locking.LEVEL_NODEGROUP:
13418 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13420 if self.req_target_uuids:
13421 lock_groups = set(self.req_target_uuids)
13423 # Lock all groups used by instance optimistically; this requires going
13424 # via the node before it's locked, requiring verification later on
13425 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13426 lock_groups.update(instance_groups)
13428 # No target groups, need to lock all of them
13429 lock_groups = locking.ALL_SET
13431 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13433 elif level == locking.LEVEL_NODE:
13434 if self.req_target_uuids:
13435 # Lock all nodes used by instances
13436 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13437 self._LockInstancesNodes()
13439 # Lock all nodes in all potential target groups
13440 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13441 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13442 member_nodes = [node_name
13443 for group in lock_groups
13444 for node_name in self.cfg.GetNodeGroup(group).members]
13445 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13447 # Lock all nodes as all groups are potential targets
13448 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13450 def CheckPrereq(self):
13451 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13452 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13453 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13455 assert (self.req_target_uuids is None or
13456 owned_groups.issuperset(self.req_target_uuids))
13457 assert owned_instances == set([self.op.instance_name])
13459 # Get instance information
13460 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13462 # Check if node groups for locked instance are still correct
13463 assert owned_nodes.issuperset(self.instance.all_nodes), \
13464 ("Instance %s's nodes changed while we kept the lock" %
13465 self.op.instance_name)
13467 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13470 if self.req_target_uuids:
13471 # User requested specific target groups
13472 self.target_uuids = frozenset(self.req_target_uuids)
13474 # All groups except those used by the instance are potential targets
13475 self.target_uuids = owned_groups - inst_groups
13477 conflicting_groups = self.target_uuids & inst_groups
13478 if conflicting_groups:
13479 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13480 " used by the instance '%s'" %
13481 (utils.CommaJoin(conflicting_groups),
13482 self.op.instance_name),
13483 errors.ECODE_INVAL)
13485 if not self.target_uuids:
13486 raise errors.OpPrereqError("There are no possible target groups",
13487 errors.ECODE_INVAL)
13489 def BuildHooksEnv(self):
13490 """Build hooks env.
13493 assert self.target_uuids
13496 "TARGET_GROUPS": " ".join(self.target_uuids),
13499 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13503 def BuildHooksNodes(self):
13504 """Build hooks nodes.
13507 mn = self.cfg.GetMasterNode()
13508 return ([mn], [mn])
13510 def Exec(self, feedback_fn):
13511 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13513 assert instances == [self.op.instance_name], "Instance not locked"
13515 req = iallocator.IAReqGroupChange(instances=instances,
13516 target_groups=list(self.target_uuids))
13517 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13519 ial.Run(self.op.iallocator)
13521 if not ial.success:
13522 raise errors.OpPrereqError("Can't compute solution for changing group of"
13523 " instance '%s' using iallocator '%s': %s" %
13524 (self.op.instance_name, self.op.iallocator,
13525 ial.info), errors.ECODE_NORES)
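# Turn the iallocator's relocation plan into follow-up jobs; they are
# returned to the caller rather than executed directly by this LU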
13527 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13529 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13530 " instance '%s'", len(jobs), self.op.instance_name)
13532 return ResultWithJobs(jobs)
13535 class LUBackupQuery(NoHooksLU):
13536 """Query the exports list
13541 def CheckArguments(self):
13542 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13543 ["node", "export"], self.op.use_locking)
13545 def ExpandNames(self):
13546 self.expq.ExpandNames(self)
13548 def DeclareLocks(self, level):
13549 self.expq.DeclareLocks(self, level)
13551 def Exec(self, feedback_fn):
13554 for (node, expname) in self.expq.OldStyleQuery(self):
13555 if expname is None:
13556 result[node] = False
13558 result.setdefault(node, []).append(expname)
13563 class _ExportQuery(_QueryBase):
13564 FIELDS = query.EXPORT_FIELDS
13566 #: The node name is not a unique key for this query
13567 SORT_FIELD = "node"
13569 def ExpandNames(self, lu):
13570 lu.needed_locks = {}
13572 # The following variables interact with _QueryBase._GetNames
13574 self.wanted = _GetWantedNodes(lu, self.names)
13576 self.wanted = locking.ALL_SET
13578 self.do_locking = self.use_locking
13580 if self.do_locking:
13581 lu.share_locks = _ShareAll()
13582 lu.needed_locks = {
13583 locking.LEVEL_NODE: self.wanted,
13586 def DeclareLocks(self, lu, level):
13589 def _GetQueryData(self, lu):
13590 """Computes the list of nodes and their attributes.
13593 # Locking is not used
13595 assert not (compat.any(lu.glm.is_owned(level)
13596 for level in locking.LEVELS
13597 if level != locking.LEVEL_CLUSTER) or
13598 self.do_locking or self.use_locking)
13600 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
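# Ask every selected node for its export list; nodes that fail to answer are
# reported with a None entry rather than being silently skipped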
13604 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13606 result.append((node, None))
13608 result.extend((node, expname) for expname in nres.payload)
13613 class LUBackupPrepare(NoHooksLU):
13614 """Prepares an instance for an export and returns useful information.
13619 def ExpandNames(self):
13620 self._ExpandAndLockInstance()
13622 def CheckPrereq(self):
13623 """Check prerequisites.
13626 instance_name = self.op.instance_name
13628 self.instance = self.cfg.GetInstanceInfo(instance_name)
13629 assert self.instance is not None, \
13630 "Cannot retrieve locked instance %s" % self.op.instance_name
13631 _CheckNodeOnline(self, self.instance.primary_node)
13633 self._cds = _GetClusterDomainSecret()
13635 def Exec(self, feedback_fn):
13636 """Prepares an instance for an export.
13639 instance = self.instance
13641 if self.op.mode == constants.EXPORT_MODE_REMOTE:
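# The generated key name and CA are signed with the cluster domain secret;
# LUBackupExport verifies those signatures before trusting the values that
# are passed back in the export opcode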
13642 salt = utils.GenerateSecret(8)
13644 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13645 result = self.rpc.call_x509_cert_create(instance.primary_node,
13646 constants.RIE_CERT_VALIDITY)
13647 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13649 (name, cert_pem) = result.payload
13651 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13655 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13656 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13658 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13664 class LUBackupExport(LogicalUnit):
13665 """Export an instance to an image in the cluster.
13668 HPATH = "instance-export"
13669 HTYPE = constants.HTYPE_INSTANCE
13672 def CheckArguments(self):
13673 """Check the arguments.
13676 self.x509_key_name = self.op.x509_key_name
13677 self.dest_x509_ca_pem = self.op.destination_x509_ca
13679 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13680 if not self.x509_key_name:
13681 raise errors.OpPrereqError("Missing X509 key name for encryption",
13682 errors.ECODE_INVAL)
13684 if not self.dest_x509_ca_pem:
13685 raise errors.OpPrereqError("Missing destination X509 CA",
13686 errors.ECODE_INVAL)
13688 def ExpandNames(self):
13689 self._ExpandAndLockInstance()
13691 # Lock all nodes for local exports
13692 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13693 # FIXME: lock only instance primary and destination node
13695 # Sad but true, for now we have to lock all nodes, as we don't know where
13696 # the previous export might be, and in this LU we search for it and
13697 # remove it from its current node. In the future we could fix this by:
13698 # - making a tasklet to search (share-lock all), then create the
13699 # new one, then one to remove, after
13700 # - removing the removal operation altogether
13701 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13703 def DeclareLocks(self, level):
13704 """Last minute lock declaration."""
13705 # All nodes are locked anyway, so nothing to do here.
13707 def BuildHooksEnv(self):
13708 """Build hooks env.
13710 This will run on the master, primary node and target node.
13714 "EXPORT_MODE": self.op.mode,
13715 "EXPORT_NODE": self.op.target_node,
13716 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13717 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13718 # TODO: Generic function for boolean env variables
13719 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13722 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13726 def BuildHooksNodes(self):
13727 """Build hooks nodes.
13730 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13732 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13733 nl.append(self.op.target_node)
13737 def CheckPrereq(self):
13738 """Check prerequisites.
13740 This checks that the instance and node names are valid.
13743 instance_name = self.op.instance_name
13745 self.instance = self.cfg.GetInstanceInfo(instance_name)
13746 assert self.instance is not None, \
13747 "Cannot retrieve locked instance %s" % self.op.instance_name
13748 _CheckNodeOnline(self, self.instance.primary_node)
13750 if (self.op.remove_instance and
13751 self.instance.admin_state == constants.ADMINST_UP and
13752 not self.op.shutdown):
13753 raise errors.OpPrereqError("Can not remove instance without shutting it"
13754 " down before", errors.ECODE_STATE)
13756 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13757 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13758 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13759 assert self.dst_node is not None
13761 _CheckNodeOnline(self, self.dst_node.name)
13762 _CheckNodeNotDrained(self, self.dst_node.name)
13765 self.dest_disk_info = None
13766 self.dest_x509_ca = None
13768 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13769 self.dst_node = None
13771 if len(self.op.target_node) != len(self.instance.disks):
13772 raise errors.OpPrereqError(("Received destination information for %s"
13773 " disks, but instance %s has %s disks") %
13774 (len(self.op.target_node), instance_name,
13775 len(self.instance.disks)),
13776 errors.ECODE_INVAL)
13778 cds = _GetClusterDomainSecret()
13780 # Check X509 key name
13782 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13783 except (TypeError, ValueError), err:
13784 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13785 errors.ECODE_INVAL)
13787 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13788 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13789 errors.ECODE_INVAL)
13791 # Load and verify CA
13793 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13794 except OpenSSL.crypto.Error, err:
13795 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13796 (err, ), errors.ECODE_INVAL)
13798 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13799 if errcode is not None:
13800 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13801 (msg, ), errors.ECODE_INVAL)
13803 self.dest_x509_ca = cert
13805 # Verify target information
13807 for idx, disk_data in enumerate(self.op.target_node):
13809 (host, port, magic) = \
13810 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13811 except errors.GenericError, err:
13812 raise errors.OpPrereqError("Target info for disk %s: %s" %
13813 (idx, err), errors.ECODE_INVAL)
13815 disk_info.append((host, port, magic))
13817 assert len(disk_info) == len(self.op.target_node)
13818 self.dest_disk_info = disk_info
13821 raise errors.ProgrammerError("Unhandled export mode %r" %
13824 # instance disk type verification
13825 # TODO: Implement export support for file-based disks
13826 for disk in self.instance.disks:
13827 if disk.dev_type == constants.LD_FILE:
13828 raise errors.OpPrereqError("Export not supported for instances with"
13829 " file-based disks", errors.ECODE_INVAL)
13831 def _CleanupExports(self, feedback_fn):
13832 """Removes exports of current instance from all other nodes.
13834 If an instance in a cluster with nodes A..D was exported to node C, its
13835 exports will be removed from the nodes A, B and D.
13838 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13840 nodelist = self.cfg.GetNodeList()
13841 nodelist.remove(self.dst_node.name)
13843 # on one-node clusters nodelist will be empty after the removal
13844 # if we proceed the backup would be removed because OpBackupQuery
13845 # substitutes an empty list with the full cluster node list.
13846 iname = self.instance.name
13848 feedback_fn("Removing old exports for instance %s" % iname)
13849 exportlist = self.rpc.call_export_list(nodelist)
13850 for node in exportlist:
13851 if exportlist[node].fail_msg:
13853 if iname in exportlist[node].payload:
13854 msg = self.rpc.call_export_remove(node, iname).fail_msg
13856 self.LogWarning("Could not remove older export for instance %s"
13857 " on node %s: %s", iname, node, msg)
13859 def Exec(self, feedback_fn):
13860 """Export an instance to an image in the cluster.
13863 assert self.op.mode in constants.EXPORT_MODES
13865 instance = self.instance
13866 src_node = instance.primary_node
13868 if self.op.shutdown:
13869 # shutdown the instance, but not the disks
13870 feedback_fn("Shutting down instance %s" % instance.name)
13871 result = self.rpc.call_instance_shutdown(src_node, instance,
13872 self.op.shutdown_timeout)
13873 # TODO: Maybe ignore failures if ignore_remove_failures is set
13874 result.Raise("Could not shutdown instance %s on"
13875 " node %s" % (instance.name, src_node))
13877 # set the disks ID correctly since call_instance_start needs the
13878 # correct drbd minor to create the symlinks
13879 for disk in instance.disks:
13880 self.cfg.SetDiskID(disk, src_node)
13882 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13885 # Activate the instance disks if we're exporting a stopped instance
13886 feedback_fn("Activating disks for %s" % instance.name)
13887 _StartInstanceDisks(self, instance, None)
13890 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13893 helper.CreateSnapshots()
13895 if (self.op.shutdown and
13896 instance.admin_state == constants.ADMINST_UP and
13897 not self.op.remove_instance):
13898 assert not activate_disks
13899 feedback_fn("Starting instance %s" % instance.name)
13900 result = self.rpc.call_instance_start(src_node,
13901 (instance, None, None), False)
13902 msg = result.fail_msg
13904 feedback_fn("Failed to start instance: %s" % msg)
13905 _ShutdownInstanceDisks(self, instance)
13906 raise errors.OpExecError("Could not start instance: %s" % msg)
13908 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13909 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13910 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13911 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13912 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13914 (key_name, _, _) = self.x509_key_name
13917 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13920 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13921 key_name, dest_ca_pem,
13926 # Check for backwards compatibility
13927 assert len(dresults) == len(instance.disks)
13928 assert compat.all(isinstance(i, bool) for i in dresults), \
13929 "Not all results are boolean: %r" % dresults
13933 feedback_fn("Deactivating disks for %s" % instance.name)
13934 _ShutdownInstanceDisks(self, instance)
13936 if not (compat.all(dresults) and fin_resu):
13939 failures.append("export finalization")
13940 if not compat.all(dresults):
13941 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13943 failures.append("disk export: disk(s) %s" % fdsk)
13945 raise errors.OpExecError("Export failed, errors in %s" %
13946 utils.CommaJoin(failures))
13948 # At this point, the export was successful, we can cleanup/finish
13950 # Remove instance if requested
13951 if self.op.remove_instance:
13952 feedback_fn("Removing instance %s" % instance.name)
13953 _RemoveInstance(self, feedback_fn, instance,
13954 self.op.ignore_remove_failures)
13956 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13957 self._CleanupExports(feedback_fn)
13959 return fin_resu, dresults
13962 class LUBackupRemove(NoHooksLU):
13963 """Remove exports related to the named instance.
13968 def ExpandNames(self):
13969 self.needed_locks = {}
13970 # We need all nodes to be locked in order for RemoveExport to work, but we
13971 # don't need to lock the instance itself, as nothing will happen to it (and
13972 # we can remove exports also for a removed instance)
13973 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13975 def Exec(self, feedback_fn):
13976 """Remove any export.
13979 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13980 # If the instance was not found we'll try with the name that was passed in.
13981 # This will only work if it was an FQDN, though.
13983 if not instance_name:
13985 instance_name = self.op.instance_name
13987 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13988 exportlist = self.rpc.call_export_list(locked_nodes)
13990 for node in exportlist:
13991 msg = exportlist[node].fail_msg
13993 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13995 if instance_name in exportlist[node].payload:
13997 result = self.rpc.call_export_remove(node, instance_name)
13998 msg = result.fail_msg
14000 logging.error("Could not remove export for instance %s"
14001 " on node %s: %s", instance_name, node, msg)
14003 if fqdn_warn and not found:
14004 feedback_fn("Export not found. If trying to remove an export belonging"
14005 " to a deleted instance please use its Fully Qualified"
14009 class LUGroupAdd(LogicalUnit):
14010 """Logical unit for creating node groups.
14013 HPATH = "group-add"
14014 HTYPE = constants.HTYPE_GROUP
14017 def ExpandNames(self):
14018 # We need the new group's UUID here so that we can create and acquire the
14019 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14020 # that it should not check whether the UUID exists in the configuration.
14021 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14022 self.needed_locks = {}
14023 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14025 def CheckPrereq(self):
14026 """Check prerequisites.
14028 This checks that the given group name is not an existing node group
14033 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14034 except errors.OpPrereqError:
14037 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14038 " node group (UUID: %s)" %
14039 (self.op.group_name, existing_uuid),
14040 errors.ECODE_EXISTS)
14042 if self.op.ndparams:
14043 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14045 if self.op.hv_state:
14046 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14048 self.new_hv_state = None
14050 if self.op.disk_state:
14051 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14053 self.new_disk_state = None
14055 if self.op.diskparams:
14056 for templ in constants.DISK_TEMPLATES:
14057 if templ in self.op.diskparams:
14058 utils.ForceDictType(self.op.diskparams[templ],
14059 constants.DISK_DT_TYPES)
14060 self.new_diskparams = self.op.diskparams
14062 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14063 except errors.OpPrereqError, err:
14064 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14065 errors.ECODE_INVAL)
14067 self.new_diskparams = {}
14069 if self.op.ipolicy:
14070 cluster = self.cfg.GetClusterInfo()
14071 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14073 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14074 except errors.ConfigurationError, err:
14075 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14076 errors.ECODE_INVAL)
14078 def BuildHooksEnv(self):
14079 """Build hooks env.
14083 "GROUP_NAME": self.op.group_name,
14086 def BuildHooksNodes(self):
14087 """Build hooks nodes.
14090 mn = self.cfg.GetMasterNode()
14091 return ([mn], [mn])
14093 def Exec(self, feedback_fn):
14094 """Add the node group to the cluster.
14097 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14098 uuid=self.group_uuid,
14099 alloc_policy=self.op.alloc_policy,
14100 ndparams=self.op.ndparams,
14101 diskparams=self.new_diskparams,
14102 ipolicy=self.op.ipolicy,
14103 hv_state_static=self.new_hv_state,
14104 disk_state_static=self.new_disk_state)
14106 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14107 del self.remove_locks[locking.LEVEL_NODEGROUP]
14110 class LUGroupAssignNodes(NoHooksLU):
14111 """Logical unit for assigning nodes to groups.
14116 def ExpandNames(self):
14117 # These raise errors.OpPrereqError on their own:
14118 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14119 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14121 # We want to lock all the affected nodes and groups. We have readily
14122 # available the list of nodes, and the *destination* group. To gather the
14123 # list of "source" groups, we need to fetch node information later on.
14124 self.needed_locks = {
14125 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14126 locking.LEVEL_NODE: self.op.nodes,
14129 def DeclareLocks(self, level):
14130 if level == locking.LEVEL_NODEGROUP:
14131 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14133 # Try to get all affected nodes' groups without having the group or node
14134 # lock yet. Needs verification later in the code flow.
14135 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14137 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14139 def CheckPrereq(self):
14140 """Check prerequisites.
14143 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14144 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14145 frozenset(self.op.nodes))
14147 expected_locks = (set([self.group_uuid]) |
14148 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14149 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14150 if actual_locks != expected_locks:
14151 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14152 " current groups are '%s', used to be '%s'" %
14153 (utils.CommaJoin(expected_locks),
14154 utils.CommaJoin(actual_locks)))
14156 self.node_data = self.cfg.GetAllNodesInfo()
14157 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14158 instance_data = self.cfg.GetAllInstancesInfo()
14160 if self.group is None:
14161 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14162 (self.op.group_name, self.group_uuid))
14164 (new_splits, previous_splits) = \
14165 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14166 for node in self.op.nodes],
14167 self.node_data, instance_data)
14170 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14172 if not self.op.force:
14173 raise errors.OpExecError("The following instances get split by this"
14174 " change and --force was not given: %s" %
14177 self.LogWarning("This operation will split the following instances: %s",
14180 if previous_splits:
14181 self.LogWarning("In addition, these already-split instances continue"
14182 " to be split across groups: %s",
14183 utils.CommaJoin(utils.NiceSort(previous_splits)))
14185 def Exec(self, feedback_fn):
14186 """Assign nodes to a new group.
14189 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14191 self.cfg.AssignGroupNodes(mods)
14194 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14195 """Check for split instances after a node assignment.
14197 This method considers a series of node assignments as an atomic operation,
14198 and returns information about split instances after applying the set of
14201 In particular, it returns information about newly split instances, and
14202 instances that were already split, and remain so after the change.
14204 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14207 @type changes: list of (node_name, new_group_uuid) pairs.
14208 @param changes: list of node assignments to consider.
14209 @param node_data: a dict with data for all nodes
14210 @param instance_data: a dict with all instances to consider
14211 @rtype: a two-tuple
14212 @return: a list of instances that were previously okay and become split as a
14213 consequence of this change, and a list of instances that were previously
14214 split and that this change does not fix.
14217 changed_nodes = dict((node, group) for node, group in changes
14218 if node_data[node].group != group)
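# For every mirrored instance, record both whether it is already split across
# groups (previously_split_instances) and whether it would be split under the
# new assignment (all_split_instances)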
14220 all_split_instances = set()
14221 previously_split_instances = set()
14223 def InstanceNodes(instance):
14224 return [instance.primary_node] + list(instance.secondary_nodes)
14226 for inst in instance_data.values():
14227 if inst.disk_template not in constants.DTS_INT_MIRROR:
14230 instance_nodes = InstanceNodes(inst)
14232 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14233 previously_split_instances.add(inst.name)
14235 if len(set(changed_nodes.get(node, node_data[node].group)
14236 for node in instance_nodes)) > 1:
14237 all_split_instances.add(inst.name)
14239 return (list(all_split_instances - previously_split_instances),
14240 list(previously_split_instances & all_split_instances))
14243 class _GroupQuery(_QueryBase):
14244 FIELDS = query.GROUP_FIELDS
14246 def ExpandNames(self, lu):
14247 lu.needed_locks = {}
14249 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14250 self._cluster = lu.cfg.GetClusterInfo()
14251 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14254 self.wanted = [name_to_uuid[name]
14255 for name in utils.NiceSort(name_to_uuid.keys())]
14257 # Accept names to be either names or UUIDs.
14260 all_uuid = frozenset(self._all_groups.keys())
14262 for name in self.names:
14263 if name in all_uuid:
14264 self.wanted.append(name)
14265 elif name in name_to_uuid:
14266 self.wanted.append(name_to_uuid[name])
14268 missing.append(name)
14271 raise errors.OpPrereqError("Some groups do not exist: %s" %
14272 utils.CommaJoin(missing),
14273 errors.ECODE_NOENT)
14275 def DeclareLocks(self, lu, level):
14278 def _GetQueryData(self, lu):
14279 """Computes the list of node groups and their attributes.
14282 do_nodes = query.GQ_NODE in self.requested_data
14283 do_instances = query.GQ_INST in self.requested_data
14285 group_to_nodes = None
14286 group_to_instances = None
14288 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14289 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14290 # latter GetAllInstancesInfo() is not enough, for we have to go through
14291 # instance->node. Hence, we will need to process nodes even if we only need
14292 # instance information.
14293 if do_nodes or do_instances:
14294 all_nodes = lu.cfg.GetAllNodesInfo()
14295 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14298 for node in all_nodes.values():
14299 if node.group in group_to_nodes:
14300 group_to_nodes[node.group].append(node.name)
14301 node_to_group[node.name] = node.group
14304 all_instances = lu.cfg.GetAllInstancesInfo()
14305 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14307 for instance in all_instances.values():
14308 node = instance.primary_node
14309 if node in node_to_group:
14310 group_to_instances[node_to_group[node]].append(instance.name)
14313 # Do not pass on node information if it was not requested.
14314 group_to_nodes = None
14316 return query.GroupQueryData(self._cluster,
14317 [self._all_groups[uuid]
14318 for uuid in self.wanted],
14319 group_to_nodes, group_to_instances,
14320 query.GQ_DISKPARAMS in self.requested_data)
14323 class LUGroupQuery(NoHooksLU):
14324 """Logical unit for querying node groups.
14329 def CheckArguments(self):
14330 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14331 self.op.output_fields, False)
14333 def ExpandNames(self):
14334 self.gq.ExpandNames(self)
14336 def DeclareLocks(self, level):
14337 self.gq.DeclareLocks(self, level)
14339 def Exec(self, feedback_fn):
14340 return self.gq.OldStyleQuery(self)
14343 class LUGroupSetParams(LogicalUnit):
14344 """Modifies the parameters of a node group.
14347 HPATH = "group-modify"
14348 HTYPE = constants.HTYPE_GROUP
14351 def CheckArguments(self):
14354 self.op.diskparams,
14355 self.op.alloc_policy,
14357 self.op.disk_state,
14361 if all_changes.count(None) == len(all_changes):
14362 raise errors.OpPrereqError("Please pass at least one modification",
14363 errors.ECODE_INVAL)
14365 def ExpandNames(self):
14366 # This raises errors.OpPrereqError on its own:
14367 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14369 self.needed_locks = {
14370 locking.LEVEL_INSTANCE: [],
14371 locking.LEVEL_NODEGROUP: [self.group_uuid],
14374 self.share_locks[locking.LEVEL_INSTANCE] = 1
14376 def DeclareLocks(self, level):
14377 if level == locking.LEVEL_INSTANCE:
14378 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14380 # Lock instances optimistically, needs verification once group lock has
14382 self.needed_locks[locking.LEVEL_INSTANCE] = \
14383 self.cfg.GetNodeGroupInstances(self.group_uuid)
14386 def _UpdateAndVerifyDiskParams(old, new):
14387 """Updates and verifies disk parameters.
14390 new_params = _GetUpdatedParams(old, new)
14391 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14394 def CheckPrereq(self):
14395 """Check prerequisites.
14398 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14400 # Check if locked instances are still correct
14401 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14403 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14404 cluster = self.cfg.GetClusterInfo()
14406 if self.group is None:
14407 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14408 (self.op.group_name, self.group_uuid))
14410 if self.op.ndparams:
14411 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14412 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14413 self.new_ndparams = new_ndparams
14415 if self.op.diskparams:
14416 diskparams = self.group.diskparams
14417 uavdp = self._UpdateAndVerifyDiskParams
14418 # For each disktemplate subdict update and verify the values
14419 new_diskparams = dict((dt,
14420 uavdp(diskparams.get(dt, {}),
14421 self.op.diskparams[dt]))
14422 for dt in constants.DISK_TEMPLATES
14423 if dt in self.op.diskparams)
14424 # Now that all per-template subdicts have been updated, merge them into
14425 # the actual diskparams dict
14426 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14428 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14429 except errors.OpPrereqError, err:
14430 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14431 errors.ECODE_INVAL)
14433 if self.op.hv_state:
14434 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14435 self.group.hv_state_static)
14437 if self.op.disk_state:
14438 self.new_disk_state = \
14439 _MergeAndVerifyDiskState(self.op.disk_state,
14440 self.group.disk_state_static)
14442 if self.op.ipolicy:
14443 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14447 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14448 inst_filter = lambda inst: inst.name in owned_instances
14449 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14450 gmi = ganeti.masterd.instance
14452 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14454 new_ipolicy, instances)
14457 self.LogWarning("After the ipolicy change the following instances"
14458 " violate them: %s",
14459 utils.CommaJoin(violations))
14461 def BuildHooksEnv(self):
14462 """Build hooks env.
14466 "GROUP_NAME": self.op.group_name,
14467 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14470 def BuildHooksNodes(self):
14471 """Build hooks nodes.
14474 mn = self.cfg.GetMasterNode()
14475 return ([mn], [mn])
14477 def Exec(self, feedback_fn):
14478 """Modifies the node group.
14483 if self.op.ndparams:
14484 self.group.ndparams = self.new_ndparams
14485 result.append(("ndparams", str(self.group.ndparams)))
14487 if self.op.diskparams:
14488 self.group.diskparams = self.new_diskparams
14489 result.append(("diskparams", str(self.group.diskparams)))
14491 if self.op.alloc_policy:
14492 self.group.alloc_policy = self.op.alloc_policy
14494 if self.op.hv_state:
14495 self.group.hv_state_static = self.new_hv_state
14497 if self.op.disk_state:
14498 self.group.disk_state_static = self.new_disk_state
14500 if self.op.ipolicy:
14501 self.group.ipolicy = self.new_ipolicy
14503 self.cfg.Update(self.group, feedback_fn)
14507 class LUGroupRemove(LogicalUnit):
14508 HPATH = "group-remove"
14509 HTYPE = constants.HTYPE_GROUP
14512 def ExpandNames(self):
14513 # This raises errors.OpPrereqError on its own:
14514 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14515 self.needed_locks = {
14516 locking.LEVEL_NODEGROUP: [self.group_uuid],
14519 def CheckPrereq(self):
14520 """Check prerequisites.
14522 This checks that the given group name exists as a node group, that it is
14523 empty (i.e., contains no nodes), and that it is not the last group of the
14527 # Verify that the group is empty.
14528 group_nodes = [node.name
14529 for node in self.cfg.GetAllNodesInfo().values()
14530 if node.group == self.group_uuid]
14533 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14535 (self.op.group_name,
14536 utils.CommaJoin(utils.NiceSort(group_nodes))),
14537 errors.ECODE_STATE)
14539 # Verify the cluster would not be left group-less.
14540 if len(self.cfg.GetNodeGroupList()) == 1:
14541 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14542 " removed" % self.op.group_name,
14543 errors.ECODE_STATE)
14545 def BuildHooksEnv(self):
14546 """Build hooks env.
14550 "GROUP_NAME": self.op.group_name,
14553 def BuildHooksNodes(self):
14554 """Build hooks nodes.
14557 mn = self.cfg.GetMasterNode()
14558 return ([mn], [mn])
14560 def Exec(self, feedback_fn):
14561 """Remove the node group.
14564 try:
14565 self.cfg.RemoveNodeGroup(self.group_uuid)
14566 except errors.ConfigurationError:
14567 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14568 (self.op.group_name, self.group_uuid))
14570 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
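# Descriptive note: recording the group UUID in remove_locks asks the lock
# manager to drop and delete the node group lock once this LU finishes,
# since the group no longer exists in the configuration.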
14573 class LUGroupRename(LogicalUnit):
14574 HPATH = "group-rename"
14575 HTYPE = constants.HTYPE_GROUP
14578 def ExpandNames(self):
14579 # This raises errors.OpPrereqError on its own:
14580 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14582 self.needed_locks = {
14583 locking.LEVEL_NODEGROUP: [self.group_uuid],
14586 def CheckPrereq(self):
14587 """Check prerequisites.
14589 Ensures requested new name is not yet used.
14592 try:
14593 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14594 except errors.OpPrereqError:
14595 pass
14596 else:
14597 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14598 " node group (UUID: %s)" %
14599 (self.op.new_name, new_name_uuid),
14600 errors.ECODE_EXISTS)
14602 def BuildHooksEnv(self):
14603 """Build hooks env.
14607 "OLD_NAME": self.op.group_name,
14608 "NEW_NAME": self.op.new_name,
14611 def BuildHooksNodes(self):
14612 """Build hooks nodes.
14615 mn = self.cfg.GetMasterNode()
14617 all_nodes = self.cfg.GetAllNodesInfo()
14618 all_nodes.pop(mn, None)
14620 run_nodes = [mn]
14621 run_nodes.extend(node.name for node in all_nodes.values()
14622 if node.group == self.group_uuid)
14624 return (run_nodes, run_nodes)
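# Descriptive note: the master node is removed from the full node list and
# placed at the front of run_nodes, so the rename hooks run once on the
# master plus on every node of the renamed group, with no duplicates.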
14626 def Exec(self, feedback_fn):
14627 """Rename the node group.
14630 group = self.cfg.GetNodeGroup(self.group_uuid)
14632 if group is None:
14633 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14634 (self.op.group_name, self.group_uuid))
14636 group.name = self.op.new_name
14637 self.cfg.Update(group, feedback_fn)
14639 return self.op.new_name
14642 class LUGroupEvacuate(LogicalUnit):
14643 HPATH = "group-evacuate"
14644 HTYPE = constants.HTYPE_GROUP
14647 def ExpandNames(self):
14648 # This raises errors.OpPrereqError on its own:
14649 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14651 if self.op.target_groups:
14652 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14653 self.op.target_groups)
14654 else:
14655 self.req_target_uuids = []
14657 if self.group_uuid in self.req_target_uuids:
14658 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14659 " as a target group (targets are %s)" %
14660 (self.op.group_name,
14661 utils.CommaJoin(self.req_target_uuids)),
14662 errors.ECODE_INVAL)
14664 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14666 self.share_locks = _ShareAll()
14667 self.needed_locks = {
14668 locking.LEVEL_INSTANCE: [],
14669 locking.LEVEL_NODEGROUP: [],
14670 locking.LEVEL_NODE: [],
14673 def DeclareLocks(self, level):
14674 if level == locking.LEVEL_INSTANCE:
14675 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14677 # Lock instances optimistically, needs verification once node and group
14678 # locks have been acquired
14679 self.needed_locks[locking.LEVEL_INSTANCE] = \
14680 self.cfg.GetNodeGroupInstances(self.group_uuid)
14682 elif level == locking.LEVEL_NODEGROUP:
14683 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14685 if self.req_target_uuids:
14686 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14688 # Lock all groups used by instances optimistically; this requires going
14689 # via the node before it's locked, requiring verification later on
14690 lock_groups.update(group_uuid
14691 for instance_name in
14692 self.owned_locks(locking.LEVEL_INSTANCE)
14693 for group_uuid in
14694 self.cfg.GetInstanceNodeGroups(instance_name))
14695 else:
14696 # No target groups, need to lock all of them
14697 lock_groups = locking.ALL_SET
14699 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14701 elif level == locking.LEVEL_NODE:
14702 # This will only lock the nodes in the group to be evacuated which
14703 # contain actual instances
14704 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14705 self._LockInstancesNodes()
14707 # Lock all nodes in group to be evacuated and target groups
14708 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14709 assert self.group_uuid in owned_groups
14710 member_nodes = [node_name
14711 for group in owned_groups
14712 for node_name in self.cfg.GetNodeGroup(group).members]
14713 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
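# Descriptive note: the instance and group lock sets above are computed
# "optimistically", i.e. before the corresponding locks are actually held;
# CheckPrereq below re-checks the instance list and the node/group
# assignments once the locks have been acquired and aborts if they changed.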
14715 def CheckPrereq(self):
14716 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14717 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14718 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14720 assert owned_groups.issuperset(self.req_target_uuids)
14721 assert self.group_uuid in owned_groups
14723 # Check if locked instances are still correct
14724 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14726 # Get instance information
14727 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14729 # Check if node groups for locked instances are still correct
14730 _CheckInstancesNodeGroups(self.cfg, self.instances,
14731 owned_groups, owned_nodes, self.group_uuid)
14733 if self.req_target_uuids:
14734 # User requested specific target groups
14735 self.target_uuids = self.req_target_uuids
14737 # All groups except the one to be evacuated are potential targets
14738 self.target_uuids = [group_uuid for group_uuid in owned_groups
14739 if group_uuid != self.group_uuid]
14741 if not self.target_uuids:
14742 raise errors.OpPrereqError("There are no possible target groups",
14743 errors.ECODE_INVAL)
14745 def BuildHooksEnv(self):
14746 """Build hooks env.
14750 "GROUP_NAME": self.op.group_name,
14751 "TARGET_GROUPS": " ".join(self.target_uuids),
14754 def BuildHooksNodes(self):
14755 """Build hooks nodes.
14758 mn = self.cfg.GetMasterNode()
14760 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14762 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14764 return (run_nodes, run_nodes)
14766 def Exec(self, feedback_fn):
14767 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14769 assert self.group_uuid not in self.target_uuids
14771 req = iallocator.IAReqGroupChange(instances=instances,
14772 target_groups=self.target_uuids)
14773 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14775 ial.Run(self.op.iallocator)
14777 if not ial.success:
14778 raise errors.OpPrereqError("Can't compute group evacuation using"
14779 " iallocator '%s': %s" %
14780 (self.op.iallocator, ial.info),
14781 errors.ECODE_NORES)
14783 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14785 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14786 len(jobs), self.op.group_name)
14788 return ResultWithJobs(jobs)
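# Descriptive note: _LoadNodeEvacResult translates the iallocator's
# per-instance answer into lists of opcodes (one list per job), which are
# then returned wrapped in ResultWithJobs.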
14791 class TagsLU(NoHooksLU): # pylint: disable=W0223
14792 """Generic tags LU.
14794 This is an abstract class which is the parent of all the other tags LUs.
14797 def ExpandNames(self):
14798 self.group_uuid = None
14799 self.needed_locks = {}
14801 if self.op.kind == constants.TAG_NODE:
14802 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14803 lock_level = locking.LEVEL_NODE
14804 lock_name = self.op.name
14805 elif self.op.kind == constants.TAG_INSTANCE:
14806 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14807 lock_level = locking.LEVEL_INSTANCE
14808 lock_name = self.op.name
14809 elif self.op.kind == constants.TAG_NODEGROUP:
14810 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14811 lock_level = locking.LEVEL_NODEGROUP
14812 lock_name = self.group_uuid
14813 else:
14814 lock_level = None
14815 lock_name = None
14817 if lock_level and getattr(self.op, "use_locking", True):
14818 self.needed_locks[lock_level] = lock_name
14820 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14821 # not possible to acquire the BGL based on opcode parameters)
14823 def CheckPrereq(self):
14824 """Check prerequisites.
14827 if self.op.kind == constants.TAG_CLUSTER:
14828 self.target = self.cfg.GetClusterInfo()
14829 elif self.op.kind == constants.TAG_NODE:
14830 self.target = self.cfg.GetNodeInfo(self.op.name)
14831 elif self.op.kind == constants.TAG_INSTANCE:
14832 self.target = self.cfg.GetInstanceInfo(self.op.name)
14833 elif self.op.kind == constants.TAG_NODEGROUP:
14834 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14836 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14837 str(self.op.kind), errors.ECODE_INVAL)
14840 class LUTagsGet(TagsLU):
14841 """Returns the tags of a given object.
14846 def ExpandNames(self):
14847 TagsLU.ExpandNames(self)
14849 # Share locks as this is only a read operation
14850 self.share_locks = _ShareAll()
14852 def Exec(self, feedback_fn):
14853 """Returns the tag list.
14856 return list(self.target.GetTags())
14859 class LUTagsSearch(NoHooksLU):
14860 """Searches the tags for a given pattern.
14865 def ExpandNames(self):
14866 self.needed_locks = {}
14868 def CheckPrereq(self):
14869 """Check prerequisites.
14871 This checks the pattern passed for validity by compiling it.
14874 try:
14875 self.re = re.compile(self.op.pattern)
14876 except re.error, err:
14877 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14878 (self.op.pattern, err), errors.ECODE_INVAL)
14880 def Exec(self, feedback_fn):
14881 """Returns the tag list.
14884 cfg = self.cfg
14885 tgts = [("/cluster", cfg.GetClusterInfo())]
14886 ilist = cfg.GetAllInstancesInfo().values()
14887 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14888 nlist = cfg.GetAllNodesInfo().values()
14889 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14890 tgts.extend(("/nodegroup/%s" % n.name, n)
14891 for n in cfg.GetAllNodeGroupsInfo().values())
14892 results = []
14893 for path, target in tgts:
14894 for tag in target.GetTags():
14895 if self.re.search(tag):
14896 results.append((path, tag))
14897 return results
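# Illustrative sketch (assumed tag values, not from this module): searching
# for the pattern "^dept:" would return tuples such as
#   ("/instances/web1.example.com", "dept:marketing")
#   ("/nodes/node2.example.com", "dept:marketing")
# i.e. the object path together with every matching tag.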
14900 class LUTagsSet(TagsLU):
14901 """Sets a tag on a given object.
14906 def CheckPrereq(self):
14907 """Check prerequisites.
14909 This checks the type and length of the tag name and value.
14912 TagsLU.CheckPrereq(self)
14913 for tag in self.op.tags:
14914 objects.TaggableObject.ValidateTag(tag)
14916 def Exec(self, feedback_fn):
14920 try:
14921 for tag in self.op.tags:
14922 self.target.AddTag(tag)
14923 except errors.TagError, err:
14924 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14925 self.cfg.Update(self.target, feedback_fn)
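# Usage sketch (assumed example values): tags are normally added through an
# OpTagsSet opcode, e.g.
#   opcodes.OpTagsSet(kind=constants.TAG_INSTANCE,
#                     name="web1.example.com", tags=["dept:marketing"])
# which CheckPrereq validates with TaggableObject.ValidateTag before Exec
# stores them in the configuration.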
14928 class LUTagsDel(TagsLU):
14929 """Delete a list of tags from a given object.
14934 def CheckPrereq(self):
14935 """Check prerequisites.
14937 This checks that we have the given tag.
14940 TagsLU.CheckPrereq(self)
14941 for tag in self.op.tags:
14942 objects.TaggableObject.ValidateTag(tag)
14943 del_tags = frozenset(self.op.tags)
14944 cur_tags = self.target.GetTags()
14946 diff_tags = del_tags - cur_tags
14947 if diff_tags:
14948 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14949 raise errors.OpPrereqError("Tag(s) %s not found" %
14950 (utils.CommaJoin(diff_names), ),
14951 errors.ECODE_NOENT)
14953 def Exec(self, feedback_fn):
14954 """Remove the tag from the object.
14957 for tag in self.op.tags:
14958 self.target.RemoveTag(tag)
14959 self.cfg.Update(self.target, feedback_fn)
14962 class LUTestDelay(NoHooksLU):
14963 """Sleep for a specified amount of time.
14965 This LU sleeps on the master and/or nodes for a specified amount of
14966 time.
14971 def ExpandNames(self):
14972 """Expand names and set required locks.
14974 This expands the node list, if any.
14977 self.needed_locks = {}
14978 if self.op.on_nodes:
14979 # _GetWantedNodes can be used here, but is not always appropriate to use
14980 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14981 # more information.
14982 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14983 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14985 def _TestDelay(self):
14986 """Do the actual sleep.
14989 if self.op.on_master:
14990 if not utils.TestDelay(self.op.duration):
14991 raise errors.OpExecError("Error during master delay test")
14992 if self.op.on_nodes:
14993 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14994 for node, node_result in result.items():
14995 node_result.Raise("Failure during rpc call to node %s" % node)
14997 def Exec(self, feedback_fn):
14998 """Execute the test delay opcode, with the wanted repetitions.
15001 if self.op.repeat == 0:
15002 self._TestDelay()
15003 else:
15004 top_value = self.op.repeat - 1
15005 for i in range(self.op.repeat):
15006 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
15007 self._TestDelay()
15010 class LURestrictedCommand(NoHooksLU):
15011 """Logical unit for executing restricted commands.
15016 def ExpandNames(self):
15017 if self.op.nodes:
15018 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15020 self.needed_locks = {
15021 locking.LEVEL_NODE: self.op.nodes,
15023 self.share_locks = {
15024 locking.LEVEL_NODE: not self.op.use_locking,
15027 def CheckPrereq(self):
15028 """Check prerequisites.
15032 def Exec(self, feedback_fn):
15033 """Execute restricted command and return output.
15036 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15038 # Check if correct locks are held
15039 assert set(self.op.nodes).issubset(owned_nodes)
15041 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15043 result = []
15045 for node_name in self.op.nodes:
15046 nres = rpcres[node_name]
15047 if nres.fail_msg:
15048 msg = ("Command '%s' on node '%s' failed: %s" %
15049 (self.op.command, node_name, nres.fail_msg))
15050 result.append((False, msg))
15051 else:
15052 result.append((True, nres.payload))
15054 return result
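# Descriptive note: the return value is a list with one (success, data)
# tuple per requested node, in self.op.nodes order; data is the command
# output on success and an error message on failure.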
15057 class LUTestJqueue(NoHooksLU):
15058 """Utility LU to test some aspects of the job queue.
15063 # Must be lower than default timeout for WaitForJobChange to see whether it
15064 # notices changed jobs
15065 _CLIENT_CONNECT_TIMEOUT = 20.0
15066 _CLIENT_CONFIRM_TIMEOUT = 60.0
15068 @classmethod
15069 def _NotifyUsingSocket(cls, cb, errcls):
15070 """Opens a Unix socket and waits for another program to connect.
15073 @param cb: Callback to send socket name to client
15074 @type errcls: class
15075 @param errcls: Exception class to use for errors
15078 # Using a temporary directory as there's no easy way to create temporary
15079 # sockets without writing a custom loop around tempfile.mktemp and
15080 # socket.bind
15081 tmpdir = tempfile.mkdtemp()
15082 try:
15083 tmpsock = utils.PathJoin(tmpdir, "sock")
15085 logging.debug("Creating temporary socket at %s", tmpsock)
15086 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15087 try:
15088 sock.bind(tmpsock)
15089 sock.listen(1)
15091 # Send details to client
15092 cb(tmpsock)
15094 # Wait for client to connect before continuing
15095 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15096 try:
15097 (conn, _) = sock.accept()
15098 except socket.error, err:
15099 raise errcls("Client didn't connect in time (%s)" % err)
15100 finally:
15101 sock.close()
15102 finally:
15103 # Remove as soon as client is connected
15104 shutil.rmtree(tmpdir)
15106 # Wait for client to close
15107 try:
15108 try:
15109 # pylint: disable=E1101
15110 # Instance of '_socketobject' has no ... member
15111 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15112 conn.recv(1)
15113 except socket.error, err:
15114 raise errcls("Client failed to confirm notification (%s)" % err)
15115 finally:
15116 conn.close()
15118 def _SendNotification(self, test, arg, sockname):
15119 """Sends a notification to the client.
15122 @param test: Test name
15123 @param arg: Test argument (depends on test)
15124 @type sockname: string
15125 @param sockname: Socket path
15128 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15130 def _Notify(self, prereq, test, arg):
15131 """Notifies the client of a test.
15134 @param prereq: Whether this is a prereq-phase test
15136 @param test: Test name
15137 @param arg: Test argument (depends on test)
15140 if prereq:
15141 errcls = errors.OpPrereqError
15142 else:
15143 errcls = errors.OpExecError
15145 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15146 test, arg),
15147 errcls)
15149 def CheckArguments(self):
15150 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15151 self.expandnames_calls = 0
15153 def ExpandNames(self):
15154 checkargs_calls = getattr(self, "checkargs_calls", 0)
15155 if checkargs_calls < 1:
15156 raise errors.ProgrammerError("CheckArguments was not called")
15158 self.expandnames_calls += 1
15160 if self.op.notify_waitlock:
15161 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15163 self.LogInfo("Expanding names")
15165 # Get lock on master node (just to get a lock, not for a particular reason)
15166 self.needed_locks = {
15167 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15170 def Exec(self, feedback_fn):
15171 if self.expandnames_calls < 1:
15172 raise errors.ProgrammerError("ExpandNames was not called")
15174 if self.op.notify_exec:
15175 self._Notify(False, constants.JQT_EXEC, None)
15177 self.LogInfo("Executing")
15179 if self.op.log_messages:
15180 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15181 for idx, msg in enumerate(self.op.log_messages):
15182 self.LogInfo("Sending log message %s", idx + 1)
15183 feedback_fn(constants.JQT_MSGPREFIX + msg)
15184 # Report how many test messages have been sent
15185 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15188 raise errors.OpExecError("Opcode failure was requested")
15193 class LUTestAllocator(NoHooksLU):
15194 """Run allocator tests.
15196 This LU runs the allocator tests
15199 def CheckPrereq(self):
15200 """Check prerequisites.
15202 This checks the opcode parameters depending on the direction and mode of
15203 the test.
15205 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15206 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15207 for attr in ["memory", "disks", "disk_template",
15208 "os", "tags", "nics", "vcpus"]:
15209 if not hasattr(self.op, attr):
15210 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15211 attr, errors.ECODE_INVAL)
15212 iname = self.cfg.ExpandInstanceName(self.op.name)
15213 if iname is not None:
15214 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15215 iname, errors.ECODE_EXISTS)
15216 if not isinstance(self.op.nics, list):
15217 raise errors.OpPrereqError("Invalid parameter 'nics'",
15218 errors.ECODE_INVAL)
15219 if not isinstance(self.op.disks, list):
15220 raise errors.OpPrereqError("Invalid parameter 'disks'",
15221 errors.ECODE_INVAL)
15222 for row in self.op.disks:
15223 if (not isinstance(row, dict) or
15224 constants.IDISK_SIZE not in row or
15225 not isinstance(row[constants.IDISK_SIZE], int) or
15226 constants.IDISK_MODE not in row or
15227 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15228 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15229 " parameter", errors.ECODE_INVAL)
15230 if self.op.hypervisor is None:
15231 self.op.hypervisor = self.cfg.GetHypervisorType()
15232 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15233 fname = _ExpandInstanceName(self.cfg, self.op.name)
15234 self.op.name = fname
15235 self.relocate_from = \
15236 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15237 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15238 constants.IALLOCATOR_MODE_NODE_EVAC):
15239 if not self.op.instances:
15240 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15241 self.op.instances = _GetWantedInstances(self, self.op.instances)
15243 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15244 self.op.mode, errors.ECODE_INVAL)
15246 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15247 if self.op.allocator is None:
15248 raise errors.OpPrereqError("Missing allocator name",
15249 errors.ECODE_INVAL)
15250 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15251 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15252 self.op.direction, errors.ECODE_INVAL)
15254 def Exec(self, feedback_fn):
15255 """Run the allocator test.
15258 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15259 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15260 memory=self.op.memory,
15261 disks=self.op.disks,
15262 disk_template=self.op.disk_template,
15263 os=self.op.os,
15264 tags=self.op.tags,
15265 nics=self.op.nics,
15266 vcpus=self.op.vcpus,
15267 spindle_use=self.op.spindle_use,
15268 hypervisor=self.op.hypervisor)
15269 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15270 req = iallocator.IAReqRelocate(name=self.op.name,
15271 relocate_from=list(self.relocate_from))
15272 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15273 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15274 target_groups=self.op.target_groups)
15275 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15276 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15277 evac_mode=self.op.evac_mode)
15278 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15279 disk_template = self.op.disk_template
15280 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15281 memory=self.op.memory,
15282 disks=self.op.disks,
15283 disk_template=disk_template,
15284 os=self.op.os,
15285 tags=self.op.tags,
15286 nics=self.op.nics,
15287 vcpus=self.op.vcpus,
15288 spindle_use=self.op.spindle_use,
15289 hypervisor=self.op.hypervisor)
15290 for idx in range(self.op.count)]
15291 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15292 else:
15293 raise errors.ProgrammerError("Uncaught mode %s in"
15294 " LUTestAllocator.Exec", self.op.mode)
15296 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15297 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15298 result = ial.in_text
15299 else:
15300 ial.Run(self.op.allocator, validate=False)
15301 result = ial.out_text
15302 return result
15305 #: Query type implementations
15306 _QUERY_IMPL = {
15307 constants.QR_CLUSTER: _ClusterQuery,
15308 constants.QR_INSTANCE: _InstanceQuery,
15309 constants.QR_NODE: _NodeQuery,
15310 constants.QR_GROUP: _GroupQuery,
15311 constants.QR_OS: _OsQuery,
15312 constants.QR_EXPORT: _ExportQuery,
15313 }
15315 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15318 def _GetQueryImplementation(name):
15319 """Returns the implemtnation for a query type.
15321 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15325 return _QUERY_IMPL[name]
15327 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15328 errors.ECODE_INVAL)
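# Usage sketch (assumed call site for illustration): query LUs look up the
# implementation class for the resource they were asked about, e.g.
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
# and an unknown resource name is reported back as an invalid parameter.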