code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56 from ganeti import query
  57 from ganeti import qlang
  58 from ganeti import opcodes
  59 from ganeti import ht
  60 from ganeti import rpc
  61 from ganeti import runtime
  62 from ganeti import pathutils
  63 from ganeti import vcluster
  64 from ganeti.masterd import iallocator
  65
  66 import ganeti.masterd.instance # pylint: disable=W0611
  67
  68
  69 # States of instance
  70 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  71 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  72 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  73
  74 #: Instance status in which an instance can be marked as offline/online
  75 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  76   constants.ADMINST_OFFLINE,
  77   ]))
  78
  79
  80 class ResultWithJobs:
  81   """Data container for LU results with jobs.
  82
  83   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  84   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  85   contained in the C{jobs} attribute and include the job IDs in the opcode
  86   result.
  87
  88   """
  89   def __init__(self, jobs, **kwargs):
  90     """Initializes this class.
  91
  92     Additional return values can be specified as keyword arguments.
  93
  94     @type jobs: list of lists of L{opcode.OpCode}
  95     @param jobs: A list of lists of opcode objects
  96
  97     """
  98     self.jobs = jobs
  99     self.other = kwargs
 100
 101
 102 class LogicalUnit(object):
 103   """Logical Unit base class.
 104
 105   Subclasses must follow these rules:
 106     - implement ExpandNames
 107     - implement CheckPrereq (except when tasklets are used)
 108     - implement Exec (except when tasklets are used)
 109     - implement BuildHooksEnv
 110     - implement BuildHooksNodes
 111     - redefine HPATH and HTYPE
 112     - optionally redefine their run requirements:
 113         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 114
 115   Note that all commands require root permissions.
 116
 117   @ivar dry_run_result: the value (if any) that will be returned to the caller
 118       in dry-run mode (signalled by opcode dry_run parameter)
 119
 120   """
 121   HPATH = None
 122   HTYPE = None
 123   REQ_BGL = True
 124
 125   def __init__(self, processor, op, context, rpc_runner):
 126     """Constructor for LogicalUnit.
 127
 128     This needs to be overridden in derived classes in order to check op
 129     validity.
 130
 131     """
 132     self.proc = processor
 133     self.op = op
 134     self.cfg = context.cfg
 135     self.glm = context.glm
 136     # readability alias
 137     self.owned_locks = context.glm.list_owned
 138     self.context = context
 139     self.rpc = rpc_runner
 140     # Dicts used to declare locking needs to mcpu
 141     self.needed_locks = None
 142     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 143     self.add_locks = {}
 144     self.remove_locks = {}
 145     # Used to force good behavior when calling helper functions
 146     self.recalculate_locks = {}
 147     # logging
 148     self.Log = processor.Log # pylint: disable=C0103
 149     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 150     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 151     self.LogStep = processor.LogStep # pylint: disable=C0103
 152     # support for dry-run
 153     self.dry_run_result = None
 154     # support for generic debug attribute
 155     if (not hasattr(self.op, "debug_level") or
 156         not isinstance(self.op.debug_level, int)):
 157       self.op.debug_level = 0
 158
 159     # Tasklets
 160     self.tasklets = None
 161
 162     # Validate opcode parameters and set defaults
 163     self.op.Validate(True)
 164
 165     self.CheckArguments()
 166
 167   def CheckArguments(self):
 168     """Check syntactic validity for the opcode arguments.
 169
 170     This method is for doing a simple syntactic check and ensure
 171     validity of opcode parameters, without any cluster-related
 172     checks. While the same can be accomplished in ExpandNames and/or
 173     CheckPrereq, doing these separate is better because:
 174
 175       - ExpandNames is left as as purely a lock-related function
 176       - CheckPrereq is run after we have acquired locks (and possible
 177         waited for them)
 178
 179     The function is allowed to change the self.op attribute so that
 180     later methods can no longer worry about missing parameters.
 181
 182     """
 183     pass
 184
 185   def ExpandNames(self):
 186     """Expand names for this LU.
 187
 188     This method is called before starting to execute the opcode, and it should
 189     update all the parameters of the opcode to their canonical form (e.g. a
 190     short node name must be fully expanded after this method has successfully
 191     completed). This way locking, hooks, logging, etc. can work correctly.
 192
 193     LUs which implement this method must also populate the self.needed_locks
 194     member, as a dict with lock levels as keys, and a list of needed lock names
 195     as values. Rules:
 196
 197       - use an empty dict if you don't need any lock
 198       - if you don't need any lock at a particular level omit that
 199         level (note that in this case C{DeclareLocks} won't be called
 200         at all for that level)
 201       - if you need locks at a level, but you can't calculate it in
 202         this function, initialise that level with an empty list and do
 203         further processing in L{LogicalUnit.DeclareLocks} (see that
 204         function's docstring)
 205       - don't put anything for the BGL level
 206       - if you want all locks at a level use L{locking.ALL_SET} as a value
 207
 208     If you need to share locks (rather than acquire them exclusively) at one
 209     level you can modify self.share_locks, setting a true value (usually 1) for
 210     that level. By default locks are not shared.
 211
 212     This function can also define a list of tasklets, which then will be
 213     executed in order instead of the usual LU-level CheckPrereq and Exec
 214     functions, if those are not defined by the LU.
 215
 216     Examples::
 217
 218       # Acquire all nodes and one instance
 219       self.needed_locks = {
 220         locking.LEVEL_NODE: locking.ALL_SET,
 221         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 222       }
 223       # Acquire just two nodes
 224       self.needed_locks = {
 225         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 226       }
 227       # Acquire no locks
 228       self.needed_locks = {} # No, you can't leave it to the default value None
 229
 230     """
 231     # The implementation of this method is mandatory only if the new LU is
 232     # concurrent, so that old LUs don't need to be changed all at the same
 233     # time.
 234     if self.REQ_BGL:
 235       self.needed_locks = {} # Exclusive LUs don't need locks.
 236     else:
 237       raise NotImplementedError
 238
 239   def DeclareLocks(self, level):
 240     """Declare LU locking needs for a level
 241
 242     While most LUs can just declare their locking needs at ExpandNames time,
 243     sometimes there's the need to calculate some locks after having acquired
 244     the ones before. This function is called just before acquiring locks at a
 245     particular level, but after acquiring the ones at lower levels, and permits
 246     such calculations. It can be used to modify self.needed_locks, and by
 247     default it does nothing.
 248
 249     This function is only called if you have something already set in
 250     self.needed_locks for the level.
 251
 252     @param level: Locking level which is going to be locked
 253     @type level: member of L{ganeti.locking.LEVELS}
 254
 255     """
 256
 257   def CheckPrereq(self):
 258     """Check prerequisites for this LU.
 259
 260     This method should check that the prerequisites for the execution
 261     of this LU are fulfilled. It can do internode communication, but
 262     it should be idempotent - no cluster or system changes are
 263     allowed.
 264
 265     The method should raise errors.OpPrereqError in case something is
 266     not fulfilled. Its return value is ignored.
 267
 268     This method should also update all the parameters of the opcode to
 269     their canonical form if it hasn't been done by ExpandNames before.
 270
 271     """
 272     if self.tasklets is not None:
 273       for (idx, tl) in enumerate(self.tasklets):
 274         logging.debug("Checking prerequisites for tasklet %s/%s",
 275                       idx + 1, len(self.tasklets))
 276         tl.CheckPrereq()
 277     else:
 278       pass
 279
 280   def Exec(self, feedback_fn):
 281     """Execute the LU.
 282
 283     This method should implement the actual work. It should raise
 284     errors.OpExecError for failures that are somewhat dealt with in
 285     code, or expected.
 286
 287     """
 288     if self.tasklets is not None:
 289       for (idx, tl) in enumerate(self.tasklets):
 290         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 291         tl.Exec(feedback_fn)
 292     else:
 293       raise NotImplementedError
 294
 295   def BuildHooksEnv(self):
 296     """Build hooks environment for this LU.
 297
 298     @rtype: dict
 299     @return: Dictionary containing the environment that will be used for
 300       running the hooks for this LU. The keys of the dict must not be prefixed
 301       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 302       will extend the environment with additional variables. If no environment
 303       should be defined, an empty dictionary should be returned (not C{None}).
 304     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 305       will not be called.
 306
 307     """
 308     raise NotImplementedError
 309
 310   def BuildHooksNodes(self):
 311     """Build list of nodes to run LU's hooks.
 312
 313     @rtype: tuple; (list, list)
 314     @return: Tuple containing a list of node names on which the hook
 315       should run before the execution and a list of node names on which the
 316       hook should run after the execution. No nodes should be returned as an
 317       empty list (and not None).
 318     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 319       will not be called.
 320
 321     """
 322     raise NotImplementedError
 323
 324   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 325     """Notify the LU about the results of its hooks.
 326
 327     This method is called every time a hooks phase is executed, and notifies
 328     the Logical Unit about the hooks' result. The LU can then use it to alter
 329     its result based on the hooks.  By default the method does nothing and the
 330     previous result is passed back unchanged but any LU can define it if it
 331     wants to use the local cluster hook-scripts somehow.
 332
 333     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 334         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 335     @param hook_results: the results of the multi-node hooks rpc call
 336     @param feedback_fn: function used send feedback back to the caller
 337     @param lu_result: the previous Exec result this LU had, or None
 338         in the PRE phase
 339     @return: the new Exec result, based on the previous result
 340         and hook results
 341
 342     """
 343     # API must be kept, thus we ignore the unused argument and could
 344     # be a function warnings
 345     # pylint: disable=W0613,R0201
 346     return lu_result
 347
 348   def _ExpandAndLockInstance(self):
 349     """Helper function to expand and lock an instance.
 350
 351     Many LUs that work on an instance take its name in self.op.instance_name
 352     and need to expand it and then declare the expanded name for locking. This
 353     function does it, and then updates self.op.instance_name to the expanded
 354     name. It also initializes needed_locks as a dict, if this hasn't been done
 355     before.
 356
 357     """
 358     if self.needed_locks is None:
 359       self.needed_locks = {}
 360     else:
 361       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 362         "_ExpandAndLockInstance called with instance-level locks set"
 363     self.op.instance_name = _ExpandInstanceName(self.cfg,
 364                                                 self.op.instance_name)
 365     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 366
 367   def _LockInstancesNodes(self, primary_only=False,
 368                           level=locking.LEVEL_NODE):
 369     """Helper function to declare instances' nodes for locking.
 370
 371     This function should be called after locking one or more instances to lock
 372     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 373     with all primary or secondary nodes for instances already locked and
 374     present in self.needed_locks[locking.LEVEL_INSTANCE].
 375
 376     It should be called from DeclareLocks, and for safety only works if
 377     self.recalculate_locks[locking.LEVEL_NODE] is set.
 378
 379     In the future it may grow parameters to just lock some instance's nodes, or
 380     to just lock primaries or secondary nodes, if needed.
 381
 382     If should be called in DeclareLocks in a way similar to::
 383
 384       if level == locking.LEVEL_NODE:
 385         self._LockInstancesNodes()
 386
 387     @type primary_only: boolean
 388     @param primary_only: only lock primary nodes of locked instances
 389     @param level: Which lock level to use for locking nodes
 390
 391     """
 392     assert level in self.recalculate_locks, \
 393       "_LockInstancesNodes helper function called with no nodes to recalculate"
 394
 395     # TODO: check if we're really been called with the instance locks held
 396
 397     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 398     # future we might want to have different behaviors depending on the value
 399     # of self.recalculate_locks[locking.LEVEL_NODE]
 400     wanted_nodes = []
 401     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 402     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 403       wanted_nodes.append(instance.primary_node)
 404       if not primary_only:
 405         wanted_nodes.extend(instance.secondary_nodes)
 406
 407     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 408       self.needed_locks[level] = wanted_nodes
 409     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 410       self.needed_locks[level].extend(wanted_nodes)
 411     else:
 412       raise errors.ProgrammerError("Unknown recalculation mode")
 413
 414     del self.recalculate_locks[level]
 415
 416
 417 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 418   """Simple LU which runs no hooks.
 419
 420   This LU is intended as a parent for other LogicalUnits which will
 421   run no hooks, in order to reduce duplicate code.
 422
 423   """
 424   HPATH = None
 425   HTYPE = None
 426
 427   def BuildHooksEnv(self):
 428     """Empty BuildHooksEnv for NoHooksLu.
 429
 430     This just raises an error.
 431
 432     """
 433     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 434
 435   def BuildHooksNodes(self):
 436     """Empty BuildHooksNodes for NoHooksLU.
 437
 438     """
 439     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 440
 441
 442 class Tasklet:
 443   """Tasklet base class.
 444
 445   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 446   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 447   tasklets know nothing about locks.
 448
 449   Subclasses must follow these rules:
 450     - Implement CheckPrereq
 451     - Implement Exec
 452
 453   """
 454   def __init__(self, lu):
 455     self.lu = lu
 456
 457     # Shortcuts
 458     self.cfg = lu.cfg
 459     self.rpc = lu.rpc
 460
 461   def CheckPrereq(self):
 462     """Check prerequisites for this tasklets.
 463
 464     This method should check whether the prerequisites for the execution of
 465     this tasklet are fulfilled. It can do internode communication, but it
 466     should be idempotent - no cluster or system changes are allowed.
 467
 468     The method should raise errors.OpPrereqError in case something is not
 469     fulfilled. Its return value is ignored.
 470
 471     This method should also update all parameters to their canonical form if it
 472     hasn't been done before.
 473
 474     """
 475     pass
 476
 477   def Exec(self, feedback_fn):
 478     """Execute the tasklet.
 479
 480     This method should implement the actual work. It should raise
 481     errors.OpExecError for failures that are somewhat dealt with in code, or
 482     expected.
 483
 484     """
 485     raise NotImplementedError
 486
 487
 488 class _QueryBase:
 489   """Base for query utility classes.
 490
 491   """
 492   #: Attribute holding field definitions
 493   FIELDS = None
 494
 495   #: Field to sort by
 496   SORT_FIELD = "name"
 497
 498   def __init__(self, qfilter, fields, use_locking):
 499     """Initializes this class.
 500
 501     """
 502     self.use_locking = use_locking
 503
 504     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 505                              namefield=self.SORT_FIELD)
 506     self.requested_data = self.query.RequestedData()
 507     self.names = self.query.RequestedNames()
 508
 509     # Sort only if no names were requested
 510     self.sort_by_name = not self.names
 511
 512     self.do_locking = None
 513     self.wanted = None
 514
 515   def _GetNames(self, lu, all_names, lock_level):
 516     """Helper function to determine names asked for in the query.
 517
 518     """
 519     if self.do_locking:
 520       names = lu.owned_locks(lock_level)
 521     else:
 522       names = all_names
 523
 524     if self.wanted == locking.ALL_SET:
 525       assert not self.names
 526       # caller didn't specify names, so ordering is not important
 527       return utils.NiceSort(names)
 528
 529     # caller specified names and we must keep the same order
 530     assert self.names
 531     assert not self.do_locking or lu.glm.is_owned(lock_level)
 532
 533     missing = set(self.wanted).difference(names)
 534     if missing:
 535       raise errors.OpExecError("Some items were removed before retrieving"
 536                                " their data: %s" % missing)
 537
 538     # Return expanded names
 539     return self.wanted
 540
 541   def ExpandNames(self, lu):
 542     """Expand names for this query.
 543
 544     See L{LogicalUnit.ExpandNames}.
 545
 546     """
 547     raise NotImplementedError()
 548
 549   def DeclareLocks(self, lu, level):
 550     """Declare locks for this query.
 551
 552     See L{LogicalUnit.DeclareLocks}.
 553
 554     """
 555     raise NotImplementedError()
 556
 557   def _GetQueryData(self, lu):
 558     """Collects all data for this query.
 559
 560     @return: Query data object
 561
 562     """
 563     raise NotImplementedError()
 564
 565   def NewStyleQuery(self, lu):
 566     """Collect data and execute query.
 567
 568     """
 569     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 570                                   sort_by_name=self.sort_by_name)
 571
 572   def OldStyleQuery(self, lu):
 573     """Collect data and execute query.
 574
 575     """
 576     return self.query.OldStyleQuery(self._GetQueryData(lu),
 577                                     sort_by_name=self.sort_by_name)
 578
 579
 580 def _ShareAll():
 581   """Returns a dict declaring all lock levels shared.
 582
 583   """
 584   return dict.fromkeys(locking.LEVELS, 1)
 585
 586
 587 def _AnnotateDiskParams(instance, devs, cfg):
 588   """Little helper wrapper to the rpc annotation method.
 589
 590   @param instance: The instance object
 591   @type devs: List of L{objects.Disk}
 592   @param devs: The root devices (not any of its children!)
 593   @param cfg: The config object
 594   @returns The annotated disk copies
 595   @see L{rpc.AnnotateDiskParams}
 596
 597   """
 598   return rpc.AnnotateDiskParams(instance.disk_template, devs,
 599                                 cfg.GetInstanceDiskParams(instance))
 600
 601
 602 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 603                               cur_group_uuid):
 604   """Checks if node groups for locked instances are still correct.
 605
 606   @type cfg: L{config.ConfigWriter}
 607   @param cfg: Cluster configuration
 608   @type instances: dict; string as key, L{objects.Instance} as value
 609   @param instances: Dictionary, instance name as key, instance object as value
 610   @type owned_groups: iterable of string
 611   @param owned_groups: List of owned groups
 612   @type owned_nodes: iterable of string
 613   @param owned_nodes: List of owned nodes
 614   @type cur_group_uuid: string or None
 615   @param cur_group_uuid: Optional group UUID to check against instance's groups
 616
 617   """
 618   for (name, inst) in instances.items():
 619     assert owned_nodes.issuperset(inst.all_nodes), \
 620       "Instance %s's nodes changed while we kept the lock" % name
 621
 622     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 623
 624     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 625       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 626
 627
 628 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
 629                              primary_only=False):
 630   """Checks if the owned node groups are still correct for an instance.
 631
 632   @type cfg: L{config.ConfigWriter}
 633   @param cfg: The cluster configuration
 634   @type instance_name: string
 635   @param instance_name: Instance name
 636   @type owned_groups: set or frozenset
 637   @param owned_groups: List of currently owned node groups
 638   @type primary_only: boolean
 639   @param primary_only: Whether to check node groups for only the primary node
 640
 641   """
 642   inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
 643
 644   if not owned_groups.issuperset(inst_groups):
 645     raise errors.OpPrereqError("Instance %s's node groups changed since"
 646                                " locks were acquired, current groups are"
 647                                " are '%s', owning groups '%s'; retry the"
 648                                " operation" %
 649                                (instance_name,
 650                                 utils.CommaJoin(inst_groups),
 651                                 utils.CommaJoin(owned_groups)),
 652                                errors.ECODE_STATE)
 653
 654   return inst_groups
 655
 656
 657 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 658   """Checks if the instances in a node group are still correct.
 659
 660   @type cfg: L{config.ConfigWriter}
 661   @param cfg: The cluster configuration
 662   @type group_uuid: string
 663   @param group_uuid: Node group UUID
 664   @type owned_instances: set or frozenset
 665   @param owned_instances: List of currently owned instances
 666
 667   """
 668   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 669   if owned_instances != wanted_instances:
 670     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 671                                " locks were acquired, wanted '%s', have '%s';"
 672                                " retry the operation" %
 673                                (group_uuid,
 674                                 utils.CommaJoin(wanted_instances),
 675                                 utils.CommaJoin(owned_instances)),
 676                                errors.ECODE_STATE)
 677
 678   return wanted_instances
 679
 680
 681 def _SupportsOob(cfg, node):
 682   """Tells if node supports OOB.
 683
 684   @type cfg: L{config.ConfigWriter}
 685   @param cfg: The cluster configuration
 686   @type node: L{objects.Node}
 687   @param node: The node
 688   @return: The OOB script if supported or an empty string otherwise
 689
 690   """
 691   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 692
 693
 694 def _CopyLockList(names):
 695   """Makes a copy of a list of lock names.
 696
 697   Handles L{locking.ALL_SET} correctly.
 698
 699   """
 700   if names == locking.ALL_SET:
 701     return locking.ALL_SET
 702   else:
 703     return names[:]
 704
 705
 706 def _GetWantedNodes(lu, nodes):
 707   """Returns list of checked and expanded node names.
 708
 709   @type lu: L{LogicalUnit}
 710   @param lu: the logical unit on whose behalf we execute
 711   @type nodes: list
 712   @param nodes: list of node names or None for all nodes
 713   @rtype: list
 714   @return: the list of nodes, sorted
 715   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 716
 717   """
 718   if nodes:
 719     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 720
 721   return utils.NiceSort(lu.cfg.GetNodeList())
 722
 723
 724 def _GetWantedInstances(lu, instances):
 725   """Returns list of checked and expanded instance names.
 726
 727   @type lu: L{LogicalUnit}
 728   @param lu: the logical unit on whose behalf we execute
 729   @type instances: list
 730   @param instances: list of instance names or None for all instances
 731   @rtype: list
 732   @return: the list of instances, sorted
 733   @raise errors.OpPrereqError: if the instances parameter is wrong type
 734   @raise errors.OpPrereqError: if any of the passed instances is not found
 735
 736   """
 737   if instances:
 738     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 739   else:
 740     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 741   return wanted
 742
 743
 744 def _GetUpdatedParams(old_params, update_dict,
 745                       use_default=True, use_none=False):
 746   """Return the new version of a parameter dictionary.
 747
 748   @type old_params: dict
 749   @param old_params: old parameters
 750   @type update_dict: dict
 751   @param update_dict: dict containing new parameter values, or
 752       constants.VALUE_DEFAULT to reset the parameter to its default
 753       value
 754   @param use_default: boolean
 755   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 756       values as 'to be deleted' values
 757   @param use_none: boolean
 758   @type use_none: whether to recognise C{None} values as 'to be
 759       deleted' values
 760   @rtype: dict
 761   @return: the new parameter dictionary
 762
 763   """
 764   params_copy = copy.deepcopy(old_params)
 765   for key, val in update_dict.iteritems():
 766     if ((use_default and val == constants.VALUE_DEFAULT) or
 767         (use_none and val is None)):
 768       try:
 769         del params_copy[key]
 770       except KeyError:
 771         pass
 772     else:
 773       params_copy[key] = val
 774   return params_copy
 775
 776
 777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 778   """Return the new version of a instance policy.
 779
 780   @param group_policy: whether this policy applies to a group and thus
 781     we should support removal of policy entries
 782
 783   """
 784   use_none = use_default = group_policy
 785   ipolicy = copy.deepcopy(old_ipolicy)
 786   for key, value in new_ipolicy.items():
 787     if key not in constants.IPOLICY_ALL_KEYS:
 788       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 789                                  errors.ECODE_INVAL)
 790     if key in constants.IPOLICY_ISPECS:
 791       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 792       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 793                                        use_none=use_none,
 794                                        use_default=use_default)
 795     else:
 796       if (not value or value == [constants.VALUE_DEFAULT] or
 797           value == constants.VALUE_DEFAULT):
 798         if group_policy:
 799           del ipolicy[key]
 800         else:
 801           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 802                                      " on the cluster'" % key,
 803                                      errors.ECODE_INVAL)
 804       else:
 805         if key in constants.IPOLICY_PARAMETERS:
 806           # FIXME: we assume all such values are float
 807           try:
 808             ipolicy[key] = float(value)
 809           except (TypeError, ValueError), err:
 810             raise errors.OpPrereqError("Invalid value for attribute"
 811                                        " '%s': '%s', error: %s" %
 812                                        (key, value, err), errors.ECODE_INVAL)
 813         else:
 814           # FIXME: we assume all others are lists; this should be redone
 815           # in a nicer way
 816           ipolicy[key] = list(value)
 817   try:
 818     objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
 819   except errors.ConfigurationError, err:
 820     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 821                                errors.ECODE_INVAL)
 822   return ipolicy
 823
 824
 825 def _UpdateAndVerifySubDict(base, updates, type_check):
 826   """Updates and verifies a dict with sub dicts of the same type.
 827
 828   @param base: The dict with the old data
 829   @param updates: The dict with the new data
 830   @param type_check: Dict suitable to ForceDictType to verify correct types
 831   @returns: A new dict with updated and verified values
 832
 833   """
 834   def fn(old, value):
 835     new = _GetUpdatedParams(old, value)
 836     utils.ForceDictType(new, type_check)
 837     return new
 838
 839   ret = copy.deepcopy(base)
 840   ret.update(dict((key, fn(base.get(key, {}), value))
 841                   for key, value in updates.items()))
 842   return ret
 843
 844
 845 def _MergeAndVerifyHvState(op_input, obj_input):
 846   """Combines the hv state from an opcode with the one of the object
 847
 848   @param op_input: The input dict from the opcode
 849   @param obj_input: The input dict from the objects
 850   @return: The verified and updated dict
 851
 852   """
 853   if op_input:
 854     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 855     if invalid_hvs:
 856       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 857                                  " %s" % utils.CommaJoin(invalid_hvs),
 858                                  errors.ECODE_INVAL)
 859     if obj_input is None:
 860       obj_input = {}
 861     type_check = constants.HVSTS_PARAMETER_TYPES
 862     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 863
 864   return None
 865
 866
 867 def _MergeAndVerifyDiskState(op_input, obj_input):
 868   """Combines the disk state from an opcode with the one of the object
 869
 870   @param op_input: The input dict from the opcode
 871   @param obj_input: The input dict from the objects
 872   @return: The verified and updated dict
 873   """
 874   if op_input:
 875     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 876     if invalid_dst:
 877       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 878                                  utils.CommaJoin(invalid_dst),
 879                                  errors.ECODE_INVAL)
 880     type_check = constants.DSS_PARAMETER_TYPES
 881     if obj_input is None:
 882       obj_input = {}
 883     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 884                                               type_check))
 885                 for key, value in op_input.items())
 886
 887   return None
 888
 889
 890 def _ReleaseLocks(lu, level, names=None, keep=None):
 891   """Releases locks owned by an LU.
 892
 893   @type lu: L{LogicalUnit}
 894   @param level: Lock level
 895   @type names: list or None
 896   @param names: Names of locks to release
 897   @type keep: list or None
 898   @param keep: Names of locks to retain
 899
 900   """
 901   assert not (keep is not None and names is not None), \
 902          "Only one of the 'names' and the 'keep' parameters can be given"
 903
 904   if names is not None:
 905     should_release = names.__contains__
 906   elif keep:
 907     should_release = lambda name: name not in keep
 908   else:
 909     should_release = None
 910
 911   owned = lu.owned_locks(level)
 912   if not owned:
 913     # Not owning any lock at this level, do nothing
 914     pass
 915
 916   elif should_release:
 917     retain = []
 918     release = []
 919
 920     # Determine which locks to release
 921     for name in owned:
 922       if should_release(name):
 923         release.append(name)
 924       else:
 925         retain.append(name)
 926
 927     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 928
 929     # Release just some locks
 930     lu.glm.release(level, names=release)
 931
 932     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 933   else:
 934     # Release everything
 935     lu.glm.release(level)
 936
 937     assert not lu.glm.is_owned(level), "No locks should be owned"
 938
 939
 940 def _MapInstanceDisksToNodes(instances):
 941   """Creates a map from (node, volume) to instance name.
 942
 943   @type instances: list of L{objects.Instance}
 944   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 945
 946   """
 947   return dict(((node, vol), inst.name)
 948               for inst in instances
 949               for (node, vols) in inst.MapLVsByNode().items()
 950               for vol in vols)
 951
 952
 953 def _RunPostHook(lu, node_name):
 954   """Runs the post-hook for an opcode on a single node.
 955
 956   """
 957   hm = lu.proc.BuildHooksManager(lu)
 958   try:
 959     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 960   except Exception, err: # pylint: disable=W0703
 961     lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
 962
 963
 964 def _CheckOutputFields(static, dynamic, selected):
 965   """Checks whether all selected fields are valid.
 966
 967   @type static: L{utils.FieldSet}
 968   @param static: static fields set
 969   @type dynamic: L{utils.FieldSet}
 970   @param dynamic: dynamic fields set
 971
 972   """
 973   f = utils.FieldSet()
 974   f.Extend(static)
 975   f.Extend(dynamic)
 976
 977   delta = f.NonMatching(selected)
 978   if delta:
 979     raise errors.OpPrereqError("Unknown output fields selected: %s"
 980                                % ",".join(delta), errors.ECODE_INVAL)
 981
 982
 983 def _CheckGlobalHvParams(params):
 984   """Validates that given hypervisor params are not global ones.
 985
 986   This will ensure that instances don't get customised versions of
 987   global params.
 988
 989   """
 990   used_globals = constants.HVC_GLOBALS.intersection(params)
 991   if used_globals:
 992     msg = ("The following hypervisor parameters are global and cannot"
 993            " be customized at instance level, please modify them at"
 994            " cluster level: %s" % utils.CommaJoin(used_globals))
 995     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 996
 997
 998 def _CheckNodeOnline(lu, node, msg=None):
 999   """Ensure that a given node is online.
1000
1001   @param lu: the LU on behalf of which we make the check
1002   @param node: the node to check
1003   @param msg: if passed, should be a message to replace the default one
1004   @raise errors.OpPrereqError: if the node is offline
1005
1006   """
1007   if msg is None:
1008     msg = "Can't use offline node"
1009   if lu.cfg.GetNodeInfo(node).offline:
1010     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1011
1012
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if not node_info.drained:
    return

  raise errors.OpPrereqError("Can't use drained node %s" % node,
                             errors.ECODE_STATE)
1024
1025
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if node_info.vm_capable:
    return

  raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                             errors.ECODE_STATE)
1037
1038
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  The OS is looked up on the node via RPC; unless C{force_variant} is set,
  the returned OS object is also checked with L{_CheckOSVariant}.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  os_result = lu.rpc.call_os_get(node, os_name)
  failure = ("OS '%s' not in supported OS list for node %s" %
             (os_name, node))
  os_result.Raise(failure, prereq=True, ecode=errors.ECODE_INVAL)

  if force_variant:
    return

  _CheckOSVariant(os_result.payload, os_name)
1055
1056
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  ip_result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  ip_result.Raise("Failure checking secondary ip on node %s" % node,
                  prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if ip_result.payload:
    return

  msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
         " please fix and re-run this command" % secondary_ip)
  # The caller decides which error class fits the phase it is running in
  if prereq:
    raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
  raise errors.OpExecError(msg)
1082
1083
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  @return: the result of reading L{pathutils.CLUSTER_DOMAIN_SECRET_FILE}
      with L{utils.ReadOneLineFile} in strict mode

  """
  secret_file = pathutils.CLUSTER_DOMAIN_SECRET_FILE
  return utils.ReadOneLineFile(secret_file, strict=True)
1090
1091
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  If L{constants.ADMINST_UP} is not among the required states, the primary
  node is additionally asked (unless it is offline) whether the instance is
  actually running there.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param req_states: the admin states the instance is allowed to be in
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)

  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP in req_states:
    # A running instance is acceptable, no need to ask the node
    return

  pnode = instance.primary_node
  if lu.cfg.GetNodeInfo(pnode).offline:
    lu.LogWarning("Primary node offline, ignoring check that instance"
                  " is down")
    return

  running = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  running.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)
  if instance.name in running.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, msg), errors.ECODE_STATE)
1120
1121
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  A value of C{None} or L{constants.VALUE_AUTO} is never a violation. A
  bound that is missing from the policy defaults to the value itself and
  hence cannot be violated.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min and max values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in (None, constants.VALUE_AUTO):
    return None

  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  out_of_range = value > max_v or min_v > value
  if not out_of_range:
    return None

  fqn = name
  if qualifier:
    fqn = "%s/%s" % (name, qualifier)
  return ("%s value %s is not in range [%s, %s]" %
          (fqn, value, min_v, max_v))
1146
1147
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  Each scalar spec is checked once; the disk size spec is checked once per
  disk, using the disk index as qualifier.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  checks = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ]
  for (idx, size) in enumerate(disk_sizes):
    checks.append((constants.ISPEC_DISK_SIZE, str(idx), size))

  violations = []
  for (name, qualifier, value) in checks:
    verdict = _compute_fn(name, qualifier, ipolicy, value)
    # Only keep results reporting an actual violation
    if verdict:
      violations.append(verdict)
  return violations
1185
1186
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  Memory (maxmem), cpu count and spindle use are read from the instance's
  own C{beparams}, with C{None} standing in for values not set there; disk
  and nic data come from the instance's disk and nic lists.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: whatever C{_compute_fn} returns for the extracted values
  @see: L{_ComputeIPolicySpecViolation}

  """
  beparams = instance.beparams
  mem_size = beparams.get(constants.BE_MAXMEM, None)
  cpu_count = beparams.get(constants.BE_VCPUS, None)
  spindle_use = beparams.get(constants.BE_SPINDLE_USE, None)

  disk_sizes = []
  for disk in instance.disks:
    disk_sizes.append(disk.size)

  return _compute_fn(ipolicy, mem_size, cpu_count, len(instance.disks),
                     len(instance.nics), disk_sizes, spindle_use)
1208
1209
def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=
                                           _ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  Values missing from the spec default to C{None} (memory, cpu count,
  spindle use), C{0} (disk and nic count) or an empty list (disk sizes).

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: whatever C{_compute_fn} returns for the extracted values
  @see: L{_ComputeIPolicySpecViolation}

  """
  spec_get = instance_spec.get

  return _compute_fn(ipolicy,
                     spec_get(constants.ISPEC_MEM_SIZE, None),
                     spec_get(constants.ISPEC_CPU_COUNT, None),
                     spec_get(constants.ISPEC_DISK_COUNT, 0),
                     spec_get(constants.ISPEC_NIC_COUNT, 0),
                     spec_get(constants.ISPEC_DISK_SIZE, []),
                     spec_get(constants.ISPEC_SPINDLE_USE, None))
1231
1232
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  No check is made when the instance stays in its current group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: an empty list if both groups are equal, otherwise the result of
      C{_compute_fn}
  @see: L{_ComputeIPolicySpecViolation}

  """
  stays_in_group = (current_group == target_group)
  if stays_in_group:
    return []

  return _compute_fn(ipolicy, instance)
1250
1251
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param lu: the LU on behalf of which we make the check
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate (an object with a C{group} attribute)
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @raise errors.OpPrereqError: if violations are found and C{ignore} is not
      set
  @see: L{_ComputeIPolicySpecViolation}

  """
  pnode_info = lu.cfg.GetNodeInfo(instance.primary_node)
  violations = _compute_fn(ipolicy, instance, pnode_info.group, node.group)
  if not violations:
    return

  msg = ("Instance does not meet target node group's (%s) instance"
         " policy: %s") % (node.group, utils.CommaJoin(violations))
  if not ignore:
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

  # Violations are tolerated, but the user is still told about them
  lu.LogWarning(msg)
1274
1275
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: The names of the instances which violate the new ipolicy but
      did not violate the old one

  """
  violating_new = _ComputeViolatingInstances(new_ipolicy, instances)
  violating_old = _ComputeViolatingInstances(old_ipolicy, instances)
  return violating_new - violating_old
1288
1289
1290 def _ExpandItemName(fn, name, kind):
1291   """Expand an item name.
1292
1293   @param fn: the function to use for expansion
1294   @param name: requested item name
1295   @param kind: text description ('Node' or 'Instance')
1296   @return: the resolved (full) name
1297   @raise errors.OpPrereqError: if the item is not found
1298
1299   """
1300   full_name = fn(name)
1301   if full_name is None:
1302     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1303                                errors.ECODE_NOENT)
1304   return full_name
1305
1306
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes.

  @param cfg: configuration object providing C{ExpandNodeName}
  @param name: requested node name
  @return: the resolved (full) node name

  """
  expand_fn = cfg.ExpandNodeName
  return _ExpandItemName(expand_fn, name, "Node")
1310
1311
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance.

  @param cfg: configuration object providing C{ExpandInstanceName}
  @param name: requested instance name
  @return: the resolved (full) instance name

  """
  expand_fn = cfg.ExpandInstanceName
  return _ExpandItemName(expand_fn, name, "Instance")
1315
1316
1317 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1318                           minmem, maxmem, vcpus, nics, disk_template, disks,
1319                           bep, hvp, hypervisor_name, tags):
1320   """Builds instance related env variables for hooks
1321
1322   This builds the hook environment from individual variables.
1323
1324   @type name: string
1325   @param name: the name of the instance
1326   @type primary_node: string
1327   @param primary_node: the name of the instance's primary node
1328   @type secondary_nodes: list
1329   @param secondary_nodes: list of secondary nodes as strings
1330   @type os_type: string
1331   @param os_type: the name of the instance's OS
1332   @type status: string
1333   @param status: the desired status of the instance
1334   @type minmem: string
1335   @param minmem: the minimum memory size of the instance
1336   @type maxmem: string
1337   @param maxmem: the maximum memory size of the instance
1338   @type vcpus: string
1339   @param vcpus: the count of VCPUs the instance has
1340   @type nics: list
1341   @param nics: list of tuples (ip, mac, mode, link) representing
1342       the NICs the instance has
1343   @type disk_template: string
1344   @param disk_template: the disk template of the instance
1345   @type disks: list
1346   @param disks: the list of (size, mode) pairs
1347   @type bep: dict
1348   @param bep: the backend parameters for the instance
1349   @type hvp: dict
1350   @param hvp: the hypervisor parameters for the instance
1351   @type hypervisor_name: string
1352   @param hypervisor_name: the hypervisor for the instance
1353   @type tags: list
1354   @param tags: list of instance tags as strings
1355   @rtype: dict
1356   @return: the hook environment for this instance
1357
1358   """
1359   env = {
1360     "OP_TARGET": name,
1361     "INSTANCE_NAME": name,
1362     "INSTANCE_PRIMARY": primary_node,
1363     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1364     "INSTANCE_OS_TYPE": os_type,
1365     "INSTANCE_STATUS": status,
1366     "INSTANCE_MINMEM": minmem,
1367     "INSTANCE_MAXMEM": maxmem,
1368     # TODO(2.7) remove deprecated "memory" value
1369     "INSTANCE_MEMORY": maxmem,
1370     "INSTANCE_VCPUS": vcpus,
1371     "INSTANCE_DISK_TEMPLATE": disk_template,
1372     "INSTANCE_HYPERVISOR": hypervisor_name,
1373   }
1374   if nics:
1375     nic_count = len(nics)
1376     for idx, (ip, mac, mode, link) in enumerate(nics):
1377       if ip is None:
1378         ip = ""
1379       env["INSTANCE_NIC%d_IP" % idx] = ip
1380       env["INSTANCE_NIC%d_MAC" % idx] = mac
1381       env["INSTANCE_NIC%d_MODE" % idx] = mode
1382       env["INSTANCE_NIC%d_LINK" % idx] = link
1383       if mode == constants.NIC_MODE_BRIDGED:
1384         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1385   else:
1386     nic_count = 0
1387
1388   env["INSTANCE_NIC_COUNT"] = nic_count
1389
1390   if disks:
1391     disk_count = len(disks)
1392     for idx, (size, mode) in enumerate(disks):
1393       env["INSTANCE_DISK%d_SIZE" % idx] = size
1394       env["INSTANCE_DISK%d_MODE" % idx] = mode
1395   else:
1396     disk_count = 0
1397
1398   env["INSTANCE_DISK_COUNT"] = disk_count
1399
1400   if not tags:
1401     tags = []
1402
1403   env["INSTANCE_TAGS"] = " ".join(tags)
1404
1405   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1406     for key, value in source.items():
1407       env["INSTANCE_%s_%s" % (kind, key)] = value
1408
1409   return env
1410
1411
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  @rtype: list
  @return: one (ip, mac, mode, link) tuple per NIC, with mode and link taken
      from the NIC parameters as filled by the cluster's C{SimpleFillNIC}

  """
  cluster = lu.cfg.GetClusterInfo()

  def _ToHookTuple(nic):
    filled = cluster.SimpleFillNIC(nic.nicparams)
    return (nic.ip, nic.mac,
            filled[constants.NIC_MODE], filled[constants.NIC_LINK])

  return [_ToHookTuple(nic) for nic in nics]
1434
1435
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  The backend and hypervisor parameters are the instance's values as filled
  by the cluster's C{FillBE} and C{FillHV}.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)

  disk_pairs = []
  for disk in instance.disks:
    disk_pairs.append((disk.size, disk.mode))

  # Keyword arguments for _BuildInstanceHookEnv
  kwargs = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_state,
    maxmem=bep[constants.BE_MAXMEM],
    minmem=bep[constants.BE_MINMEM],
    vcpus=bep[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=disk_pairs,
    bep=bep,
    hvp=hvp,
    hypervisor_name=instance.hypervisor,
    tags=instance.tags,
    )
  if override:
    kwargs.update(override)

  return _BuildInstanceHookEnv(**kwargs) # pylint: disable=W0142
1474
1475
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  Nodes returned by the configuration's C{MaintainCandidatePool} are logged
  as promoted and re-added to the LU's context. A note is logged if there
  are more master candidates than desired.

  @param lu: the LU on behalf of which the pool is adjusted
  @param exceptions: passed through to C{MaintainCandidatePool} and
      C{GetMasterCandidateStats}

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in promoted))
    for node in promoted:
      lu.context.ReaddNode(node)

  (mc_now, mc_max, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
1490
1491
1492 def _DecideSelfPromotion(lu, exceptions=None):
1493   """Decide whether I should promote myself as a master candidate.
1494
1495   """
1496   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1497   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1498   # the new node will increase mc_max with one, so:
1499   mc_should = min(mc_should + 1, cp_size)
1500   return mc_now < mc_should
1501
1502
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  violating = set()
  for inst in instances:
    if _ComputeIPolicyInstanceViolation(ipolicy, inst):
      violating.add(inst.name)
  return frozenset(violating)
1514
1515
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  Only NICs whose filled parameters select bridged mode are considered; if
  there are none, the node is not contacted at all.

  @param lu: the LU on behalf of which we make the check
  @param target_nics: the NICs whose bridges must exist
  @param target_node: the node on which the bridges are looked for

  """
  cluster = lu.cfg.GetClusterInfo()

  bridges = []
  for nic in target_nics:
    filled = cluster.SimpleFillNIC(nic.nicparams)
    if filled[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(filled[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1528
1529
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary node

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node

  _CheckNicsBridgesExist(lu, instance.nics, target_node)
1537
1538
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  An OS without supported variants must be named without a variant; an OS
  with supported variants must be named with one of them.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the name does not conform

  """
  variant = objects.OS.GetVariant(name)
  supported = os_obj.supported_variants

  if supported:
    if not variant:
      raise errors.OpPrereqError("OS name must include a variant",
                                 errors.ECODE_INVAL)
    if variant not in supported:
      raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
  elif variant:
    raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                               " passed)" % (os_obj.name, variant),
                               errors.ECODE_INVAL)
1561
1562
1563 def _GetNodeInstancesInner(cfg, fn):
1564   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1565
1566
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: the cluster configuration object
  @param node_name: name looked up in each instance's C{all_nodes}

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1573
1574
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: the cluster configuration object
  @param node_name: name compared against each instance's C{primary_node}

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1581
1582
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: the cluster configuration object
  @param node_name: name looked up in each instance's C{secondary_nodes}

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1589
1590
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: configuration object providing the file storage directories
  @param storage_type: the storage type to return arguments for
  @rtype: list
  @return: for L{constants.ST_FILE} a list holding one list with the file
      and shared file storage directories, an empty list otherwise

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
  return [storage_dirs]
1601
1602
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Returns the indices of an instance's disks reported faulty on a node.

  The disk IDs are first set for the given node, then the node is asked for
  the mirror status of all the instance's disks.

  @param cfg: configuration object, used for C{SetDiskID}
  @param rpc_runner: object providing C{call_blockdev_getmirrorstatus}
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed to the RPC result's C{Raise} to select the kind of
      error raised on RPC failure
  @rtype: list
  @return: positions in the returned status list whose local disk status is
      L{constants.LDS_FAULTY} (presumably matching the order of
      C{instance.disks} -- to be confirmed against the RPC)

  """
  for disk in instance.disks:
    cfg.SetDiskID(disk, node_name)

  rpc_args = (instance.disks, instance)
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, rpc_args)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # Entries evaluating to false carry no status and are skipped
  return [idx for (idx, status) in enumerate(result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
1619
1620
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @param lu: the LU whose opcode is checked and possibly modified
  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  @raise errors.OpPrereqError: if both are given, or if the default is needed
      but the cluster has none

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)
  # An empty node list counts as "no node given"
  if node == []:
    node = None

  has_node = node is not None
  has_ialloc = ialloc is not None

  if has_node and has_ialloc:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)

  wants_default = ((not has_node and not has_ialloc) or
                   ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT)
  if not wants_default:
    return

  default_iallocator = lu.cfg.GetDefaultIAllocator()
  if not default_iallocator:
    raise errors.OpPrereqError("No iallocator or node given and no"
                               " cluster-wide default iallocator found;"
                               " please specify either an iallocator or a"
                               " node, or set a cluster-wide default"
                               " iallocator", errors.ECODE_INVAL)

  setattr(lu.op, iallocator_slot, default_iallocator)
1655
1656
1657 def _GetDefaultIAllocator(cfg, ialloc):
1658   """Decides on which iallocator to use.
1659
1660   @type cfg: L{config.ConfigWriter}
1661   @param cfg: Cluster configuration object
1662   @type ialloc: string or None
1663   @param ialloc: Iallocator specified in opcode
1664   @rtype: string
1665   @return: Iallocator name
1666
1667   """
1668   if not ialloc:
1669     # Use default iallocator
1670     ialloc = cfg.GetDefaultIAllocator()
1671
1672   if not ialloc:
1673     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1674                                " opcode nor as a cluster-wide default",
1675                                errors.ECODE_INVAL)
1676
1677   return ialloc
1678
1679
def _CheckHostnameSane(lu, name):
  """Resolves a hostname and checks the result still matches the input.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches; the comparison itself is delegated
  to L{utils.MatchNameComponent}.

  @param lu: the logical unit on behalf of which we're checking; used
      to log the resolved name when it differs from the given one
  @param name: the name we should resolve and check
  @return: the resolved hostname object
  @raise errors.OpPrereqError: if the resolved name does not match

  """
  resolved = netutils.GetHostname(name=name)
  resolved_name = resolved.name

  if resolved_name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, resolved_name)

  if utils.MatchNameComponent(name, [resolved_name]):
    return resolved

  raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                              " same as given hostname '%s'") %
                              (resolved_name, name), errors.ECODE_INVAL)
1699
1700
class LUClusterPostInit(LogicalUnit):
  """Logical unit whose only purpose is running the cluster-init hooks.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment holding only the cluster name as C{OP_TARGET}

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: a pair of node lists; the first is empty, the second
        contains only the master node

    """
    no_nodes = []
    master_only = [self.cfg.GetMasterNode()]
    return (no_nodes, master_only)

  def Exec(self, feedback_fn):
    """Nothing to do.

    @return: always C{True}

    """
    return True
1727
1728
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment holding only the cluster name as C{OP_TARGET}

    """
    cluster_name = self.cfg.GetClusterName()
    return {"OP_TARGET": cluster_name}

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: a pair of empty node lists

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty, i.e. the master is the
    only node left and no instances are configured.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()
    nodelist = self.cfg.GetNodeList()

    only_master_left = (len(nodelist) == 1 and nodelist[0] == master)
    if not only_master_left:
      # The master itself is not counted in the reported number
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master and asks it to deactivate the
    master IP; a failure of the latter is only logged as a warning.

    @return: the master node name, as found in the master network
        parameters

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    master_name = master_params.name

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_name)

    use_external_script = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_name,
                                                     master_params,
                                                     use_external_script)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_name
1788
1789
1790 def _VerifyCertificate(filename):
1791   """Verifies a certificate for L{LUClusterVerifyConfig}.
1792
1793   @type filename: string
1794   @param filename: Path to PEM file
1795
1796   """
1797   try:
1798     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1799                                            utils.ReadFile(filename))
1800   except Exception, err: # pylint: disable=W0703
1801     return (LUClusterVerifyConfig.ETYPE_ERROR,
1802             "Failed to load X509 certificate %s: %s" % (filename, err))
1803
1804   (errcode, msg) = \
1805     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1806                                 constants.SSL_CERT_EXPIRATION_ERROR)
1807
1808   if msg:
1809     fnamemsg = "While verifying %s: %s" % (filename, msg)
1810   else:
1811     fnamemsg = None
1812
1813   if errcode is None:
1814     return (None, fnamemsg)
1815   elif errcode == utils.CERT_WARNING:
1816     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1817   elif errcode == utils.CERT_ERROR:
1818     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1819
1820   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1821
1822
1823 def _GetAllHypervisorParameters(cluster, instances):
1824   """Compute the set of all hypervisor parameters.
1825
1826   @type cluster: L{objects.Cluster}
1827   @param cluster: the cluster object
1828   @param instances: list of L{objects.Instance}
1829   @param instances: additional instances from which to obtain parameters
1830   @rtype: list of (origin, hypervisor, parameters)
1831   @return: a list with all parameters found, indicating the hypervisor they
1832        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1833
1834   """
1835   hvp_data = []
1836
1837   for hv_name in cluster.enabled_hypervisors:
1838     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1839
1840   for os_name, os_hvp in cluster.os_hvp.items():
1841     for hv_name, hv_params in os_hvp.items():
1842       if hv_params:
1843         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1844         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1845
1846   # TODO: collapse identical parameter values in a single one
1847   for instance in instances:
1848     if instance.hvparams:
1849       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1850                        cluster.FillHV(instance)))
1851
1852   return hvp_data
1853
1854
1855 class _VerifyErrors(object):
1856   """Mix-in for cluster/group verify LUs.
1857
1858   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1859   self.op and self._feedback_fn to be available.)
1860
1861   """
1862
1863   ETYPE_FIELD = "code"
1864   ETYPE_ERROR = "ERROR"
1865   ETYPE_WARNING = "WARNING"
1866
1867   def _Error(self, ecode, item, msg, *args, **kwargs):
1868     """Format an error message.
1869
1870     Based on the opcode's error_codes parameter, either format a
1871     parseable error code, or a simpler error string.
1872
1873     This must be called only from Exec and functions called from Exec.
1874
1875     """
1876     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1877     itype, etxt, _ = ecode
1878     # first complete the msg
1879     if args:
1880       msg = msg % args
1881     # then format the whole message
1882     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1883       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1884     else:
1885       if item:
1886         item = " " + item
1887       else:
1888         item = ""
1889       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1890     # and finally report it via the feedback_fn
1891     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1892
1893   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1894     """Log an error message if the passed condition is True.
1895
1896     """
1897     cond = (bool(cond)
1898             or self.op.debug_simulate_errors) # pylint: disable=E1101
1899
1900     # If the error code is in the list of ignored errors, demote the error to a
1901     # warning
1902     (_, etxt, _) = ecode
1903     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1904       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1905
1906     if cond:
1907       self._Error(ecode, *args, **kwargs)
1908
1909     # do not mark the operation as failed for WARN cases only
1910     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1911       self.bad = self.bad or cond
1912
1913
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares that this LU itself needs no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Builds the verification jobs and returns them for submission.

    With a group name in the opcode only that group is verified.
    Otherwise a global configuration check is queued first, followed by
    one job per node group, each depending on the configuration check.

    @return: a L{ResultWithJobs} wrapping the list of jobs, each job
        being a list with a single opcode

    """
    jobs = []
    single_group = self.op.group_name

    if single_group:
      groups = [single_group]
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

    for group in groups:
      if single_group:
        depends = None
      else:
        # Always depend on global verification; the reference is relative,
        # hence it grows with the number of jobs queued so far
        depends = [(-len(jobs), [])]

      jobs.append([
        opcodes.OpClusterVerifyGroup(group_name=group,
                                     ignore_errors=self.op.ignore_errors,
                                     depends=depends)
        ])

    # Fix up all parameters
    for job in jobs:
      for op in job:
        op.debug_simulate_errors = self.op.debug_simulate_errors
        op.verbose = self.op.verbose
        op.error_codes = self.op.error_codes
        try:
          op.skip_checks = self.op.skip_checks
        except AttributeError:
          # Only opcodes other than the group verification may lack the slot
          assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1957
1958
1959 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1960   """Verifies the cluster config.
1961
1962   """
1963   REQ_BGL = False
1964
1965   def _VerifyHVP(self, hvp_data):
1966     """Verifies locally the syntax of the hypervisor parameters.
1967
1968     """
1969     for item, hv_name, hv_params in hvp_data:
1970       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1971              (item, hv_name))
1972       try:
1973         hv_class = hypervisor.GetHypervisor(hv_name)
1974         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1975         hv_class.CheckParameterSyntax(hv_params)
1976       except errors.GenericError, err:
1977         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1978
1979   def ExpandNames(self):
1980     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1981     self.share_locks = _ShareAll()
1982
1983   def CheckPrereq(self):
1984     """Check prerequisites.
1985
1986     """
1987     # Retrieve all information
1988     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1989     self.all_node_info = self.cfg.GetAllNodesInfo()
1990     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1991
1992   def Exec(self, feedback_fn):
1993     """Verify integrity of cluster, performing various test on nodes.
1994
1995     """
1996     self.bad = False
1997     self._feedback_fn = feedback_fn
1998
1999     feedback_fn("* Verifying cluster config")
2000
2001     for msg in self.cfg.VerifyConfig():
2002       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2003
2004     feedback_fn("* Verifying cluster certificate files")
2005
2006     for cert_filename in pathutils.ALL_CERT_FILES:
2007       (errcode, msg) = _VerifyCertificate(cert_filename)
2008       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2009
2010     feedback_fn("* Verifying hypervisor parameters")
2011
2012     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2013                                                 self.all_inst_info.values()))
2014
2015     feedback_fn("* Verifying all nodes belong to an existing group")
2016
2017     # We do this verification here because, should this bogus circumstance
2018     # occur, it would never be caught by VerifyGroup, which only acts on
2019     # nodes/instances reachable from existing node groups.
2020
2021     dangling_nodes = set(node.name for node in self.all_node_info.values()
2022                          if node.group not in self.all_group_info)
2023
2024     dangling_instances = {}
2025     no_node_instances = []
2026
2027     for inst in self.all_inst_info.values():
2028       if inst.primary_node in dangling_nodes:
2029         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2030       elif inst.primary_node not in self.all_node_info:
2031         no_node_instances.append(inst.name)
2032
2033     pretty_dangling = [
2034         "%s (%s)" %
2035         (node.name,
2036          utils.CommaJoin(dangling_instances.get(node.name,
2037                                                 ["no instances"])))
2038         for node in dangling_nodes]
2039
2040     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2041                   None,
2042                   "the following nodes (and their instances) belong to a non"
2043                   " existing group: %s", utils.CommaJoin(pretty_dangling))
2044
2045     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2046                   None,
2047                   "the following instances have a non-existing primary-node:"
2048                   " %s", utils.CommaJoin(no_node_instances))
2049
2050     return not self.bad
2051
2052
2053 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2054   """Verifies the status of a node group.
2055
2056   """
2057   HPATH = "cluster-verify"
2058   HTYPE = constants.HTYPE_CLUSTER
2059   REQ_BGL = False
2060
2061   _HOOKS_INDENT_RE = re.compile("^", re.M)
2062
2063   class NodeImage(object):
2064     """A class representing the logical and physical status of a node.
2065
2066     @type name: string
2067     @ivar name: the node name to which this object refers
2068     @ivar volumes: a structure as returned from
2069         L{ganeti.backend.GetVolumeList} (runtime)
2070     @ivar instances: a list of running instances (runtime)
2071     @ivar pinst: list of configured primary instances (config)
2072     @ivar sinst: list of configured secondary instances (config)
2073     @ivar sbp: dictionary of {primary-node: list of instances} for all
2074         instances for which this node is secondary (config)
2075     @ivar mfree: free memory, as reported by hypervisor (runtime)
2076     @ivar dfree: free disk, as reported by the node (runtime)
2077     @ivar offline: the offline status (config)
2078     @type rpc_fail: boolean
2079     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2080         not whether the individual keys were correct) (runtime)
2081     @type lvm_fail: boolean
2082     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2083     @type hyp_fail: boolean
2084     @ivar hyp_fail: whether the RPC call didn't return the instance list
2085     @type ghost: boolean
2086     @ivar ghost: whether this is a known node or not (config)
2087     @type os_fail: boolean
2088     @ivar os_fail: whether the RPC call didn't return valid OS data
2089     @type oslist: list
2090     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2091     @type vm_capable: boolean
2092     @ivar vm_capable: whether the node can host instances
2093
2094     """
2095     def __init__(self, offline=False, name=None, vm_capable=True):
2096       self.name = name
2097       self.volumes = {}
2098       self.instances = []
2099       self.pinst = []
2100       self.sinst = []
2101       self.sbp = {}
2102       self.mfree = 0
2103       self.dfree = 0
2104       self.offline = offline
2105       self.vm_capable = vm_capable
2106       self.rpc_fail = False
2107       self.lvm_fail = False
2108       self.hyp_fail = False
2109       self.ghost = False
2110       self.os_fail = False
2111       self.oslist = {}
2112
2113   def ExpandNames(self):
2114     # This raises errors.OpPrereqError on its own:
2115     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2116
2117     # Get instances in node group; this is unsafe and needs verification later
2118     inst_names = \
2119       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2120
2121     self.needed_locks = {
2122       locking.LEVEL_INSTANCE: inst_names,
2123       locking.LEVEL_NODEGROUP: [self.group_uuid],
2124       locking.LEVEL_NODE: [],
2125       }
2126
2127     self.share_locks = _ShareAll()
2128
2129   def DeclareLocks(self, level):
2130     if level == locking.LEVEL_NODE:
2131       # Get members of node group; this is unsafe and needs verification later
2132       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2133
2134       all_inst_info = self.cfg.GetAllInstancesInfo()
2135
2136       # In Exec(), we warn about mirrored instances that have primary and
2137       # secondary living in separate node groups. To fully verify that
2138       # volumes for these instances are healthy, we will need to do an
2139       # extra call to their secondaries. We ensure here those nodes will
2140       # be locked.
2141       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2142         # Important: access only the instances whose lock is owned
2143         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2144           nodes.update(all_inst_info[inst].secondary_nodes)
2145
2146       self.needed_locks[locking.LEVEL_NODE] = nodes
2147
2148   def CheckPrereq(self):
2149     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2150     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2151
2152     group_nodes = set(self.group_info.members)
2153     group_instances = \
2154       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2155
2156     unlocked_nodes = \
2157         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2158
2159     unlocked_instances = \
2160         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2161
2162     if unlocked_nodes:
2163       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2164                                  utils.CommaJoin(unlocked_nodes),
2165                                  errors.ECODE_STATE)
2166
2167     if unlocked_instances:
2168       raise errors.OpPrereqError("Missing lock for instances: %s" %
2169                                  utils.CommaJoin(unlocked_instances),
2170                                  errors.ECODE_STATE)
2171
2172     self.all_node_info = self.cfg.GetAllNodesInfo()
2173     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2174
2175     self.my_node_names = utils.NiceSort(group_nodes)
2176     self.my_inst_names = utils.NiceSort(group_instances)
2177
2178     self.my_node_info = dict((name, self.all_node_info[name])
2179                              for name in self.my_node_names)
2180
2181     self.my_inst_info = dict((name, self.all_inst_info[name])
2182                              for name in self.my_inst_names)
2183
2184     # We detect here the nodes that will need the extra RPC calls for verifying
2185     # split LV volumes; they should be locked.
2186     extra_lv_nodes = set()
2187
2188     for inst in self.my_inst_info.values():
2189       if inst.disk_template in constants.DTS_INT_MIRROR:
2190         for nname in inst.all_nodes:
2191           if self.all_node_info[nname].group != self.group_uuid:
2192             extra_lv_nodes.add(nname)
2193
2194     unlocked_lv_nodes = \
2195         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2196
2197     if unlocked_lv_nodes:
2198       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2199                                  utils.CommaJoin(unlocked_lv_nodes),
2200                                  errors.ECODE_STATE)
2201     self.extra_lv_nodes = list(extra_lv_nodes)
2202
2203   def _VerifyNode(self, ninfo, nresult):
2204     """Perform some basic validation on data returned from a node.
2205
2206       - check the result data structure is well formed and has all the
2207         mandatory fields
2208       - check ganeti version
2209
2210     @type ninfo: L{objects.Node}
2211     @param ninfo: the node to check
2212     @param nresult: the results from the node
2213     @rtype: boolean
2214     @return: whether overall this call was successful (and we can expect
2215          reasonable values in the respose)
2216
2217     """
2218     node = ninfo.name
2219     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2220
2221     # main result, nresult should be a non-empty dict
2222     test = not nresult or not isinstance(nresult, dict)
2223     _ErrorIf(test, constants.CV_ENODERPC, node,
2224                   "unable to verify node: no data returned")
2225     if test:
2226       return False
2227
2228     # compares ganeti version
2229     local_version = constants.PROTOCOL_VERSION
2230     remote_version = nresult.get("version", None)
2231     test = not (remote_version and
2232                 isinstance(remote_version, (list, tuple)) and
2233                 len(remote_version) == 2)
2234     _ErrorIf(test, constants.CV_ENODERPC, node,
2235              "connection to node returned invalid data")
2236     if test:
2237       return False
2238
2239     test = local_version != remote_version[0]
2240     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2241              "incompatible protocol versions: master %s,"
2242              " node %s", local_version, remote_version[0])
2243     if test:
2244       return False
2245
2246     # node seems compatible, we can actually try to look into its results
2247
2248     # full package version
2249     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2250                   constants.CV_ENODEVERSION, node,
2251                   "software version mismatch: master %s, node %s",
2252                   constants.RELEASE_VERSION, remote_version[1],
2253                   code=self.ETYPE_WARNING)
2254
2255     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2256     if ninfo.vm_capable and isinstance(hyp_result, dict):
2257       for hv_name, hv_result in hyp_result.iteritems():
2258         test = hv_result is not None
2259         _ErrorIf(test, constants.CV_ENODEHV, node,
2260                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2261
2262     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2263     if ninfo.vm_capable and isinstance(hvp_result, list):
2264       for item, hv_name, hv_result in hvp_result:
2265         _ErrorIf(True, constants.CV_ENODEHV, node,
2266                  "hypervisor %s parameter verify failure (source %s): %s",
2267                  hv_name, item, hv_result)
2268
2269     test = nresult.get(constants.NV_NODESETUP,
2270                        ["Missing NODESETUP results"])
2271     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2272              "; ".join(test))
2273
2274     return True
2275
2276   def _VerifyNodeTime(self, ninfo, nresult,
2277                       nvinfo_starttime, nvinfo_endtime):
2278     """Check the node time.
2279
2280     @type ninfo: L{objects.Node}
2281     @param ninfo: the node to check
2282     @param nresult: the remote results for the node
2283     @param nvinfo_starttime: the start time of the RPC call
2284     @param nvinfo_endtime: the end time of the RPC call
2285
2286     """
2287     node = ninfo.name
2288     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2289
2290     ntime = nresult.get(constants.NV_TIME, None)
2291     try:
2292       ntime_merged = utils.MergeTime(ntime)
2293     except (ValueError, TypeError):
2294       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2295       return
2296
2297     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2298       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2299     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2300       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2301     else:
2302       ntime_diff = None
2303
2304     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2305              "Node time diverges by at least %s from master node time",
2306              ntime_diff)
2307
2308   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2309     """Check the node LVM results.
2310
2311     @type ninfo: L{objects.Node}
2312     @param ninfo: the node to check
2313     @param nresult: the remote results for the node
2314     @param vg_name: the configured VG name
2315
2316     """
2317     if vg_name is None:
2318       return
2319
2320     node = ninfo.name
2321     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2322
2323     # checks vg existence and size > 20G
2324     vglist = nresult.get(constants.NV_VGLIST, None)
2325     test = not vglist
2326     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2327     if not test:
2328       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2329                                             constants.MIN_VG_SIZE)
2330       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2331
2332     # check pv names
2333     pvlist = nresult.get(constants.NV_PVLIST, None)
2334     test = pvlist is None
2335     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2336     if not test:
2337       # check that ':' is not present in PV names, since it's a
2338       # special character for lvcreate (denotes the range of PEs to
2339       # use on the PV)
2340       for _, pvname, owner_vg in pvlist:
2341         test = ":" in pvname
2342         _ErrorIf(test, constants.CV_ENODELVM, node,
2343                  "Invalid character ':' in PV '%s' of VG '%s'",
2344                  pvname, owner_vg)
2345
2346   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2347     """Check the node bridges.
2348
2349     @type ninfo: L{objects.Node}
2350     @param ninfo: the node to check
2351     @param nresult: the remote results for the node
2352     @param bridges: the expected list of bridges
2353
2354     """
2355     if not bridges:
2356       return
2357
2358     node = ninfo.name
2359     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2360
2361     missing = nresult.get(constants.NV_BRIDGES, None)
2362     test = not isinstance(missing, list)
2363     _ErrorIf(test, constants.CV_ENODENET, node,
2364              "did not return valid bridge information")
2365     if not test:
2366       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2367                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2368
2369   def _VerifyNodeUserScripts(self, ninfo, nresult):
2370     """Check the results of user scripts presence and executability on the node
2371
2372     @type ninfo: L{objects.Node}
2373     @param ninfo: the node to check
2374     @param nresult: the remote results for the node
2375
2376     """
2377     node = ninfo.name
2378
2379     test = not constants.NV_USERSCRIPTS in nresult
2380     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2381                   "did not return user scripts information")
2382
2383     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2384     if not test:
2385       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2386                     "user scripts not present or not executable: %s" %
2387                     utils.CommaJoin(sorted(broken_scripts)))
2388
2389   def _VerifyNodeNetwork(self, ninfo, nresult):
2390     """Check the node network connectivity results.
2391
2392     @type ninfo: L{objects.Node}
2393     @param ninfo: the node to check
2394     @param nresult: the remote results for the node
2395
2396     """
2397     node = ninfo.name
2398     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2399
2400     test = constants.NV_NODELIST not in nresult
2401     _ErrorIf(test, constants.CV_ENODESSH, node,
2402              "node hasn't returned node ssh connectivity data")
2403     if not test:
2404       if nresult[constants.NV_NODELIST]:
2405         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2406           _ErrorIf(True, constants.CV_ENODESSH, node,
2407                    "ssh communication with node '%s': %s", a_node, a_msg)
2408
2409     test = constants.NV_NODENETTEST not in nresult
2410     _ErrorIf(test, constants.CV_ENODENET, node,
2411              "node hasn't returned node tcp connectivity data")
2412     if not test:
2413       if nresult[constants.NV_NODENETTEST]:
2414         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2415         for anode in nlist:
2416           _ErrorIf(True, constants.CV_ENODENET, node,
2417                    "tcp communication with node '%s': %s",
2418                    anode, nresult[constants.NV_NODENETTEST][anode])
2419
2420     test = constants.NV_MASTERIP not in nresult
2421     _ErrorIf(test, constants.CV_ENODENET, node,
2422              "node hasn't returned node master IP reachability data")
2423     if not test:
2424       if not nresult[constants.NV_MASTERIP]:
2425         if node == self.master_node:
2426           msg = "the master node cannot reach the master IP (not configured?)"
2427         else:
2428           msg = "cannot reach the master IP"
2429         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2430
2431   def _VerifyInstance(self, instance, instanceconfig, node_image,
2432                       diskstatus):
2433     """Verify an instance.
2434
2435     This function checks to see if the required block devices are
2436     available on the instance's node.
2437
2438     """
2439     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2440     node_current = instanceconfig.primary_node
2441
2442     node_vol_should = {}
2443     instanceconfig.MapLVsByNode(node_vol_should)
2444
2445     cluster = self.cfg.GetClusterInfo()
2446     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2447                                                             self.group_info)
2448     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2449     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2450
2451     for node in node_vol_should:
2452       n_img = node_image[node]
2453       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2454         # ignore missing volumes on offline or broken nodes
2455         continue
2456       for volume in node_vol_should[node]:
2457         test = volume not in n_img.volumes
2458         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2459                  "volume %s missing on node %s", volume, node)
2460
2461     if instanceconfig.admin_state == constants.ADMINST_UP:
2462       pri_img = node_image[node_current]
2463       test = instance not in pri_img.instances and not pri_img.offline
2464       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2465                "instance not running on its primary node %s",
2466                node_current)
2467
2468     diskdata = [(nname, success, status, idx)
2469                 for (nname, disks) in diskstatus.items()
2470                 for idx, (success, status) in enumerate(disks)]
2471
2472     for nname, success, bdev_status, idx in diskdata:
2473       # the 'ghost node' construction in Exec() ensures that we have a
2474       # node here
2475       snode = node_image[nname]
2476       bad_snode = snode.ghost or snode.offline
2477       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2478                not success and not bad_snode,
2479                constants.CV_EINSTANCEFAULTYDISK, instance,
2480                "couldn't retrieve status for disk/%s on %s: %s",
2481                idx, nname, bdev_status)
2482       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2483                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2484                constants.CV_EINSTANCEFAULTYDISK, instance,
2485                "disk/%s on %s is faulty", idx, nname)
2486
2487   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2488     """Verify if there are any unknown volumes in the cluster.
2489
2490     The .os, .swap and backup volumes are ignored. All other volumes are
2491     reported as unknown.
2492
2493     @type reserved: L{ganeti.utils.FieldSet}
2494     @param reserved: a FieldSet of reserved volume names
2495
2496     """
2497     for node, n_img in node_image.items():
2498       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2499           self.all_node_info[node].group != self.group_uuid):
2500         # skip non-healthy nodes
2501         continue
2502       for volume in n_img.volumes:
2503         test = ((node not in node_vol_should or
2504                 volume not in node_vol_should[node]) and
2505                 not reserved.Matches(volume))
2506         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2507                       "volume %s is unknown", volume)
2508
2509   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2510     """Verify N+1 Memory Resilience.
2511
2512     Check that if one single node dies we can still start all the
2513     instances it was primary for.
2514
2515     """
2516     cluster_info = self.cfg.GetClusterInfo()
2517     for node, n_img in node_image.items():
2518       # This code checks that every node which is now listed as
2519       # secondary has enough memory to host all instances it is
2520       # supposed to should a single other node in the cluster fail.
2521       # FIXME: not ready for failover to an arbitrary node
2522       # FIXME: does not support file-backed instances
2523       # WARNING: we currently take into account down instances as well
2524       # as up ones, considering that even if they're down someone
2525       # might want to start them even in the event of a node failure.
2526       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2527         # we're skipping nodes marked offline and nodes in other groups from
2528         # the N+1 warning, since most likely we don't have good memory
2529         # infromation from them; we already list instances living on such
2530         # nodes, and that's enough warning
2531         continue
2532       #TODO(dynmem): also consider ballooning out other instances
2533       for prinode, instances in n_img.sbp.items():
2534         needed_mem = 0
2535         for instance in instances:
2536           bep = cluster_info.FillBE(instance_cfg[instance])
2537           if bep[constants.BE_AUTO_BALANCE]:
2538             needed_mem += bep[constants.BE_MINMEM]
2539         test = n_img.mfree < needed_mem
2540         self._ErrorIf(test, constants.CV_ENODEN1, node,
2541                       "not enough memory to accomodate instance failovers"
2542                       " should node %s fail (%dMiB needed, %dMiB available)",
2543                       prinode, needed_mem, n_img.mfree)
2544
2545   @classmethod
2546   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2547                    (files_all, files_opt, files_mc, files_vm)):
2548     """Verifies file checksums collected from all nodes.
2549
2550     @param errorif: Callback for reporting errors
2551     @param nodeinfo: List of L{objects.Node} objects
2552     @param master_node: Name of master node
2553     @param all_nvinfo: RPC results
2554
2555     """
2556     # Define functions determining which nodes to consider for a file
2557     files2nodefn = [
2558       (files_all, None),
2559       (files_mc, lambda node: (node.master_candidate or
2560                                node.name == master_node)),
2561       (files_vm, lambda node: node.vm_capable),
2562       ]
2563
2564     # Build mapping from filename to list of nodes which should have the file
2565     nodefiles = {}
2566     for (files, fn) in files2nodefn:
2567       if fn is None:
2568         filenodes = nodeinfo
2569       else:
2570         filenodes = filter(fn, nodeinfo)
2571       nodefiles.update((filename,
2572                         frozenset(map(operator.attrgetter("name"), filenodes)))
2573                        for filename in files)
2574
2575     assert set(nodefiles) == (files_all | files_mc | files_vm)
2576
2577     fileinfo = dict((filename, {}) for filename in nodefiles)
2578     ignore_nodes = set()
2579
2580     for node in nodeinfo:
2581       if node.offline:
2582         ignore_nodes.add(node.name)
2583         continue
2584
2585       nresult = all_nvinfo[node.name]
2586
2587       if nresult.fail_msg or not nresult.payload:
2588         node_files = None
2589       else:
2590         fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2591         node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2592                           for (key, value) in fingerprints.items())
2593         del fingerprints
2594
2595       test = not (node_files and isinstance(node_files, dict))
2596       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2597               "Node did not return file checksum data")
2598       if test:
2599         ignore_nodes.add(node.name)
2600         continue
2601
2602       # Build per-checksum mapping from filename to nodes having it
2603       for (filename, checksum) in node_files.items():
2604         assert filename in nodefiles
2605         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2606
2607     for (filename, checksums) in fileinfo.items():
2608       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2609
2610       # Nodes having the file
2611       with_file = frozenset(node_name
2612                             for nodes in fileinfo[filename].values()
2613                             for node_name in nodes) - ignore_nodes
2614
2615       expected_nodes = nodefiles[filename] - ignore_nodes
2616
2617       # Nodes missing file
2618       missing_file = expected_nodes - with_file
2619
2620       if filename in files_opt:
2621         # All or no nodes
2622         errorif(missing_file and missing_file != expected_nodes,
2623                 constants.CV_ECLUSTERFILECHECK, None,
2624                 "File %s is optional, but it must exist on all or no"
2625                 " nodes (not found on %s)",
2626                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2627       else:
2628         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2629                 "File %s is missing from node(s) %s", filename,
2630                 utils.CommaJoin(utils.NiceSort(missing_file)))
2631
2632         # Warn if a node has a file it shouldn't
2633         unexpected = with_file - expected_nodes
2634         errorif(unexpected,
2635                 constants.CV_ECLUSTERFILECHECK, None,
2636                 "File %s should not exist on node(s) %s",
2637                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2638
2639       # See if there are multiple versions of the file
2640       test = len(checksums) > 1
2641       if test:
2642         variants = ["variant %s on %s" %
2643                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2644                     for (idx, (checksum, nodes)) in
2645                       enumerate(sorted(checksums.items()))]
2646       else:
2647         variants = []
2648
2649       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2650               "File %s found with %s different checksums (%s)",
2651               filename, len(checksums), "; ".join(variants))
2652
2653   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2654                       drbd_map):
2655     """Verifies and the node DRBD status.
2656
2657     @type ninfo: L{objects.Node}
2658     @param ninfo: the node to check
2659     @param nresult: the remote results for the node
2660     @param instanceinfo: the dict of instances
2661     @param drbd_helper: the configured DRBD usermode helper
2662     @param drbd_map: the DRBD map as returned by
2663         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2664
2665     """
2666     node = ninfo.name
2667     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2668
2669     if drbd_helper:
2670       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2671       test = (helper_result is None)
2672       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2673                "no drbd usermode helper returned")
2674       if helper_result:
2675         status, payload = helper_result
2676         test = not status
2677         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2678                  "drbd usermode helper check unsuccessful: %s", payload)
2679         test = status and (payload != drbd_helper)
2680         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2681                  "wrong drbd usermode helper: %s", payload)
2682
2683     # compute the DRBD minors
2684     node_drbd = {}
2685     for minor, instance in drbd_map[node].items():
2686       test = instance not in instanceinfo
2687       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2688                "ghost instance '%s' in temporary DRBD map", instance)
2689         # ghost instance should not be running, but otherwise we
2690         # don't give double warnings (both ghost instance and
2691         # unallocated minor in use)
2692       if test:
2693         node_drbd[minor] = (instance, False)
2694       else:
2695         instance = instanceinfo[instance]
2696         node_drbd[minor] = (instance.name,
2697                             instance.admin_state == constants.ADMINST_UP)
2698
2699     # and now check them
2700     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2701     test = not isinstance(used_minors, (tuple, list))
2702     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2703              "cannot parse drbd status file: %s", str(used_minors))
2704     if test:
2705       # we cannot check drbd status
2706       return
2707
2708     for minor, (iname, must_exist) in node_drbd.items():
2709       test = minor not in used_minors and must_exist
2710       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2711                "drbd minor %d of instance %s is not active", minor, iname)
2712     for minor in used_minors:
2713       test = minor not in node_drbd
2714       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2715                "unallocated drbd minor %d is in use", minor)
2716
2717   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2718     """Builds the node OS structures.
2719
2720     @type ninfo: L{objects.Node}
2721     @param ninfo: the node to check
2722     @param nresult: the remote results for the node
2723     @param nimg: the node image object
2724
2725     """
2726     node = ninfo.name
2727     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2728
2729     remote_os = nresult.get(constants.NV_OSLIST, None)
2730     test = (not isinstance(remote_os, list) or
2731             not compat.all(isinstance(v, list) and len(v) == 7
2732                            for v in remote_os))
2733
2734     _ErrorIf(test, constants.CV_ENODEOS, node,
2735              "node hasn't returned valid OS data")
2736
2737     nimg.os_fail = test
2738
2739     if test:
2740       return
2741
2742     os_dict = {}
2743
2744     for (name, os_path, status, diagnose,
2745          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2746
2747       if name not in os_dict:
2748         os_dict[name] = []
2749
2750       # parameters is a list of lists instead of list of tuples due to
2751       # JSON lacking a real tuple type, fix it:
2752       parameters = [tuple(v) for v in parameters]
2753       os_dict[name].append((os_path, status, diagnose,
2754                             set(variants), set(parameters), set(api_ver)))
2755
2756     nimg.oslist = os_dict
2757
2758   def _VerifyNodeOS(self, ninfo, nimg, base):
2759     """Verifies the node OS list.
2760
2761     @type ninfo: L{objects.Node}
2762     @param ninfo: the node to check
2763     @param nimg: the node image object
2764     @param base: the 'template' node we match against (e.g. from the master)
2765
2766     """
2767     node = ninfo.name
2768     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2769
2770     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2771
2772     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2773     for os_name, os_data in nimg.oslist.items():
2774       assert os_data, "Empty OS status for OS %s?!" % os_name
2775       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2776       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2777                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2778       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2779                "OS '%s' has multiple entries (first one shadows the rest): %s",
2780                os_name, utils.CommaJoin([v[0] for v in os_data]))
2781       # comparisons with the 'base' image
2782       test = os_name not in base.oslist
2783       _ErrorIf(test, constants.CV_ENODEOS, node,
2784                "Extra OS %s not present on reference node (%s)",
2785                os_name, base.name)
2786       if test:
2787         continue
2788       assert base.oslist[os_name], "Base node has empty OS status?"
2789       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2790       if not b_status:
2791         # base OS is invalid, skipping
2792         continue
2793       for kind, a, b in [("API version", f_api, b_api),
2794                          ("variants list", f_var, b_var),
2795                          ("parameters", beautify_params(f_param),
2796                           beautify_params(b_param))]:
2797         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2798                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2799                  kind, os_name, base.name,
2800                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2801
2802     # check any missing OSes
2803     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2804     _ErrorIf(missing, constants.CV_ENODEOS, node,
2805              "OSes present on reference node %s but missing on this node: %s",
2806              base.name, utils.CommaJoin(missing))
2807
2808   def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2809     """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2810
2811     @type ninfo: L{objects.Node}
2812     @param ninfo: the node to check
2813     @param nresult: the remote results for the node
2814     @type is_master: bool
2815     @param is_master: Whether node is the master node
2816
2817     """
2818     node = ninfo.name
2819
2820     if (is_master and
2821         (constants.ENABLE_FILE_STORAGE or
2822          constants.ENABLE_SHARED_FILE_STORAGE)):
2823       try:
2824         fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2825       except KeyError:
2826         # This should never happen
2827         self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2828                       "Node did not return forbidden file storage paths")
2829       else:
2830         self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2831                       "Found forbidden file storage paths: %s",
2832                       utils.CommaJoin(fspaths))
2833     else:
2834       self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2835                     constants.CV_ENODEFILESTORAGEPATHS, node,
2836                     "Node should not have returned forbidden file storage"
2837                     " paths")
2838
2839   def _VerifyOob(self, ninfo, nresult):
2840     """Verifies out of band functionality of a node.
2841
2842     @type ninfo: L{objects.Node}
2843     @param ninfo: the node to check
2844     @param nresult: the remote results for the node
2845
2846     """
2847     node = ninfo.name
2848     # We just have to verify the paths on master and/or master candidates
2849     # as the oob helper is invoked on the master
2850     if ((ninfo.master_candidate or ninfo.master_capable) and
2851         constants.NV_OOB_PATHS in nresult):
2852       for path_result in nresult[constants.NV_OOB_PATHS]:
2853         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2854
2855   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2856     """Verifies and updates the node volume data.
2857
2858     This function will update a L{NodeImage}'s internal structures
2859     with data from the remote call.
2860
2861     @type ninfo: L{objects.Node}
2862     @param ninfo: the node to check
2863     @param nresult: the remote results for the node
2864     @param nimg: the node image object
2865     @param vg_name: the configured VG name
2866
2867     """
2868     node = ninfo.name
2869     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2870
2871     nimg.lvm_fail = True
2872     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2873     if vg_name is None:
2874       pass
2875     elif isinstance(lvdata, basestring):
2876       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2877                utils.SafeEncode(lvdata))
2878     elif not isinstance(lvdata, dict):
2879       _ErrorIf(True, constants.CV_ENODELVM, node,
2880                "rpc call to node failed (lvlist)")
2881     else:
2882       nimg.volumes = lvdata
2883       nimg.lvm_fail = False
2884
2885   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2886     """Verifies and updates the node instance list.
2887
2888     If the listing was successful, then updates this node's instance
2889     list. Otherwise, it marks the RPC call as failed for the instance
2890     list key.
2891
2892     @type ninfo: L{objects.Node}
2893     @param ninfo: the node to check
2894     @param nresult: the remote results for the node
2895     @param nimg: the node image object
2896
2897     """
2898     idata = nresult.get(constants.NV_INSTANCELIST, None)
2899     test = not isinstance(idata, list)
2900     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2901                   "rpc call to node failed (instancelist): %s",
2902                   utils.SafeEncode(str(idata)))
2903     if test:
2904       nimg.hyp_fail = True
2905     else:
2906       nimg.instances = idata
2907
2908   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2909     """Verifies and computes a node information map
2910
2911     @type ninfo: L{objects.Node}
2912     @param ninfo: the node to check
2913     @param nresult: the remote results for the node
2914     @param nimg: the node image object
2915     @param vg_name: the configured VG name
2916
2917     """
2918     node = ninfo.name
2919     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2920
2921     # try to read free memory (from the hypervisor)
2922     hv_info = nresult.get(constants.NV_HVINFO, None)
2923     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2924     _ErrorIf(test, constants.CV_ENODEHV, node,
2925              "rpc call to node failed (hvinfo)")
2926     if not test:
2927       try:
2928         nimg.mfree = int(hv_info["memory_free"])
2929       except (ValueError, TypeError):
2930         _ErrorIf(True, constants.CV_ENODERPC, node,
2931                  "node returned invalid nodeinfo, check hypervisor")
2932
2933     # FIXME: devise a free space model for file based instances as well
2934     if vg_name is not None:
2935       test = (constants.NV_VGLIST not in nresult or
2936               vg_name not in nresult[constants.NV_VGLIST])
2937       _ErrorIf(test, constants.CV_ENODELVM, node,
2938                "node didn't return data for the volume group '%s'"
2939                " - it is either missing or broken", vg_name)
2940       if not test:
2941         try:
2942           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2943         except (ValueError, TypeError):
2944           _ErrorIf(True, constants.CV_ENODERPC, node,
2945                    "node returned invalid LVM info, check LVM status")
2946
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    The disks of all primary and secondary instances of the given nodes
    are queried with a single multi-node mirror status RPC call; nodes
    without any disks are left out of the call.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, node image object)
    @param node_image: Node images; their C{pinst} and C{sinst} lists
        name the instances whose disks are queried on each node
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload); diskless instances map to an
        empty dictionary

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # node name -> list of (instance name, disk object)
    node_disks = {}
    # node name -> list of annotated disk copies, in the same order as
    # the entries of node_disks
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # _AnnotateDiskParams already makes copies of the disks
      devonly = []
      for (inst, dev) in disks:
        (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
        self.cfg.SetDiskID(anno_disk, nname)
        devonly.append(anno_disk)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              # Malformed entries are logged and replaced with a failure
              # tuple so that every entry keeps the (success, payload) form
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      # The statuses are matched to the queried disks by position
      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    # Sanity checks: one status per disk on every node, no more nodes
    # than the instance has, and only two-element statuses
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
3043
3044   @staticmethod
3045   def _SshNodeSelector(group_uuid, all_nodes):
3046     """Create endless iterators for all potential SSH check hosts.
3047
3048     """
3049     nodes = [node for node in all_nodes
3050              if (node.group != group_uuid and
3051                  not node.offline)]
3052     keyfunc = operator.attrgetter("group")
3053
3054     return map(itertools.cycle,
3055                [sorted(map(operator.attrgetter("name"), names))
3056                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3057                                                   keyfunc)])
3058
3059   @classmethod
3060   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3061     """Choose which nodes should talk to which other nodes.
3062
3063     We will make nodes contact all nodes in their group, and one node from
3064     every other group.
3065
3066     @warning: This algorithm has a known issue if one node group is much
3067       smaller than others (e.g. just one node). In such a case all other
3068       nodes will talk to the single node.
3069
3070     """
3071     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3072     sel = cls._SshNodeSelector(group_uuid, all_nodes)
3073
3074     return (online_nodes,
3075             dict((name, sorted([i.next() for i in sel]))
3076                  for name in online_nodes))
3077
3078   def BuildHooksEnv(self):
3079     """Build hooks env.
3080
3081     Cluster-Verify hooks just ran in the post phase and their failure makes
3082     the output be logged in the verify output and the verification to fail.
3083
3084     """
3085     env = {
3086       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3087       }
3088
3089     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3090                for node in self.my_node_info.values())
3091
3092     return env
3093
3094   def BuildHooksNodes(self):
3095     """Build hooks nodes.
3096
3097     """
3098     return ([], self.my_node_names)
3099
3100   def Exec(self, feedback_fn):
3101     """Verify integrity of the node group, performing various test on nodes.
3102
3103     """
3104     # This method has too many local variables. pylint: disable=R0914
3105     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3106
3107     if not self.my_node_names:
3108       # empty node group
3109       feedback_fn("* Empty node group, skipping verification")
3110       return True
3111
3112     self.bad = False
3113     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3114     verbose = self.op.verbose
3115     self._feedback_fn = feedback_fn
3116
3117     vg_name = self.cfg.GetVGName()
3118     drbd_helper = self.cfg.GetDRBDHelper()
3119     cluster = self.cfg.GetClusterInfo()
3120     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3121     hypervisors = cluster.enabled_hypervisors
3122     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3123
3124     i_non_redundant = [] # Non redundant instances
3125     i_non_a_balanced = [] # Non auto-balanced instances
3126     i_offline = 0 # Count of offline instances
3127     n_offline = 0 # Count of offline nodes
3128     n_drained = 0 # Count of nodes being drained
3129     node_vol_should = {}
3130
3131     # FIXME: verify OS list
3132
3133     # File verification
3134     filemap = _ComputeAncillaryFiles(cluster, False)
3135
3136     # do local checksums
3137     master_node = self.master_node = self.cfg.GetMasterNode()
3138     master_ip = self.cfg.GetMasterIP()
3139
3140     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3141
3142     user_scripts = []
3143     if self.cfg.GetUseExternalMipScript():
3144       user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3145
3146     node_verify_param = {
3147       constants.NV_FILELIST:
3148         map(vcluster.MakeVirtualPath,
3149             utils.UniqueSequence(filename
3150                                  for files in filemap
3151                                  for filename in files)),
3152       constants.NV_NODELIST:
3153         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3154                                   self.all_node_info.values()),
3155       constants.NV_HYPERVISOR: hypervisors,
3156       constants.NV_HVPARAMS:
3157         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3158       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3159                                  for node in node_data_list
3160                                  if not node.offline],
3161       constants.NV_INSTANCELIST: hypervisors,
3162       constants.NV_VERSION: None,
3163       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3164       constants.NV_NODESETUP: None,
3165       constants.NV_TIME: None,
3166       constants.NV_MASTERIP: (master_node, master_ip),
3167       constants.NV_OSLIST: None,
3168       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3169       constants.NV_USERSCRIPTS: user_scripts,
3170       }
3171
3172     if vg_name is not None:
3173       node_verify_param[constants.NV_VGLIST] = None
3174       node_verify_param[constants.NV_LVLIST] = vg_name
3175       node_verify_param[constants.NV_PVLIST] = [vg_name]
3176
3177     if drbd_helper:
3178       node_verify_param[constants.NV_DRBDLIST] = None
3179       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3180
3181     if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3182       # Load file storage paths only from master node
3183       node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3184
3185     # bridge checks
3186     # FIXME: this needs to be changed per node-group, not cluster-wide
3187     bridges = set()
3188     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3189     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3190       bridges.add(default_nicpp[constants.NIC_LINK])
3191     for instance in self.my_inst_info.values():
3192       for nic in instance.nics:
3193         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3194         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3195           bridges.add(full_nic[constants.NIC_LINK])
3196
3197     if bridges:
3198       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3199
3200     # Build our expected cluster state
3201     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3202                                                  name=node.name,
3203                                                  vm_capable=node.vm_capable))
3204                       for node in node_data_list)
3205
3206     # Gather OOB paths
3207     oob_paths = []
3208     for node in self.all_node_info.values():
3209       path = _SupportsOob(self.cfg, node)
3210       if path and path not in oob_paths:
3211         oob_paths.append(path)
3212
3213     if oob_paths:
3214       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3215
3216     for instance in self.my_inst_names:
3217       inst_config = self.my_inst_info[instance]
3218       if inst_config.admin_state == constants.ADMINST_OFFLINE:
3219         i_offline += 1
3220
3221       for nname in inst_config.all_nodes:
3222         if nname not in node_image:
3223           gnode = self.NodeImage(name=nname)
3224           gnode.ghost = (nname not in self.all_node_info)
3225           node_image[nname] = gnode
3226
3227       inst_config.MapLVsByNode(node_vol_should)
3228
3229       pnode = inst_config.primary_node
3230       node_image[pnode].pinst.append(instance)
3231
3232       for snode in inst_config.secondary_nodes:
3233         nimg = node_image[snode]
3234         nimg.sinst.append(instance)
3235         if pnode not in nimg.sbp:
3236           nimg.sbp[pnode] = []
3237         nimg.sbp[pnode].append(instance)
3238
3239     # At this point, we have the in-memory data structures complete,
3240     # except for the runtime information, which we'll gather next
3241
3242     # Due to the way our RPC system works, exact response times cannot be
3243     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3244     # time before and after executing the request, we can at least have a time
3245     # window.
3246     nvinfo_starttime = time.time()
3247     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3248                                            node_verify_param,
3249                                            self.cfg.GetClusterName())
3250     nvinfo_endtime = time.time()
3251
3252     if self.extra_lv_nodes and vg_name is not None:
3253       extra_lv_nvinfo = \
3254           self.rpc.call_node_verify(self.extra_lv_nodes,
3255                                     {constants.NV_LVLIST: vg_name},
3256                                     self.cfg.GetClusterName())
3257     else:
3258       extra_lv_nvinfo = {}
3259
3260     all_drbd_map = self.cfg.ComputeDRBDMap()
3261
3262     feedback_fn("* Gathering disk information (%s nodes)" %
3263                 len(self.my_node_names))
3264     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3265                                      self.my_inst_info)
3266
3267     feedback_fn("* Verifying configuration file consistency")
3268
3269     # If not all nodes are being checked, we need to make sure the master node
3270     # and a non-checked vm_capable node are in the list.
3271     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3272     if absent_nodes:
3273       vf_nvinfo = all_nvinfo.copy()
3274       vf_node_info = list(self.my_node_info.values())
3275       additional_nodes = []
3276       if master_node not in self.my_node_info:
3277         additional_nodes.append(master_node)
3278         vf_node_info.append(self.all_node_info[master_node])
3279       # Add the first vm_capable node we find which is not included,
3280       # excluding the master node (which we already have)
3281       for node in absent_nodes:
3282         nodeinfo = self.all_node_info[node]
3283         if (nodeinfo.vm_capable and not nodeinfo.offline and
3284             node != master_node):
3285           additional_nodes.append(node)
3286           vf_node_info.append(self.all_node_info[node])
3287           break
3288       key = constants.NV_FILELIST
3289       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3290                                                  {key: node_verify_param[key]},
3291                                                  self.cfg.GetClusterName()))
3292     else:
3293       vf_nvinfo = all_nvinfo
3294       vf_node_info = self.my_node_info.values()
3295
3296     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3297
3298     feedback_fn("* Verifying node status")
3299
3300     refos_img = None
3301
3302     for node_i in node_data_list:
3303       node = node_i.name
3304       nimg = node_image[node]
3305
3306       if node_i.offline:
3307         if verbose:
3308           feedback_fn("* Skipping offline node %s" % (node,))
3309         n_offline += 1
3310         continue
3311
3312       if node == master_node:
3313         ntype = "master"
3314       elif node_i.master_candidate:
3315         ntype = "master candidate"
3316       elif node_i.drained:
3317         ntype = "drained"
3318         n_drained += 1
3319       else:
3320         ntype = "regular"
3321       if verbose:
3322         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3323
3324       msg = all_nvinfo[node].fail_msg
3325       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3326                msg)
3327       if msg:
3328         nimg.rpc_fail = True
3329         continue
3330
3331       nresult = all_nvinfo[node].payload
3332
3333       nimg.call_ok = self._VerifyNode(node_i, nresult)
3334       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3335       self._VerifyNodeNetwork(node_i, nresult)
3336       self._VerifyNodeUserScripts(node_i, nresult)
3337       self._VerifyOob(node_i, nresult)
3338       self._VerifyFileStoragePaths(node_i, nresult,
3339                                    node == master_node)
3340
3341       if nimg.vm_capable:
3342         self._VerifyNodeLVM(node_i, nresult, vg_name)
3343         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3344                              all_drbd_map)
3345
3346         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3347         self._UpdateNodeInstances(node_i, nresult, nimg)
3348         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3349         self._UpdateNodeOS(node_i, nresult, nimg)
3350
3351         if not nimg.os_fail:
3352           if refos_img is None:
3353             refos_img = nimg
3354           self._VerifyNodeOS(node_i, nimg, refos_img)
3355         self._VerifyNodeBridges(node_i, nresult, bridges)
3356
3357         # Check whether all running instancies are primary for the node. (This
3358         # can no longer be done from _VerifyInstance below, since some of the
3359         # wrong instances could be from other node groups.)
3360         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3361
3362         for inst in non_primary_inst:
3363           test = inst in self.all_inst_info
3364           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3365                    "instance should not run on node %s", node_i.name)
3366           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3367                    "node is running unknown instance %s", inst)
3368
3369     for node, result in extra_lv_nvinfo.items():
3370       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3371                               node_image[node], vg_name)
3372
3373     feedback_fn("* Verifying instance status")
3374     for instance in self.my_inst_names:
3375       if verbose:
3376         feedback_fn("* Verifying instance %s" % instance)
3377       inst_config = self.my_inst_info[instance]
3378       self._VerifyInstance(instance, inst_config, node_image,
3379                            instdisk[instance])
3380       inst_nodes_offline = []
3381
3382       pnode = inst_config.primary_node
3383       pnode_img = node_image[pnode]
3384       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3385                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3386                " primary node failed", instance)
3387
3388       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3389                pnode_img.offline,
3390                constants.CV_EINSTANCEBADNODE, instance,
3391                "instance is marked as running and lives on offline node %s",
3392                inst_config.primary_node)
3393
3394       # If the instance is non-redundant we cannot survive losing its primary
3395       # node, so we are not N+1 compliant.
3396       if inst_config.disk_template not in constants.DTS_MIRRORED:
3397         i_non_redundant.append(instance)
3398
3399       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3400                constants.CV_EINSTANCELAYOUT,
3401                instance, "instance has multiple secondary nodes: %s",
3402                utils.CommaJoin(inst_config.secondary_nodes),
3403                code=self.ETYPE_WARNING)
3404
3405       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3406         pnode = inst_config.primary_node
3407         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3408         instance_groups = {}
3409
3410         for node in instance_nodes:
3411           instance_groups.setdefault(self.all_node_info[node].group,
3412                                      []).append(node)
3413
3414         pretty_list = [
3415           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3416           # Sort so that we always list the primary node first.
3417           for group, nodes in sorted(instance_groups.items(),
3418                                      key=lambda (_, nodes): pnode in nodes,
3419                                      reverse=True)]
3420
3421         self._ErrorIf(len(instance_groups) > 1,
3422                       constants.CV_EINSTANCESPLITGROUPS,
3423                       instance, "instance has primary and secondary nodes in"
3424                       " different groups: %s", utils.CommaJoin(pretty_list),
3425                       code=self.ETYPE_WARNING)
3426
3427       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3428         i_non_a_balanced.append(instance)
3429
3430       for snode in inst_config.secondary_nodes:
3431         s_img = node_image[snode]
3432         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3433                  snode, "instance %s, connection to secondary node failed",
3434                  instance)
3435
3436         if s_img.offline:
3437           inst_nodes_offline.append(snode)
3438
3439       # warn that the instance lives on offline nodes
3440       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3441                "instance has offline secondary node(s) %s",
3442                utils.CommaJoin(inst_nodes_offline))
3443       # ... or ghost/non-vm_capable nodes
3444       for node in inst_config.all_nodes:
3445         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3446                  instance, "instance lives on ghost node %s", node)
3447         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3448                  instance, "instance lives on non-vm_capable node %s", node)
3449
3450     feedback_fn("* Verifying orphan volumes")
3451     reserved = utils.FieldSet(*cluster.reserved_lvs)
3452
3453     # We will get spurious "unknown volume" warnings if any node of this group
3454     # is secondary for an instance whose primary is in another group. To avoid
3455     # them, we find these instances and add their volumes to node_vol_should.
3456     for inst in self.all_inst_info.values():
3457       for secondary in inst.secondary_nodes:
3458         if (secondary in self.my_node_info
3459             and inst.name not in self.my_inst_info):
3460           inst.MapLVsByNode(node_vol_should)
3461           break
3462
3463     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3464
3465     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3466       feedback_fn("* Verifying N+1 Memory redundancy")
3467       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3468
3469     feedback_fn("* Other Notes")
3470     if i_non_redundant:
3471       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3472                   % len(i_non_redundant))
3473
3474     if i_non_a_balanced:
3475       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3476                   % len(i_non_a_balanced))
3477
3478     if i_offline:
3479       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3480
3481     if n_offline:
3482       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3483
3484     if n_drained:
3485       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3486
3487     return not self.bad
3488
3489   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3490     """Analyze the post-hooks' result
3491
3492     This method analyses the hook result, handles it, and sends some
3493     nicely-formatted feedback back to the user.
3494
3495     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3496         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3497     @param hooks_results: the results of the multi-node hooks rpc call
3498     @param feedback_fn: function used send feedback back to the caller
3499     @param lu_result: previous Exec result
3500     @return: the new Exec result, based on the previous result
3501         and hook results
3502
3503     """
3504     # We only really run POST phase hooks, only for non-empty groups,
3505     # and are only interested in their results
3506     if not self.my_node_names:
3507       # empty node group
3508       pass
3509     elif phase == constants.HOOKS_PHASE_POST:
3510       # Used to change hooks' output to proper indentation
3511       feedback_fn("* Hooks Results")
3512       assert hooks_results, "invalid result from hooks"
3513
3514       for node_name in hooks_results:
3515         res = hooks_results[node_name]
3516         msg = res.fail_msg
3517         test = msg and not res.offline
3518         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3519                       "Communication failure in hooks execution: %s", msg)
3520         if res.offline or msg:
3521           # No need to investigate payload if node is offline or gave
3522           # an error.
3523           continue
3524         for script, hkr, output in res.payload:
3525           test = hkr == constants.HKR_FAIL
3526           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3527                         "Script %s failed, output:", script)
3528           if test:
3529             output = self._HOOKS_INDENT_RE.sub("      ", output)
3530             feedback_fn("%s" % output)
3531             lu_result = False
3532
3533     return lu_result
3534
3535
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  The work itself is handed over to one L{opcodes.OpGroupVerifyDisks} job
  per node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares locks on all node groups.

    The sharing mode of the locks is whatever L{_ShareAll} returns.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Builds the per-group verification jobs.

    @return: a L{ResultWithJobs} holding one single-opcode job for each
        owned node group lock

    """
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    jobs = []
    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])

    return ResultWithJobs(jobs)
3554
3555
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Looks up the node group and initialises the lock declarations.

    """
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    # Start with empty lists; DeclareLocks fills them in level by level
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    """Computes the locks to be acquired at the given level.

    @param level: the locking level being processed

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      # Besides our own group, lock all groups used by instances
      # optimistically; this requires going via the node before it's locked,
      # requiring verification later on
      wanted_groups = set([self.group_uuid])
      for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
        wanted_groups.update(self.cfg.GetInstanceNodeGroups(instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      group = self.cfg.GetNodeGroup(self.group_uuid)
      self.needed_locks[locking.LEVEL_NODE].extend(group.members)

  def CheckPrereq(self):
    """Verifies the optimistically acquired locks and loads the instances.

    """
    group_uuid = self.group_uuid
    locked_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    locked_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert group_uuid in locked_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, group_uuid, locked_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(locked_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              locked_groups, locked_nodes, group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    node_errors = {}
    need_activation = set()
    missing_lvs = {}

    # Only instances whose admin state is ADMINST_UP are taken into account
    up_instances = [inst for inst in self.instances.values()
                    if inst.admin_state == constants.ADMINST_UP]
    nv_dict = _MapInstanceDisksToNodes(up_instances)

    if nv_dict:
      locked_nodes = set(self.owned_locks(locking.LEVEL_NODE))
      vm_capable_nodes = set(self.cfg.GetVmCapableNodeList())
      nodes = utils.NiceSort(locked_nodes & vm_capable_nodes)

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          node_errors[node] = msg
          continue

        for lv_name, lv_info in node_res.payload.items():
          (_, _, lv_online) = lv_info
          # Every LV reported by the node is removed from the expected set
          inst = nv_dict.pop((node, lv_name), None)
          if inst is not None and not lv_online:
            need_activation.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.items():
        missing_lvs.setdefault(inst, []).append(list(key))

    return (node_errors, list(need_activation), missing_lvs)
3666
3667
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  Disk sizes recorded in the configuration which differ from the values
  returned by the nodes are corrected.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares locks for either the requested instances or all of them.

    """
    if not self.op.instances:
      # No explicit instance list was given
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    else:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    """Locks the primary nodes of the explicitly requested instances.

    @param level: the locking level being processed

    """
    if level != locking.LEVEL_NODE_RES or self.wanted_names is None:
      return

    self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only resolves the (optional) instance list into instance objects,
    falling back to all owned instance locks if no list was given.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    instance_info = self.cfg.GetMultiInstanceInfo(self.wanted_names)
    self.wanted_instances = map(compat.snd, instance_info)

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: whether the size of any child was changed

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    first_child = disk.children[0]
    too_small = first_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   first_child.size, disk.size)
      first_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(first_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @return: list of (instance name, disk index, size) tuples, one for
        each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      for idx, disk in enumerate(instance.disks):
        node_disks.append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      # The RPC call works on copies, the configuration objects are only
      # touched when a size needs to be corrected
      disk_copies = [entry[2].Copy() for entry in dskl]
      for dsk in disk_copies:
        self.cfg.SetDiskID(dsk, node)

      result = self.rpc.call_blockdev_getsize(node, disk_copies)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      sizes = result.payload
      if len(sizes) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), sizes)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for ((instance, idx, disk), size) in zip(dskl, sizes):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue

        # Scale the reported value down by 2**20 before comparing it with
        # the recorded size
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))

        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
3788
3789
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dict with the current cluster name and the requested new name

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    env["NEW_NAME"] = self.op.name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (list with only the master node, list of all nodes)

    """
    master = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return ([master], all_nodes)

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved and the result is compared against the current
    cluster name and master IP; the resolved name replaces the one in the
    opcode.

    @raise errors.OpPrereqError: if neither name nor IP address change, or
        if a changed IP address already answers on the network

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    new_ip = hostname.ip
    self.ip = new_ip

    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    ip_changed = (new_ip != old_ip)

    if new_name == old_name and not ip_changed:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    if ip_changed and netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    master_name = master_params.name
    result = self.rpc.call_node_deactivate_master_ip(master_name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      # the upload below goes to every online node except this one
      if master_name in node_list:
        node_list.remove(master_name)
      _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
    finally:
      # The master IP is re-activated even if the steps above failed
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return new_name
3873
3874
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  The check itself is delegated to the IP address class which
  L{netutils.IPAddress.GetClassFromIpFamily} returns for the cluster's
  primary IP family.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               (ip_family, ), errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    # The value is wrapped in a one-element tuple so that a tuple passed by
    # mistake is shown in the message instead of being unpacked as format
    # arguments (which would raise the wrong exception)
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask, ), errors.ECODE_INVAL)
3894
3895
3896 class LUClusterSetParams(LogicalUnit):
3897   """Change the parameters of the cluster.
3898
3899   """
3900   HPATH = "cluster-modify"
3901   HTYPE = constants.HTYPE_CLUSTER
3902   REQ_BGL = False
3903
3904   def CheckArguments(self):
3905     """Check parameters
3906
3907     """
3908     if self.op.uid_pool:
3909       uidpool.CheckUidPool(self.op.uid_pool)
3910
3911     if self.op.add_uids:
3912       uidpool.CheckUidPool(self.op.add_uids)
3913
3914     if self.op.remove_uids:
3915       uidpool.CheckUidPool(self.op.remove_uids)
3916
3917     if self.op.master_netmask is not None:
3918       _ValidateNetmask(self.cfg, self.op.master_netmask)
3919
3920     if self.op.diskparams:
3921       for dt_params in self.op.diskparams.values():
3922         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3923       try:
3924         utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3925       except errors.OpPrereqError, err:
3926         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3927                                    errors.ECODE_INVAL)
3928
3929   def ExpandNames(self):
3930     # FIXME: in the future maybe other cluster params won't require checking on
3931     # all nodes to be modified.
3932     self.needed_locks = {
3933       locking.LEVEL_NODE: locking.ALL_SET,
3934       locking.LEVEL_INSTANCE: locking.ALL_SET,
3935       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3936     }
3937     self.share_locks = {
3938         locking.LEVEL_NODE: 1,
3939         locking.LEVEL_INSTANCE: 1,
3940         locking.LEVEL_NODEGROUP: 1,
3941     }
3942
3943   def BuildHooksEnv(self):
3944     """Build hooks env.
3945
3946     """
3947     return {
3948       "OP_TARGET": self.cfg.GetClusterName(),
3949       "NEW_VG_NAME": self.op.vg_name,
3950       }
3951
3952   def BuildHooksNodes(self):
3953     """Build hooks nodes.
3954
3955     """
3956     mn = self.cfg.GetMasterNode()
3957     return ([mn], [mn])
3958
3959   def CheckPrereq(self):
3960     """Check prerequisites.
3961
3962     This checks whether the given params don't conflict and
3963     if the given volume group is valid.
3964
3965     """
3966     if self.op.vg_name is not None and not self.op.vg_name:
3967       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3968         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3969                                    " instances exist", errors.ECODE_INVAL)
3970
3971     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3972       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3973         raise errors.OpPrereqError("Cannot disable drbd helper while"
3974                                    " drbd-based instances exist",
3975                                    errors.ECODE_INVAL)
3976
3977     node_list = self.owned_locks(locking.LEVEL_NODE)
3978
3979     # if vg_name not None, checks given volume group on all nodes
3980     if self.op.vg_name:
3981       vglist = self.rpc.call_vg_list(node_list)
3982       for node in node_list:
3983         msg = vglist[node].fail_msg
3984         if msg:
3985           # ignoring down node
3986           self.LogWarning("Error while gathering data on node %s"
3987                           " (ignoring node): %s", node, msg)
3988           continue
3989         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3990                                               self.op.vg_name,
3991                                               constants.MIN_VG_SIZE)
3992         if vgstatus:
3993           raise errors.OpPrereqError("Error on node '%s': %s" %
3994                                      (node, vgstatus), errors.ECODE_ENVIRON)
3995
3996     if self.op.drbd_helper:
3997       # checks given drbd helper on all nodes
3998       helpers = self.rpc.call_drbd_helper(node_list)
3999       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4000         if ninfo.offline:
4001           self.LogInfo("Not checking drbd helper on offline node %s", node)
4002           continue
4003         msg = helpers[node].fail_msg
4004         if msg:
4005           raise errors.OpPrereqError("Error checking drbd helper on node"
4006                                      " '%s': %s" % (node, msg),
4007                                      errors.ECODE_ENVIRON)
4008         node_helper = helpers[node].payload
4009         if node_helper != self.op.drbd_helper:
4010           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4011                                      (node, node_helper), errors.ECODE_ENVIRON)
4012
4013     self.cluster = cluster = self.cfg.GetClusterInfo()
4014     # validate params changes
4015     if self.op.beparams:
4016       objects.UpgradeBeParams(self.op.beparams)
4017       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4018       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4019
4020     if self.op.ndparams:
4021       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4022       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4023
4024       # TODO: we need a more general way to handle resetting
4025       # cluster-level parameters to default values
4026       if self.new_ndparams["oob_program"] == "":
4027         self.new_ndparams["oob_program"] = \
4028             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4029
4030     if self.op.hv_state:
4031       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4032                                             self.cluster.hv_state_static)
4033       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4034                                for hv, values in new_hv_state.items())
4035
4036     if self.op.disk_state:
4037       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4038                                                 self.cluster.disk_state_static)
4039       self.new_disk_state = \
4040         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4041                             for name, values in svalues.items()))
4042              for storage, svalues in new_disk_state.items())
4043
4044     if self.op.ipolicy:
4045       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4046                                             group_policy=False)
4047
4048       all_instances = self.cfg.GetAllInstancesInfo().values()
4049       violations = set()
4050       for group in self.cfg.GetAllNodeGroupsInfo().values():
4051         instances = frozenset([inst for inst in all_instances
4052                                if compat.any(node in group.members
4053                                              for node in inst.all_nodes)])
4054         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4055         ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4056         new = _ComputeNewInstanceViolations(ipol,
4057                                             new_ipolicy, instances)
4058         if new:
4059           violations.update(new)
4060
4061       if violations:
4062         self.LogWarning("After the ipolicy change the following instances"
4063                         " violate them: %s",
4064                         utils.CommaJoin(utils.NiceSort(violations)))
4065
4066     if self.op.nicparams:
4067       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4068       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4069       objects.NIC.CheckParameterSyntax(self.new_nicparams)
4070       nic_errors = []
4071
4072       # check all instances for consistency
4073       for instance in self.cfg.GetAllInstancesInfo().values():
4074         for nic_idx, nic in enumerate(instance.nics):
4075           params_copy = copy.deepcopy(nic.nicparams)
4076           params_filled = objects.FillDict(self.new_nicparams, params_copy)
4077
4078           # check parameter syntax
4079           try:
4080             objects.NIC.CheckParameterSyntax(params_filled)
4081           except errors.ConfigurationError, err:
4082             nic_errors.append("Instance %s, nic/%d: %s" %
4083                               (instance.name, nic_idx, err))
4084
4085           # if we're moving instances to routed, check that they have an ip
4086           target_mode = params_filled[constants.NIC_MODE]
4087           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4088             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4089                               " address" % (instance.name, nic_idx))
4090       if nic_errors:
4091         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4092                                    "\n".join(nic_errors), errors.ECODE_INVAL)
4093
4094     # hypervisor list/parameters
4095     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4096     if self.op.hvparams:
4097       for hv_name, hv_dict in self.op.hvparams.items():
4098         if hv_name not in self.new_hvparams:
4099           self.new_hvparams[hv_name] = hv_dict
4100         else:
4101           self.new_hvparams[hv_name].update(hv_dict)
4102
4103     # disk template parameters
4104     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4105     if self.op.diskparams:
4106       for dt_name, dt_params in self.op.diskparams.items():
4107         if dt_name not in self.op.diskparams:
4108           self.new_diskparams[dt_name] = dt_params
4109         else:
4110           self.new_diskparams[dt_name].update(dt_params)
4111
4112     # os hypervisor parameters
4113     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4114     if self.op.os_hvp:
4115       for os_name, hvs in self.op.os_hvp.items():
4116         if os_name not in self.new_os_hvp:
4117           self.new_os_hvp[os_name] = hvs
4118         else:
4119           for hv_name, hv_dict in hvs.items():
4120             if hv_name not in self.new_os_hvp[os_name]:
4121               self.new_os_hvp[os_name][hv_name] = hv_dict
4122             else:
4123               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4124
4125     # os parameters
4126     self.new_osp = objects.FillDict(cluster.osparams, {})
4127     if self.op.osparams:
4128       for os_name, osp in self.op.osparams.items():
4129         if os_name not in self.new_osp:
4130           self.new_osp[os_name] = {}
4131
4132         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4133                                                   use_none=True)
4134
4135         if not self.new_osp[os_name]:
4136           # we removed all parameters
4137           del self.new_osp[os_name]
4138         else:
4139           # check the parameter validity (remote check)
4140           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4141                          os_name, self.new_osp[os_name])
4142
4143     # changes to the hypervisor list
4144     if self.op.enabled_hypervisors is not None:
4145       self.hv_list = self.op.enabled_hypervisors
4146       for hv in self.hv_list:
4147         # if the hypervisor doesn't already exist in the cluster
4148         # hvparams, we initialize it to empty, and then (in both
4149         # cases) we make sure to fill the defaults, as we might not
4150         # have a complete defaults list if the hypervisor wasn't
4151         # enabled before
4152         if hv not in new_hvp:
4153           new_hvp[hv] = {}
4154         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4155         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4156     else:
4157       self.hv_list = cluster.enabled_hypervisors
4158
4159     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4160       # either the enabled list has changed, or the parameters have, validate
4161       for hv_name, hv_params in self.new_hvparams.items():
4162         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4163             (self.op.enabled_hypervisors and
4164              hv_name in self.op.enabled_hypervisors)):
4165           # either this is a new hypervisor, or its parameters have changed
4166           hv_class = hypervisor.GetHypervisor(hv_name)
4167           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4168           hv_class.CheckParameterSyntax(hv_params)
4169           _CheckHVParams(self, node_list, hv_name, hv_params)
4170
4171     if self.op.os_hvp:
4172       # no need to check any newly-enabled hypervisors, since the
4173       # defaults have already been checked in the above code-block
4174       for os_name, os_hvp in self.new_os_hvp.items():
4175         for hv_name, hv_params in os_hvp.items():
4176           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4177           # we need to fill in the new os_hvp on top of the actual hv_p
4178           cluster_defaults = self.new_hvparams.get(hv_name, {})
4179           new_osp = objects.FillDict(cluster_defaults, hv_params)
4180           hv_class = hypervisor.GetHypervisor(hv_name)
4181           hv_class.CheckParameterSyntax(new_osp)
4182           _CheckHVParams(self, node_list, hv_name, new_osp)
4183
4184     if self.op.default_iallocator:
4185       alloc_script = utils.FindFile(self.op.default_iallocator,
4186                                     constants.IALLOCATOR_SEARCH_PATH,
4187                                     os.path.isfile)
4188       if alloc_script is None:
4189         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4190                                    " specified" % self.op.default_iallocator,
4191                                    errors.ECODE_INVAL)
4192
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    Copies the settings requested in the opcode onto C{self.cluster} and
    saves it via C{self.cfg.Update}. The C{self.new_*} attributes and
    C{self.cluster} itself are not set here; they must have been
    prepared before this method runs.

    If the master network device is changed, the master IP is taken
    down before the configuration is saved and started again afterwards.

    @param feedback_fn: function called with a message string to report
        progress and notices

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      # an empty (but not None) name is stored as None
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      # same convention as for the volume group: empty is stored as None
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    # The remaining parameter groups are only replaced if they were given
    # in the opcode
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      # the hypervisor parameters are stored again here, as a change of the
      # enabled list alone also requires saving self.new_hvparams
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams
    if self.op.hv_state:
      self.cluster.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.cluster.disk_state_static = self.new_disk_state

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    # UID pool: additions and removals are applied first, an explicitly
    # given pool then replaces the result
    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    # Applies a list of (action, OS name) modifications in place to the
    # cluster attribute named "aname"; "desc" is only used in messages.
    # Adding an OS already listed or removing one not listed is reported
    # via feedback_fn and otherwise ignored.
    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      # The master IP is shut down using the current (old) parameters; a
      # failure aborts the operation before anything is saved. It is
      # started again at the end of this method.
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      # a failure is only reported; the new netmask is recorded regardless
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      # The parameters are fetched again after the configuration update
      # and used to start the master IP; a failure here only causes a
      # warning.
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
4333
4334
4335 def _UploadHelper(lu, nodes, fname):
4336   """Helper for uploading a file and showing warnings.
4337
4338   """
4339   if os.path.exists(fname):
4340     result = lu.rpc.call_upload_file(nodes, fname)
4341     for to_node, to_result in result.items():
4342       msg = to_result.fail_msg
4343       if msg:
4344         msg = ("Copy of file %s to node %s failed: %s" %
4345                (fname, to_node, msg))
4346         lu.proc.LogWarning(msg)
4347
4348
def _ComputeAncillaryFiles(cluster, redist):
  """Compute the files external to Ganeti which must be kept consistent.

  @param cluster: the cluster object; its C{modify_etc_hosts},
      C{use_external_mip_script} and C{enabled_hypervisors} attributes
      are consulted
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
  @rtype: tuple
  @return: four sets of file names: files for all nodes, optional files,
      files for master candidates only and files for VM-capable nodes only

  """
  # Files wanted on every node, no matter what
  common_files = (
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    )
  files_all = set(common_files)

  if not redist:
    files_all.update(pathutils.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(pathutils.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(pathutils.ETC_HOSTS)

  if cluster.use_external_mip_script:
    files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)

  # Optional files; each of them must
  # - also be listed in one of the other categories
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([pathutils.RAPI_USERS_FILE])

  # Files restricted to master candidates
  files_mc = set()

  if not redist:
    files_mc.add(pathutils.CLUSTER_CONF_FILE)

    # File storage
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
      files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
      files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)

  # Files restricted to VM-capable nodes: the first list returned by each
  # enabled hypervisor
  files_vm = set()
  for hv_name in cluster.enabled_hypervisors:
    files_vm.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  # The second list returned by each hypervisor holds its optional files
  for hv_name in cluster.enabled_hypervisors:
    files_opt.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # No file name may show up in more than one category
  all_files_set = files_all | files_mc | files_vm
  expected_count = len(files_all) + len(files_mc) + len(files_vm)
  assert len(all_files_set) == expected_count, \
         "Found file listed in more than one file list"

  # Every optional file has to be in one of the categories, too
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

  # This one file should never ever be re-distributed via RPC
  assert (not redist or
          pathutils.FILE_STORAGE_PATHS_FILE not in all_files_set)

  return (files_all, files_opt, files_mc, files_vm)
4424
4425
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Copy the ancillary cluster files to the other nodes.

  The configuration and ssconf files are distributed by ConfigWriter; the
  other files which have to be present on the nodes are uploaded by this
  function. The master node itself is never a target.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Collect the target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  online_set = frozenset(online_nodes)
  vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # The master node must not receive anything
  master_name = master_info.name
  for targets in (online_nodes, vm_nodes):
    try:
      targets.remove(master_name)
    except ValueError:
      # master was not in this list
      pass

  # Collect the files to send (redistribution mode)
  (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)

  # The configuration file is never re-distributed from here
  assert (pathutils.CLUSTER_CONF_FILE not in files_all and
          pathutils.CLUSTER_CONF_FILE not in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # Send each group of files to its group of nodes
  for (targets, fnames) in ((online_nodes, files_all), (vm_nodes, files_vm)):
    for fname in fnames:
      _UploadHelper(lu, targets, fname)
4475
4476
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The LU only triggers a configuration update and the upload of the
  ancillary files.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all nodes, with C{share_locks} set to 1.

    """
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: feedback function, handed on to the
        configuration update

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
4497
4498
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    The activation RPC is sent to the node named in the master network
    parameters; a failed call raises an error via the result's C{Raise}.

    @param feedback_fn: feedback function (not used by this LU)

    """
    net_params = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    rpc_result = self.rpc.call_node_activate_master_ip(net_params.name,
                                                       net_params,
                                                       use_ext_script)
    rpc_result.Raise("Could not activate the master IP")
4512
4513
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    The deactivation RPC is sent to the node named in the master network
    parameters; a failed call raises an error via the result's C{Raise}.

    @param feedback_fn: feedback function (not used by this LU)

    """
    net_params = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    rpc_result = self.rpc.call_node_deactivate_master_ip(net_params.name,
                                                         net_params,
                                                         use_ext_script)
    rpc_result.Raise("Could not deactivate the master IP")
4527
4528
4529 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4530   """Sleep and poll for an instance's disk to sync.
4531
4532   """
4533   if not instance.disks or disks is not None and not disks:
4534     return True
4535
4536   disks = _ExpandCheckDisks(instance, disks)
4537
4538   if not oneshot:
4539     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4540
4541   node = instance.primary_node
4542
4543   for dev in disks:
4544     lu.cfg.SetDiskID(dev, node)
4545
4546   # TODO: Convert to utils.Retry
4547
4548   retries = 0
4549   degr_retries = 10 # in seconds, as we sleep 1 second each time
4550   while True:
4551     max_time = 0
4552     done = True
4553     cumul_degraded = False
4554     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4555     msg = rstats.fail_msg
4556     if msg:
4557       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4558       retries += 1
4559       if retries >= 10:
4560         raise errors.RemoteError("Can't contact node %s for mirror data,"
4561                                  " aborting." % node)
4562       time.sleep(6)
4563       continue
4564     rstats = rstats.payload
4565     retries = 0
4566     for i, mstat in enumerate(rstats):
4567       if mstat is None:
4568         lu.LogWarning("Can't compute data for node %s/%s",
4569                            node, disks[i].iv_name)
4570         continue
4571
4572       cumul_degraded = (cumul_degraded or
4573                         (mstat.is_degraded and mstat.sync_percent is None))
4574       if mstat.sync_percent is not None:
4575         done = False
4576         if mstat.estimated_time is not None:
4577           rem_time = ("%s remaining (estimated)" %
4578                       utils.FormatSeconds(mstat.estimated_time))
4579           max_time = mstat.estimated_time
4580         else:
4581           rem_time = "no time estimate"
4582         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4583                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4584
4585     # if we're done but degraded, let's do a few small retries, to
4586     # make sure we see a stable and not transient situation; therefore
4587     # we force restart of the loop
4588     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4589       logging.info("Degraded disks found, %d retries left", degr_retries)
4590       degr_retries -= 1
4591       time.sleep(1)
4592       continue
4593
4594     if done or oneshot:
4595       break
4596
4597     time.sleep(min(60, max_time))
4598
4599   if done:
4600     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4601   return not cumul_degraded
4602
4603
def _BlockdevFind(lu, node, dev, instance):
  """Run call_blockdev_find for a device annotated with its diskparams.

  @param lu: A reference to the lu object
  @param node: The node to call out
  @param dev: The device to find
  @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
  annotated_disks = _AnnotateDiskParams(instance, [dev], lu.cfg)
  # exactly one disk went in, exactly one is expected back
  [annotated_dev] = annotated_disks
  return lu.rpc.call_blockdev_find(node, annotated_dev)
4616
4617
def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
  """Annotate a device and pass it to L{_CheckDiskConsistencyInner}.

  All other arguments are handed on unchanged and the result of the inner
  function is returned.

  """
  annotated_disks = _AnnotateDiskParams(instance, [dev], lu.cfg)
  # exactly one disk went in, exactly one is expected back
  [annotated_dev] = annotated_disks
  return _CheckDiskConsistencyInner(lu, instance, annotated_dev, node,
                                    on_primary, ldisk=ldisk)
4625
4626
4627 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4628                                ldisk=False):
4629   """Check that mirrors are not degraded.
4630
4631   @attention: The device has to be annotated already.
4632
4633   The ldisk parameter, if True, will change the test from the
4634   is_degraded attribute (which represents overall non-ok status for
4635   the device(s)) to the ldisk (representing the local storage status).
4636
4637   """
4638   lu.cfg.SetDiskID(dev, node)
4639
4640   result = True
4641
4642   if on_primary or dev.AssembleOnSecondary():
4643     rstats = lu.rpc.call_blockdev_find(node, dev)
4644     msg = rstats.fail_msg
4645     if msg:
4646       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4647       result = False
4648     elif not rstats.payload:
4649       lu.LogWarning("Can't find disk on node %s", node)
4650       result = False
4651     else:
4652       if ldisk:
4653         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4654       else:
4655         result = result and not rstats.payload.is_degraded
4656
4657   if dev.children:
4658     for child in dev.children:
4659       result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4660                                                      on_primary)
4661
4662   return result
4663
4664
4665 class LUOobCommand(NoHooksLU):
4666   """Logical unit for OOB handling.
4667
4668   """
4669   REQ_BGL = False
4670   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4671
4672   def ExpandNames(self):
4673     """Gather locks we need.
4674
4675     """
4676     if self.op.node_names:
4677       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4678       lock_names = self.op.node_names
4679     else:
4680       lock_names = locking.ALL_SET
4681
4682     self.needed_locks = {
4683       locking.LEVEL_NODE: lock_names,
4684       }
4685
4686   def CheckPrereq(self):
4687     """Check prerequisites.
4688
4689     This checks:
4690      - the node exists in the configuration
4691      - OOB is supported
4692
4693     Any errors are signaled by raising errors.OpPrereqError.
4694
4695     """
4696     self.nodes = []
4697     self.master_node = self.cfg.GetMasterNode()
4698
4699     assert self.op.power_delay >= 0.0
4700
4701     if self.op.node_names:
4702       if (self.op.command in self._SKIP_MASTER and
4703           self.master_node in self.op.node_names):
4704         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4705         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4706
4707         if master_oob_handler:
4708           additional_text = ("run '%s %s %s' if you want to operate on the"
4709                              " master regardless") % (master_oob_handler,
4710                                                       self.op.command,
4711                                                       self.master_node)
4712         else:
4713           additional_text = "it does not support out-of-band operations"
4714
4715         raise errors.OpPrereqError(("Operating on the master node %s is not"
4716                                     " allowed for %s; %s") %
4717                                    (self.master_node, self.op.command,
4718                                     additional_text), errors.ECODE_INVAL)
4719     else:
4720       self.op.node_names = self.cfg.GetNodeList()
4721       if self.op.command in self._SKIP_MASTER:
4722         self.op.node_names.remove(self.master_node)
4723
4724     if self.op.command in self._SKIP_MASTER:
4725       assert self.master_node not in self.op.node_names
4726
4727     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4728       if node is None:
4729         raise errors.OpPrereqError("Node %s not found" % node_name,
4730                                    errors.ECODE_NOENT)
4731       else:
4732         self.nodes.append(node)
4733
4734       if (not self.op.ignore_status and
4735           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4736         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4737                                     " not marked offline") % node_name,
4738                                    errors.ECODE_STATE)
4739
  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    The command is run for each node in C{self.nodes}, in the order
    given by C{utils.NiceSort} on the node names; the RPC itself is
    always sent to the master node.

    @param feedback_fn: feedback function, handed on to the
        configuration update after power-on and power-off commands
    @rtype: list
    @return: one entry per node; each entry is a list holding the tuple
        (C{constants.RS_NORMAL}, node name) followed by a tuple
        (status, data), where data is the payload of the RPC on success
        and C{None} otherwise

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      # the entry is added to the result right away and completed below
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        # no OOB program for this node, nothing can be run
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          # an invalid payload is reported, but does not abort the run
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          # Track the power state on the node object; for a status query
          # a mismatch with the recorded state is only logged
          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          # wait between two power-on commands; no delay is needed after
          # the last node
          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret
4807
4808   def _CheckPayload(self, result):
4809     """Checks if the payload is valid.
4810
4811     @param result: RPC result
4812     @raises errors.OpExecError: If payload is not valid
4813
4814     """
4815     errs = []
4816     if self.op.command == constants.OOB_HEALTH:
4817       if not isinstance(result.payload, list):
4818         errs.append("command 'health' is expected to return a list but got %s" %
4819                     type(result.payload))
4820       else:
4821         for item, status in result.payload:
4822           if status not in constants.OOB_STATUSES:
4823             errs.append("health item '%s' has invalid status '%s'" %
4824                         (item, status))
4825
4826     if self.op.command == constants.OOB_POWER_STATUS:
4827       if not isinstance(result.payload, dict):
4828         errs.append("power-status is expected to return a dict but got %s" %
4829                     type(result.payload))
4830
4831     if self.op.command in [
4832       constants.OOB_POWER_ON,
4833       constants.OOB_POWER_OFF,
4834       constants.OOB_POWER_CYCLE,
4835       ]:
4836       if result.payload is not None:
4837         errs.append("%s is expected to not return payload but got '%s'" %
4838                     (self.op.command, result.payload))
4839
4840     if errs:
4841       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4842                                utils.CommaJoin(errs))
4843
4844
class _OsQuery(_QueryBase):
  """Query implementation for operating system definitions.

  """
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    """Declares the (empty) set of locks and the names to be queried.

    """
    # Nodes used to be locked in shared mode here; this was removed
    # temporarily.
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}

    self.do_locking = self.use_locking

    # "wanted" interacts with _QueryBase._GetNames
    self.wanted = self.names or locking.ALL_SET

  def DeclareLocks(self, lu, level):
    """Does nothing, no further locks are declared.

    """
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and lists of (path, status, diagnose,
        variants, parameters, api_versions) tuples as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], [], []),
                                     (/srv/..., False, "invalid api",
                                      [], [], [])],
                           "node2": [(/srv/..., True, "", [], [], [])]}
          }

    """
    # Only nodes which answered the RPC get an entry per OS, so that
    # nodes with a non-responding node daemon don't make all OSes invalid
    responding = [name for name in rlist if not rlist[name].fail_msg]

    by_os = {}
    for (node_name, node_result) in rlist.items():
      if node_result.fail_msg or not node_result.payload:
        continue

      for os_entry in node_result.payload:
        (os_name, path, status, diagnose, variants,
         params, api_versions) = os_entry

        per_node = by_os.get(os_name)
        if per_node is None:
          # First time this OS is seen: start with an empty list for
          # each of the responding nodes
          per_node = dict((nname, []) for nname in responding)
          by_os[os_name] = per_node

        # convert params from [name, help] to (name, help)
        param_tuples = [tuple(v) for v in params]
        per_node[node_name].append((path, status, diagnose, variants,
                                    param_tuples, api_versions))

    return by_os

  def _GetQueryData(self, lu):
    """Computes the list of operating systems and their attributes.

    Only the first OS entry of each node is looked at; variants,
    parameters and API versions are reduced to the values common to all
    nodes.

    """
    # Locking is not used
    assert (not compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) and
            not self.do_locking and not self.use_locking)

    valid_nodes = []
    for node in lu.cfg.GetAllNodesInfo().values():
      if not node.offline and node.vm_capable:
        valid_nodes.append(node.name)

    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for (idx, os_list) in enumerate(os_data.values()):
        info.valid = bool(info.valid and os_list and os_list[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = os_list[0][3:6]
        merges = [
          (variants, node_variants),
          (parameters, node_params),
          (api_versions, node_api),
          ]
        for (common, node_values) in merges:
          if idx == 0:
            # First entry
            common.update(node_values)
          else:
            # Filter out inconsistent values
            common.intersection_update(node_values)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    ordered_names = self._GetNames(lu, pol.keys(), None)
    return [data[name] for name in ordered_names if name in data]
4960
4961
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: the requested output fields
    @param names: the OS names to restrict the query to
    @return: the name filter, the status filter, both joined with a
        logical "and", or C{None} if neither applies

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for fname in ["hidden", "blacklisted"]:
      if fname not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, fname]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      status_filter = [qlang.OP_AND] + conditions
    else:
      status_filter = None

    if not name_filter:
      return status_filter

    if status_filter:
      return [qlang.OP_AND, name_filter, status_filter]

    return name_filter

  def CheckArguments(self):
    """Creates the OS query object, without locking.

    """
    fields = self.op.output_fields
    qfilter = self._BuildFilter(fields, self.op.names)
    self.oq = _OsQuery(qfilter, fields, False)

  def ExpandNames(self):
    """Delegates the name expansion to the query object.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns the old-style result.

    """
    return self.oq.OldStyleQuery(self)
5004
5005
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    target = self.op.node_name
    return {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    remaining = self.cfg.GetNodeList()
    # The node may not be part of the list, which is fine
    if self.op.node_name in remaining:
      remaining.remove(self.op.node_name)
    return (remaining, remaining)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg

    self.op.node_name = _ExpandNodeName(cfg, self.op.node_name)
    node = cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    if node.name == cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for (instance_name, instance) in cfg.GetAllInstancesInfo().items():
      if node.name not in instance.all_nodes:
        continue
      raise errors.OpPrereqError("Instance %s is still running on the node,"
                                 " please remove first" % instance_name,
                                 errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The node is taken out of the configuration first; afterwards its post
    hooks are run and it is told to leave the cluster. Finally, if the
    cluster manages /etc/hosts, the entry of the node is removed and the
    ancillary files are redistributed.

    """
    node_name = self.node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    # Read before the node is removed from the configuration
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert (locking.BGL in
            self.owned_locks(locking.LEVEL_CLUSTER)), "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    if leave_result.fail_msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_result.fail_msg)

    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    # Remove node from our /etc/hosts
    hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                  constants.ETC_HOSTS_REMOVE,
                                                  node_name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
5098
5099
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted nodes and declares the node locks, if needed.

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    # If any non-static field is requested we need to lock the nodes
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Does nothing, no further locks are declared.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Only the data groups listed in C{self.requested_data} are gathered.

    """
    requested = self.requested_data
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Live data, retrieved via RPC
    live_data = None
    if query.NQ_LIVE in requested:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames
                       if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = {}
      for (name, nresult) in node_data.items():
        # Failed or empty results are left out
        if not nresult.fail_msg and nresult.payload:
          live_data[name] = rpc.MakeLegacyNodeInfo(nresult.payload)

    # Primary and secondary instances of each node
    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in requested:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      for inst in lu.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    # Out-of-band support
    oob_support = None
    if query.NQ_OOB in requested:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for (name, node) in all_info.items())

    # Node groups
    groups = {}
    if query.NQ_GROUP in requested:
      groups = lu.cfg.GetAllNodeGroupsInfo()

    node_list = [all_info[name] for name in nodenames]
    return query.NodeQueryData(node_list, live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
5174
5175
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the work is delegated to a L{_NodeQuery} object.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the node query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Delegates the name expansion to the query object.

    """
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates the lock declaration to the query object.

    """
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns the old-style result.

    """
    return self.nq.OldStyleQuery(self)
5195
5196
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Verifies the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares shared locks on the given nodes, or on all of them.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Offline nodes are skipped silently, nodes failing the RPC call are
    skipped with a warning.

    @rtype: list
    @return: one list per volume, holding the requested fields converted
        to strings

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    vol2inst = _MapInstanceDisksToNodes(self.cfg.GetAllInstancesInfo().values())

    # One getter per known field; a value is only computed if its field
    # was actually requested
    getters = {
      "node": lambda node, vol: node,
      "phys": lambda node, vol: vol["dev"],
      "vg": lambda node, vol: vol["vg"],
      "name": lambda node, vol: vol["name"],
      "size": lambda node, vol: int(float(vol["size"])),
      "instance": lambda node, vol: vol2inst.get((node, vol["vg"] + "/" +
                                                  vol["name"]), "-"),
      }

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue

      if nresult.fail_msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node,
                        nresult.fail_msg)
        continue

      for vol in sorted(nresult.payload, key=lambda entry: entry["dev"]):
        row = []
        for field in self.op.output_fields:
          try:
            getter = getters[field]
          except KeyError:
            raise errors.ParameterError(field)
          row.append(str(getter(node, vol)))

        output.append(row)

    return output
5265
5266
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Verifies the requested output fields.

    """
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares shared locks on the given nodes, or on all of them.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      wanted = locking.ALL_SET
    else:
      wanted = _GetWantedNodes(self, self.op.nodes)

    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently, nodes failing the RPC call are
    skipped with a warning.

    @rtype: list
    @return: one list per storage unit, holding the requested fields;
        nodes and unit names are both sorted with L{utils.NiceSort}

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      fields = requested[:]
    else:
      fields = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [name for name in fields if name not in lu_only]

    field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      if nresult.fail_msg:
        self.LogWarning("Can't get storage data from node %s: %s", node,
                        nresult.fail_msg)
        continue

      # Index the returned rows by the name of the storage unit
      rows = dict((row[name_idx], row) for row in nresult.payload)

      for unit_name in utils.NiceSort(rows.keys()):
        row = rows[unit_name]
        out = []

        for field in requested:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
5348
5349
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted instances and declares the locks, if needed.

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedInstances(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

    if self.do_locking:
      # Group and node locks are filled in by DeclareLocks
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    """Declares the node group and node locks for the owned instances.

    """
    if not self.do_locking:
      return

    if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
      assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

      # Lock all groups used by instances optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      group_uuids = set()
      for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE):
        for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name):
          group_uuids.add(group_uuid)

      lu.needed_locks[locking.LEVEL_NODEGROUP] = group_uuids
    elif level == locking.LEVEL_NODE:
      lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    """Verifies the optimistically acquired node group locks.

    """
    locked_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    locked_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in locked_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, locked_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Only the data groups listed in C{self.requested_data} are gathered.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    requested = self.requested_data
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(),
                                    locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    used_nodes = frozenset(node_name
                           for inst in instance_list
                           for node_name in inst.all_nodes)
    hv_list = list(set(inst.hypervisor for inst in instance_list))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()
    live_data = {}

    # Gather data as requested
    if requested & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      node_data = lu.rpc.call_all_instances_info(used_nodes, hv_list)
      for node_name in used_nodes:
        result = node_data[node_name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(node_name)

        if result.fail_msg:
          bad_nodes.append(node_name)
          continue

        if not result.payload:
          # no instance is alive
          continue

        for inst_name in result.payload:
          if inst_name not in all_info:
            # orphan instance; we don't list it here as we don't
            # handle this case yet in the output of instance listing
            logging.warning("Orphan instance '%s' found on node %s",
                            inst_name, node_name)
          elif all_info[inst_name].primary_node == node_name:
            # Note that the complete payload of the node is merged
            live_data.update(result.payload)
          else:
            wrongnode_inst.add(inst_name)

    disk_usage = None
    if query.IQ_DISKUSAGE in requested:
      gmi = ganeti.masterd.instance
      disk_usage = {}
      for inst in instance_list:
        disk_sizes = [{constants.IDISK_SIZE: disk.size}
                      for disk in inst.disks]
        disk_usage[inst.name] = gmi.ComputeDiskSize(inst.disk_template,
                                                    disk_sizes)

    consinfo = None
    if query.IQ_CONSOLE in requested:
      consinfo = {}
      for inst in instance_list:
        if inst.name not in live_data:
          consinfo[inst.name] = None
        else:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
      assert set(consinfo.keys()) == set(instance_names)

    node_map = None
    groups = None
    if query.IQ_NODES in requested:
      node_names = set(itertools.chain(*[inst.all_nodes
                                         for inst in instance_list]))
      node_map = dict(lu.cfg.GetMultiNodeInfo(node_names))
      group_uuids = set(node.group for node in node_map.values())
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in group_uuids)

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   node_map, groups)
5481
5482
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  The work is delegated to the query implementation matching
  C{self.op.what}.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation for the requested kind.

    """
    self.impl = _GetQueryImplementation(self.op.what)(self.op.qfilter,
                                                      self.op.fields,
                                                      self.op.use_locking)

  def ExpandNames(self):
    """Delegates the name expansion to the query implementation.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates the lock declaration to the query implementation.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns the new-style result.

    """
    return self.impl.NewStyleQuery(self)
5503
5504
class LUQueryFields(NoHooksLU):
  """Query for the fields available for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation for the requested kind.

    """
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Returns the field definitions selected by C{self.op.fields}.

    """
    field_defs = self.qcls.FIELDS
    return query.QueryFields(field_defs, self.op.fields)
5520
5521
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and verifies the requested changes.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    # Fields to be changed which are not modifiable for this type
    unsupported = set(self.op.changes.keys()) - modifiable
    if not unsupported:
      return

    raise errors.OpPrereqError("The following fields can not be modified for"
                               " storage units of type '%s': %r" %
                               (storage_type, list(unsupported)),
                               errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declares the lock on the node holding the storage unit.

    """
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = self.op.node_name

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit.

    """
    node_name = self.op.node_name
    unit_name = self.op.name

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(node_name,
                                          self.op.storage_type, st_args,
                                          unit_name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (unit_name, node_name))
5562
5563
5564 class LUNodeAdd(LogicalUnit):
5565   """Logical unit for adding node to the cluster.
5566
5567   """
5568   HPATH = "node-add"
5569   HTYPE = constants.HTYPE_NODE
5570   _NFLAGS = ["master_capable", "vm_capable"]
5571
5572   def CheckArguments(self):
5573     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5574     # validate/normalize the node name
5575     self.hostname = netutils.GetHostname(name=self.op.node_name,
5576                                          family=self.primary_ip_family)
5577     self.op.node_name = self.hostname.name
5578
5579     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5580       raise errors.OpPrereqError("Cannot readd the master node",
5581                                  errors.ECODE_STATE)
5582
5583     if self.op.readd and self.op.group:
5584       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5585                                  " being readded", errors.ECODE_INVAL)
5586
5587   def BuildHooksEnv(self):
5588     """Build hooks env.
5589
5590     This will run on all nodes before, and on all nodes + the new node after.
5591
5592     """
5593     return {
5594       "OP_TARGET": self.op.node_name,
5595       "NODE_NAME": self.op.node_name,
5596       "NODE_PIP": self.op.primary_ip,
5597       "NODE_SIP": self.op.secondary_ip,
5598       "MASTER_CAPABLE": str(self.op.master_capable),
5599       "VM_CAPABLE": str(self.op.vm_capable),
5600       }
5601
5602   def BuildHooksNodes(self):
5603     """Build hooks nodes.
5604
5605     """
5606     # Exclude added node
5607     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5608     post_nodes = pre_nodes + [self.op.node_name, ]
5609
5610     return (pre_nodes, post_nodes)
5611
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config (or, for a re-add, that
       it is in the config with the same secondary IP)
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster
     - its IP addresses are not used by any other configured node
     - it answers TCP pings on the node daemon port and reports the
       protocol version this code was built with

    As a side effect, the opcode's primary/secondary IP and capability
    flags are filled in, and C{self.new_node}, C{self.master_candidate}
    and C{self.changed_primary_ip} (plus C{self.new_hv_state} and
    C{self.new_disk_state} when given) are computed for later use.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    # the primary IP always comes from the resolved hostname, overwriting
    # whatever the opcode carried
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      # no secondary IP given: treat the node as single-homed
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    # a plain add requires an unknown node, a re-add a known one
    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        # this is the node's own previous entry: the secondary IP must be
        # unchanged, a changed primary IP is only recorded
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        # do not run the conflict check against the node itself
        continue

      # neither of the new addresses may be in use by another node, in
      # either role
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      # unspecified flags are inherited from the existing node object
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      # unspecified flags default to True for new nodes
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    # NOTE(review): presumably the re-added node is passed as an exception
    # so that its existing entry is not counted when deciding about the
    # promotion -- confirm against _DecideSelfPromotion
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      # re-adds reuse (and later modify) the existing node object
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    # None is passed as the second argument to both merge helpers below;
    # presumably this means "no previous state to merge with" -- confirm
    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)

    # TODO: If we need to have multiple DnsOnlyRunner we probably should make
    #       it a property on the base class.
    result = rpc.DnsOnlyRunner().call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpPrereqError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.payload),
                                 errors.ECODE_ENVIRON)
5759
  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The node object prepared by CheckPrereq is completed with the flags
    and parameters from the opcode, the master verifies that it can
    reach the node, and the node is finally registered (or
    re-registered, for re-adds) through the context.

    @param feedback_fn: function used to report ssh/hostname
        verification failures; also passed on to the configuration
        update for re-adds
    @raise errors.OpExecError: if the ssh/hostname verification fails

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We are adding a new node so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # note that this replaces the node parameters with an empty dict when
    # none are given, for re-adds as well
    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    if self.op.hv_state:
      new_node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      new_node.disk_state_static = self.new_disk_state

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    # only dual-homed nodes have a separate secondary IP to verify
    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    # have the master verify the new node via the node-verify RPC
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      # a non-empty NV_NODELIST payload maps failed nodes to error messages
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          # a failed demotion is only reported, it does not abort the re-add
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
5851
5852
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  # the four node roles, numbered 0 to 3
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  # only combinations with at most one flag set are known
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  # same order as the tuples used as keys in _F2R
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validate the opcode arguments and derive the locking needs.

    Expands the node name, rejects requests that carry no modification
    or more than one value set to True, validates the new secondary IP
    (if any) and computes C{self.might_demote}, C{self.lock_all} and
    C{self.lock_instances}.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # NOTE(review): self.op.powered is handled in CheckPrereq/Exec but is
    # not part of this list, so passing only "powered" is rejected below
    # as "no modification" -- confirm whether this is intended
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    # NOTE(review): this also counts master_capable/vm_capable, so e.g.
    # setting both of them to True in one request is rejected as well --
    # confirm whether only the three role flags were meant
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate is False or
                         self.op.offline is True or
                         self.op.drained is True or
                         self.op.master_capable is False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    # all nodes are locked only if a demotion may need another node promoted
    self.lock_all = self.op.auto_promote and self.might_demote
    # instances are locked only when the secondary IP is being changed
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    An instance is affected if it uses an internally mirrored disk
    template and has the modified node among its nodes.

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    """Declare the node, node resource and instance locks.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get node resource and instance locks in shared mode; they are not used
    # for anything but read-only access
    self.share_locks[locking.LEVEL_NODE_RES] = 1
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node and modified node) is used for both
    phases.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the requested changes are allowed for the node
    (flag changes on the master, master candidate vs. master capable,
    vm_capable vs. hosted instances, remaining master candidates,
    powered state, de-offlining, secondary IP changes) and computes the
    old and new role as well as the new node parameters and states.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      # recompute the affected instances now that the locks are held
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable is False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion (--auto-promote or RAPI"
                                   " auto_promote=True)", errors.ECODE_STATE)

    # remember the current flags/role, Exec compares them to the new ones
    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) is False and getattr(node, attr) is False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered is True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained is False or self.op.offline is False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable is False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    # When changing the secondary ip, verify if this is a single-homed to
    # multi-homed transition or vice versa, and apply the relevant
    # restrictions.
    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip != node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from single-homed to multi-homed"
                          " cluster. All nodes will require a secondary ip.")
        else:
          raise errors.OpPrereqError("Changing the secondary ip on a"
                                     " single-homed cluster requires the"
                                     " --force option to be passed, and the"
                                     " target node to be the master",
                                     errors.ECODE_INVAL)
      elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from multi-homed to single-homed"
                          " cluster. Secondary IPs will have to be removed.")
        else:
          raise errors.OpPrereqError("Cannot set the secondary IP to be the"
                                     " same as the primary IP on a multi-homed"
                                     " cluster, unless the --force option is"
                                     " passed, and the target node is the"
                                     " master", errors.ECODE_INVAL)

      # a non-empty secondary IP implies lock_instances, hence
      # affected_instances is a dictionary here and not None
      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          msg = ("Cannot change secondary IP address: offline node has"
                 " instances (%s) configured to use it" %
                 utils.CommaJoin(affected_instances.keys()))
          raise errors.OpPrereqError(msg, errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    # the new values computed below are only applied in Exec
    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    @param feedback_fn: passed on to the configuration update
    @return: a list of (parameter name, new value) tuples describing the
        capability flag, role flag and secondary IP changes that were
        applied

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      # report only the flags that actually differ, then apply all three
      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
6208
6209
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and protect the master node.

    Powercycling the master is only accepted if the force parameter
    is set.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    targets_master = (node_name == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC call

    """
    target = self.op.node_name
    hv_type = self.cfg.GetHypervisorType()

    rpc_result = self.rpc.call_node_powercycle(target, hv_type)
    rpc_result.Raise("Failed to schedule the reboot")

    return rpc_result.payload
6240
6241
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @return: a dictionary with version constants and cluster-wide
        settings; hypervisor parameters are restricted to the enabled
        hypervisors

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Filter just for enabled hypervisors
    os_hvp = dict((os_name,
                   dict((hv_name, hv_params)
                        for (hv_name, hv_params) in hv_dict.items()
                        if hv_name in enabled_hvs))
                  for (os_name, hv_dict) in cluster.os_hvp.items())

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict((hv_name, cluster.hvparams[hv_name])
                       for hv_name in enabled_hvs),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "diskparams": cluster.diskparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
6313
6314
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the cluster query helper for the requested fields.

    """
    wanted_fields = self.op.output_fields
    self.cq = _ClusterQuery(None, wanted_fields, False)

  def ExpandNames(self):
    """Let the query helper declare the needed names.

    """
    helper = self.cq
    helper.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query helper declare the locks for the given level.

    """
    helper = self.cq
    helper.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its only row.

    """
    rows = self.cq.OldStyleQuery(self)

    # exactly one row is expected from the query helper
    assert len(rows) == 1

    return rows[0]
6336
6337
class _ClusterQuery(_QueryBase):
  """Query helper gathering the data for cluster-level fields.

  """
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)
  SORT_FIELD = None

  def ExpandNames(self, lu):
    """Set up the (empty) lock requirements for a cluster query.

    @param lu: the logical unit on whose behalf the query is run; its
        C{needed_locks} attribute is reset to an empty dictionary
    @raise errors.OpPrereqError: if the query was set up to use locking

    """
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking

    if self.do_locking:
      raise errors.OpPrereqError("Can not use locking for cluster queries",
                                 errors.ECODE_INVAL)

  def DeclareLocks(self, lu, level):
    """Declare no locks, as cluster queries do not use locking.

    """
    pass

  def _GetQueryData(self, lu):
    """Collects the cluster-level data needed by the requested fields.

    Each piece of data (cluster configuration, job queue drain flag and
    watcher pause information) is only gathered if it is listed in
    C{self.requested_data}; otherwise C{NotImplemented} is passed on as
    a placeholder.

    @param lu: the logical unit on whose behalf the query is run
    @return: the result of calling L{query.ClusterQueryData} with the
        collected values

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    if query.CQ_CONFIG in self.requested_data:
      cluster = lu.cfg.GetClusterInfo()
    else:
      cluster = NotImplemented

    if query.CQ_QUEUE_DRAINED in self.requested_data:
      # The queue counts as drained if the drain file exists
      drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
    else:
      drain_flag = NotImplemented

    if query.CQ_WATCHER_PAUSE in self.requested_data:
      watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
    else:
      watcher_pause = NotImplemented

    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6384
6385
class LUInstanceActivateDisks(NoHooksLU):
  """Logical unit assembling the block devices of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node lock recalculation.

    """
    self._ExpandAndLockInstance()
    # The node locks are filled in later, see DeclareLocks
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration and its primary
    node is checked via L{_CheckNodeOnline}.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks cannot be assembled or, when
        waiting for the sync was requested, L{_WaitForSync} reports
        failure

    """
    (assembled, disks_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not assembled:
      raise errors.OpExecError("Cannot activate block devices")

    if self.op.wait_for_sync and not _WaitForSync(self, self.instance):
      raise errors.OpExecError("Some disks of the instance are degraded!")

    return disks_info
6427
6428
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes: in a first pass every
  node of each disk is asked to assemble the device in secondary mode,
  in a second pass the devices on the primary node are assembled in
  primary mode.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first pass
      (assembly in secondary mode) are only logged and won't result in
      an error return from the function; errors reported by secondary
      nodes which are marked offline are always treated this way
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a tuple of (disks_ok, device_info); disks_ok is False if the
      assembly failed on a node whose errors are not ignored, True
      otherwise; device_info is a list with one
      (primary_node, instance_visible_name, node_visible_name) tuple per
      disk, with the mapping from node devices to instance devices
      (node_visible_name is None if the assembly on the primary node
      failed)

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # Work on a copy so that the configured disk keeps its size
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             False, idx)
      msg = result.fail_msg
      if msg:
        is_offline_secondary = (node in instance.secondary_nodes and
                                result.offline)
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not (ignore_secondaries or is_offline_secondary):
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        # Failures on the primary node are never ignored
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
6519
6520
def _StartInstanceDisks(lu, instance, force):
  """Assemble the disks of an instance, cleaning up on failure.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be started
  @param force: passed on as C{ignore_secondaries} to
      L{_AssembleInstanceDisks}; if it is false but not C{None}, a hint
      about '--force' is logged on failure
  @raise errors.OpExecError: if the disks could not be assembled; the
      disks are shut down again before raising

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
6534
6535
class LUInstanceDeactivateDisks(NoHooksLU):
  """Logical unit shutting down the block devices of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node lock recalculation.

    """
    self._ExpandAndLockInstance()
    # The node locks are filled in later, see DeclareLocks
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    With C{force} set in the opcode the disks are shut down directly,
    otherwise L{_SafeShutdownInstanceDisks} is used.

    """
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks

    shutdown_fn(self, self.instance)
6570
6571
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance after a state check.

  L{_CheckInstanceState} is called with C{INSTANCE_DOWN} first; only if
  that check passes are the disks handed to L{_ShutdownInstanceDisks}.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to shut down, passed on unchanged to
      L{_ShutdownInstanceDisks}

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN,
                      msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
6581
6582
6583 def _ExpandCheckDisks(instance, disks):
6584   """Return the instance disks selected by the disks list
6585
6586   @type disks: list of L{objects.Disk} or None
6587   @param disks: selected disks
6588   @rtype: list of L{objects.Disk}
6589   @return: selected instance disks to act on
6590
6591   """
6592   if disks is None:
6593     return instance.disks
6594   else:
6595     if not set(disks).issubset(instance.disks):
6596       raise errors.ProgrammerError("Can only act on disks belonging to the"
6597                                    " target instance")
6598     return disks
6599
6600
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance. Every failure is
  logged as a warning, but not every failure makes the function report
  an error:

    - errors on the primary node are ignored if C{ignore_primary} is
      true
    - errors on other nodes are ignored if the RPC result marks the node
      as offline

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks are shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to shut down (or all, if None)
  @type ignore_primary: boolean
  @param ignore_primary: if true, errors on the primary node won't
      result in an error return from the function
  @rtype: boolean
  @return: False if a shutdown failed with an error that is not
      ignored, True otherwise

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # Only failures which are not explicitly tolerated count
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result
6625
6626
6627 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6628   """Checks if a node has enough free memory.
6629
6630   This function check if a given node has the needed amount of free
6631   memory. In case the node has less memory or we cannot get the
6632   information from the node, this function raise an OpPrereqError
6633   exception.
6634
6635   @type lu: C{LogicalUnit}
6636   @param lu: a logical unit from which we get configuration data
6637   @type node: C{str}
6638   @param node: the node to check
6639   @type reason: C{str}
6640   @param reason: string to use in the error message
6641   @type requested: C{int}
6642   @param requested: the amount of memory in MiB to check for
6643   @type hypervisor_name: C{str}
6644   @param hypervisor_name: the hypervisor to ask for memory stats
6645   @rtype: integer
6646   @return: node current free memory
6647   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6648       we cannot check the node
6649
6650   """
6651   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6652   nodeinfo[node].Raise("Can't get data from node %s" % node,
6653                        prereq=True, ecode=errors.ECODE_ENVIRON)
6654   (_, _, (hv_info, )) = nodeinfo[node].payload
6655
6656   free_mem = hv_info.get("memory_free", None)
6657   if not isinstance(free_mem, int):
6658     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6659                                " was '%s'" % (node, free_mem),
6660                                errors.ECODE_ENVIRON)
6661   if requested > free_mem:
6662     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6663                                " needed %s MiB, available %s MiB" %
6664                                (node, reason, requested, free_mem),
6665                                errors.ECODE_NORES)
6666   return free_mem
6667
6668
6669 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6670   """Checks if nodes have enough free disk space in the all VGs.
6671
6672   This function check if all given nodes have the needed amount of
6673   free disk. In case any node has less disk or we cannot get the
6674   information from the node, this function raise an OpPrereqError
6675   exception.
6676
6677   @type lu: C{LogicalUnit}
6678   @param lu: a logical unit from which we get configuration data
6679   @type nodenames: C{list}
6680   @param nodenames: the list of node names to check
6681   @type req_sizes: C{dict}
6682   @param req_sizes: the hash of vg and corresponding amount of disk in
6683       MiB to check for
6684   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6685       or we cannot check the node
6686
6687   """
6688   for vg, req_size in req_sizes.items():
6689     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6690
6691
6692 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6693   """Checks if nodes have enough free disk space in the specified VG.
6694
6695   This function check if all given nodes have the needed amount of
6696   free disk. In case any node has less disk or we cannot get the
6697   information from the node, this function raise an OpPrereqError
6698   exception.
6699
6700   @type lu: C{LogicalUnit}
6701   @param lu: a logical unit from which we get configuration data
6702   @type nodenames: C{list}
6703   @param nodenames: the list of node names to check
6704   @type vg: C{str}
6705   @param vg: the volume group to check
6706   @type requested: C{int}
6707   @param requested: the amount of disk in MiB to check for
6708   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6709       or we cannot check the node
6710
6711   """
6712   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6713   for node in nodenames:
6714     info = nodeinfo[node]
6715     info.Raise("Cannot get current information from node %s" % node,
6716                prereq=True, ecode=errors.ECODE_ENVIRON)
6717     (_, (vg_info, ), _) = info.payload
6718     vg_free = vg_info.get("vg_free", None)
6719     if not isinstance(vg_free, int):
6720       raise errors.OpPrereqError("Can't compute free disk space on node"
6721                                  " %s for vg %s, result was '%s'" %
6722                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6723     if requested > vg_free:
6724       raise errors.OpPrereqError("Not enough disk space on target node %s"
6725                                  " vg %s: required %d MiB, available %d MiB" %
6726                                  (node, vg, requested, vg_free),
6727                                  errors.ECODE_NORES)
6728
6729
6730 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6731   """Checks if nodes have enough physical CPUs
6732
6733   This function checks if all given nodes have the needed number of
6734   physical CPUs. In case any node has less CPUs or we cannot get the
6735   information from the node, this function raises an OpPrereqError
6736   exception.
6737
6738   @type lu: C{LogicalUnit}
6739   @param lu: a logical unit from which we get configuration data
6740   @type nodenames: C{list}
6741   @param nodenames: the list of node names to check
6742   @type requested: C{int}
6743   @param requested: the minimum acceptable number of physical CPUs
6744   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6745       or we cannot check the node
6746
6747   """
6748   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6749   for node in nodenames:
6750     info = nodeinfo[node]
6751     info.Raise("Cannot get current information from node %s" % node,
6752                prereq=True, ecode=errors.ECODE_ENVIRON)
6753     (_, _, (hv_info, )) = info.payload
6754     num_cpus = hv_info.get("cpu_total", None)
6755     if not isinstance(num_cpus, int):
6756       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6757                                  " on node %s, result was '%s'" %
6758                                  (node, num_cpus), errors.ECODE_ENVIRON)
6759     if requested > num_cpus:
6760       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6761                                  "required" % (node, num_cpus, requested),
6762                                  errors.ECODE_NORES)
6763
6764
class LUInstanceStartup(LogicalUnit):
  """Logical unit starting an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Upgrade and type-check the backend parameter overrides, if any.

    """
    beparams = self.op.beparams
    if beparams:
      # Both calls work on the dictionary stored in the opcode
      objects.UpgradeBeParams(beparams)
      utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Lock the instance and prepare the node resource lock recalculation.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock the primary node's resources at the node resource level.

    """
    if level == locking.LEVEL_NODE_RES:
      self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the C{FORCE} flag of the opcode, updated with
    the values computed by L{_BuildInstanceHookEnvByObject}.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all nodes of the instance)
    is returned for both elements of the result.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration and pass the
    C{INSTANCE_ONLINE} state check; hypervisor parameter overrides are
    validated. Unless an offline primary node is being ignored, the
    primary node, the bridges and (if the instance is not reported as
    running) the free memory on the primary node are checked as well.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    hvparams = self.op.hvparams
    if hvparams:
      # Syntax check of the overrides, done locally on top of the
      # parameters computed by the cluster for this instance
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(hvparams)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")

      # None of the remaining checks are run in this case
      return

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    bep.update(self.op.beparams)

    # The bridges must exist
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    # An empty payload means the instance is not running already
    if not remote_info.payload:
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    Unless C{no_remember} is set, the instance is marked as up in the
    configuration first. With an offline primary node nothing else is
    done; otherwise the disks are started and the instance is started
    on its primary node.

    @raise errors.OpExecError: if the instance could not be started;
        its disks are shut down again before raising

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
      return

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start_args = (instance, self.op.hvparams, self.op.beparams)
    result = self.rpc.call_instance_start(pnode, start_args,
                                          self.op.startup_paused)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
6885
6886
class LUInstanceReboot(LogicalUnit):
  """Logical unit rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the reboot-related opcode parameters, updated
    with the values computed by L{_BuildInstanceHookEnvByObject}.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all nodes of the instance)
    is returned for both elements of the result.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration and pass the
    C{INSTANCE_ONLINE} state check; its primary node and its bridges
    are checked as well.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # The bridges must exist
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    A running instance with a soft or hard reboot type is rebooted via
    the reboot RPC call. In all other cases the instance is shut down
    (if it is running), its disks are restarted and it is started
    again. Finally the instance is marked as up in the configuration.

    """
    instance = self.instance
    pnode = instance.primary_node
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode)
    is_running = bool(remote_info.payload)

    in_place_types = [
      constants.INSTANCE_REBOOT_SOFT,
      constants.INSTANCE_REBOOT_HARD,
      ]

    if is_running and reboot_type in in_place_types:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      result = self.rpc.call_instance_reboot(pnode, instance, reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if is_running:
        result = self.rpc.call_instance_shutdown(pnode, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)

      _StartInstanceDisks(self, instance, self.op.ignore_secondaries)
      result = self.rpc.call_instance_start(pnode, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
6979
6980
class LUInstanceShutdown(LogicalUnit):
  """Logical unit shutting down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The values computed by L{_BuildInstanceHookEnvByObject} are
    extended with the C{TIMEOUT} of the opcode.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["TIMEOUT"] = self.op.timeout
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all nodes of the instance)
    is returned for both elements of the result.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration and pass the
    C{INSTANCE_ONLINE} state check. Unless an offline primary node is
    being ignored, the primary node is checked as well.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    pnode_info = self.cfg.GetNodeInfo(instance.primary_node)
    self.primary_offline = pnode_info.offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    Unless C{no_remember} is set, the instance is marked as down in the
    configuration first. With an offline primary node nothing else is
    done; otherwise the instance is shut down on its primary node (a
    failure is only logged) and its disks are shut down.

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    result = self.rpc.call_instance_shutdown(pnode, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    # The disks are shut down even if stopping the instance failed
    _ShutdownInstanceDisks(self, instance)
7050
7051
class LUInstanceReinstall(LogicalUnit):
  """Logical unit reinstalling the OS of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment is the one computed by
    L{_BuildInstanceHookEnvByObject}.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all nodes of the instance)
    is returned for both elements of the result.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must be known to the configuration, have disks and
    pass the C{INSTANCE_DOWN} state check; its primary node is checked
    via L{_CheckNodeOnline}. A new OS type and new OS parameters, if
    given, are verified as well.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node,
                     "Instance primary node offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is None:
      # Keep the OS which is currently configured
      instance_os = instance.os
    else:
      # A new OS was requested, verify it on the primary node
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      new_osparams = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, new_osparams)
      # the new dict (without defaults)
      self.os_inst = new_osparams
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS type was given it is written to the configuration
    first. The disks are then started, the OS create scripts are run on
    the primary node and the disks are shut down again, whether or not
    the installation succeeded.

    """
    instance = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      instance.os = new_os
      # Write to configuration
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(instance.primary_node,
                                             (instance, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
7138
7139
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  The disks are recreated either on the instance's current nodes, on an
  explicitly given list of replacement nodes (C{self.op.nodes}) or on
  nodes computed by an iallocator (C{self.op.iallocator}).

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  # Disk parameters which may be overridden while a disk is recreated
  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    On success the nodes returned by the allocator are stored in
    C{self.op.nodes}.

    @raise errors.OpPrereqError: if the allocator did not succeed

    """
    be_full = self.cfg.GetClusterInfo().FillBE(self.instance)

    # FIXME
    # The allocator should actually run in "relocate" mode, but current
    # allocators don't support relocating all the nodes of an instance at
    # the same time. As a workaround we use "allocate" mode, but this is
    # suboptimal for two reasons:
    # - The instance name passed to the allocator is present in the list of
    #   existing instances, so there could be a conflict within the
    #   internal structures of the allocator. This doesn't happen with the
    #   current allocators, but it's a liability.
    # - The allocator counts the resources used by the instance twice: once
    #   because the instance exists already, and once because it tries to
    #   allocate a new instance.
    # The allocator could choose some of the nodes on which the instance is
    # running, but that's not a problem. If the instance nodes are broken,
    # they should already be marked as drained or offline, and hence
    # skipped by the allocator. If instance disks have been lost for other
    # reasons, then recreating the disks on the same nodes should be fine.
    disk_template = self.instance.disk_template
    spindle_use = be_full[constants.BE_SPINDLE_USE]
    req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
                                        disk_template=disk_template,
                                        tags=list(self.instance.GetTags()),
                                        os=self.instance.os,
                                        nics=[{}],
                                        vcpus=be_full[constants.BE_VCPUS],
                                        memory=be_full[constants.BE_MAXMEM],
                                        spindle_use=spindle_use,
                                        disks=[{constants.IDISK_SIZE: d.size,
                                                constants.IDISK_MODE: d.mode}
                                                for d in self.instance.disks],
                                        hypervisor=self.instance.hypervisor)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    assert req.RequiredNodes() == len(self.instance.all_nodes)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.op.nodes = ial.result
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))

  def CheckArguments(self):
    """Normalize the disk specification and validate the opcode arguments.

    A plain list of disk indices (deprecated form) is converted into a
    list of C{(index, parameters)} pairs with empty parameters.

    @raise errors.OpPrereqError: if a disk index is given more than once
        or if a disk specifies parameters outside of C{_MODIFYABLE}

    """
    if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    # We don't want _CheckIAllocatorOrNode selecting the default iallocator
    # when neither iallocator nor nodes are specified
    if self.op.iallocator or self.op.nodes:
      _CheckIAllocatorOrNode(self, "iallocator", "nodes")

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifyable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declare the instance lock and prepare the node-level locks.

    Explicitly given replacement nodes are expanded and locked directly;
    otherwise the node locks are computed later in L{DeclareLocks}. A
    node group lock level is only requested when an iallocator is used.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      if self.op.iallocator:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    """Compute the locks needed at the given locking level.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.iallocator is not None
      assert not self.op.nodes
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock the primary group used by the instance optimistically; this
      # requires going via the node before it's locked, requiring
      # verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)

    elif level == locking.LEVEL_NODE:
      # If an allocator is used, then we lock all the nodes in the current
      # instance group, as we don't know yet which ones will be selected;
      # if we replace the nodes without using an allocator, locks are
      # already declared in ExpandNames; otherwise, we need to lock all the
      # instance nodes for disk re-creation
      if self.op.iallocator:
        assert not self.op.nodes
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1

        # Lock member nodes of the group of the primary node
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          self.needed_locks[locking.LEVEL_NODE].extend(
            self.cfg.GetNodeGroup(group_uuid).members)
      elif not self.op.nodes:
        self._LockInstancesNodes(primary_only=False)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); both are the
        master node followed by all nodes of the instance

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    It also validates the replacement nodes and the requested disk
    indices, stores the disks to recreate in C{self.disks} and, if an
    iallocator was requested, runs it and releases the node locks which
    are not needed anymore.

    @raise errors.OpPrereqError: if the number of replacement nodes does
        not match, the instance is diskless, a disk index is invalid or a
        partial recreation is combined with a change of nodes

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    if not self.op.iallocator:
      _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      # Node group locks are acquired only for the primary node (and only
      # when the allocator is used)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
                               primary_only=True)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_INVAL)

    # Compare against an explicit list, so that the check does not depend
    # on range() returning a list
    all_indices = list(range(len(instance.disks)))
    if ((self.op.nodes or self.op.iallocator) and
        sorted(self.disks.keys()) != all_indices):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

    if self.op.iallocator:
      self._RunAllocator()
      # Release unneeded node and node resource locks
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
      _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)

  def Exec(self, feedback_fn):
    """Recreate the disks.

    The needed modifications (new DRBD logical IDs, overridden disk
    parameters) are collected first and only applied once all disks have
    been examined; afterwards the disks not listed for skipping are
    created.

    @param feedback_fn: function used to send feedback back to the caller

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    disks_modified = False
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
        disks_modified = True
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))
        disks_modified = True

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    # Write the configuration whenever the instance object was changed;
    # this includes the case where only disk parameters (size/mode) were
    # overridden while the nodes stay the same
    if self.op.nodes or disks_modified:
      self.cfg.Update(instance, feedback_fn)

    # All touched nodes must be locked
    mylocks = self.owned_locks(locking.LEVEL_NODE)
    assert mylocks.issuperset(frozenset(instance.all_nodes))
    _CreateDisks(self, instance, to_skip=to_skip)
7431
7432
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  This class does not override C{REQ_BGL}; L{Exec} asserts that it is set
  before replacing the instance lock.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    @raise errors.OpPrereqError: if an IP check is requested without a
        name check

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); both are the
        master node followed by all nodes of the instance

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    If requested, the new name is resolved (and replaced by the resolved
    name) and its IP address is checked for being in use; finally the new
    name must not belong to another instance.

    @raise errors.OpPrereqError: if the IP of the new name answers on the
        node daemon port or the new name is already used by another
        instance

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = _CheckHostnameSane(self, new_name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration first; afterwards the
    file storage directory (for file-based instances) is renamed, the
    info text of the disks is refreshed and the OS rename script is run.
    Failures of the last two steps are only reported as warnings.

    @param feedback_fn: function used to send feedback back to the caller
    @return: the new name of the instance

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    # Everything done while the disks are active happens inside the "try"
    # block, so that the disks are shut down again even if updating the
    # disk info raises an exception
    try:
      # update info on disks
      info = _GetInstanceInfoText(inst)
      for (idx, disk) in enumerate(inst.disks):
        for node in inst.all_nodes:
          self.cfg.SetDiskID(disk, node)
          result = self.rpc.call_blockdev_setinfo(node, disk, info)
          if result.fail_msg:
            self.LogWarning("Error setting info on node %s for disk %s: %s",
                            node, idx, result.fail_msg)

      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
7552
7553
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  The instance is shut down on its primary node and then removed,
  together with its disks, through L{_RemoveInstance}.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node-level locks.

    """
    self._ExpandAndLockInstance()
    # The actual node names are filled in by DeclareLocks
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    """Compute the node and node resource locks.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Node resource locks mirror the node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = _CopyLockList(node_locks)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); the pre-hook list
        holds only the master node, the post-hook list holds all instance
        nodes followed by the master node

    """
    master = self.cfg.GetMasterNode()
    pre_nodes = [master]
    post_nodes = list(self.instance.all_nodes) + [master]
    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    A failed shutdown aborts the removal unless the opcode asks to ignore
    failures, in which case only a warning is sent.

    @param feedback_fn: function used to send feedback back to the caller
    @raise errors.OpExecError: if the instance can't be shut down and
        failures are not ignored

    """
    instance = self.instance
    pnode = instance.primary_node
    logging.info("Shutting down instance %s on node %s",
                 instance.name, pnode)

    shutdown_res = self.rpc.call_instance_shutdown(pnode, instance,
                                                   self.op.shutdown_timeout)
    msg = shutdown_res.fail_msg
    if msg and not self.op.ignore_failures:
      raise errors.OpExecError("Could not shutdown instance %s on"
                               " node %s: %s" %
                               (instance.name, instance.primary_node, msg))
    elif msg:
      feedback_fn("Warning: can't shutdown instance: %s" % msg)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7630
7631
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove an instance's disks and its configuration entry.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to send feedback back to the caller
  @param instance: the instance object to remove
  @param ignore_failures: if true, a failed disk removal only produces a
      warning instead of aborting the removal
  @raise errors.OpExecError: if the disks can't be removed and failures
      are not ignored

  """
  logging.info("Removing block devices for instance %s", instance.name)

  disks_removed = _RemoveDisks(lu, instance, ignore_failures=ignore_failures)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Register the instance lock for removal
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7652
7653
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All the work is delegated to an L{_InstanceQuery} object created in
  L{CheckArguments}.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object expand the names.

    """
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks for the given level.

    @param level: the locking level for which locks are being declared

    """
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return the result in the old-style format.

    @param feedback_fn: unused by this LU

    """
    return self.iq.OldStyleQuery(self)
7673
7674
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The actual work is done by a L{TLMigrateInstance} tasklet created in
  L{ExpandNames} with C{failover=True}.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Copies the optional C{iallocator} and C{target_node} opcode
    parameters to the LU, using C{None} for missing ones.

    """
    for attr in ("iallocator", "target_node"):
      setattr(self, attr, getattr(self.op, attr, None))

  def ExpandNames(self):
    """Lock the instance, prepare node-level locks and set up the tasklet.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    # Both node levels start out empty and are filled in by DeclareLocks
    for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
      self.needed_locks[level] = []
      self.recalculate_locks[level] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=False,
                        failover=True,
                        ignore_consistency=self.op.ignore_consistency,
                        shutdown_timeout=self.op.shutdown_timeout,
                        ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Compute the node and node resource locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and target node are locked; for all other
    templates the instance's nodes are locked.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template not in constants.DTS_EXT_MIRROR:
        self._LockInstancesNodes()
        return

      target = self.op.target_node
      if target is None:
        wanted = locking.ALL_SET
      else:
        wanted = [instance.primary_node, target]
      self.needed_locks[locking.LEVEL_NODE] = wanted
      # The locks are final, nothing is left to recalculate at this level
      del self.recalculate_locks[locking.LEVEL_NODE]
    elif level == locking.LEVEL_NODE_RES:
      # Node resource locks mirror the node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = _CopyLockList(node_locks)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    old_primary = instance.primary_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      # The old secondary becomes primary and the old primary takes its place
      old_secondary = instance.secondary_nodes[0]
      new_secondary = old_primary
    else:
      old_secondary = new_secondary = ""

    hooks_env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "NEW_PRIMARY": self.op.target_node,
      "OLD_SECONDARY": old_secondary,
      "NEW_SECONDARY": new_secondary,
      }
    hooks_env.update(_BuildInstanceHookEnvByObject(self, instance))

    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); the pre-hook list
        is the master node plus the instance's secondary nodes, the
        post-hook list additionally ends with the primary node

    """
    instance = self._migrater.instance
    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(instance.secondary_nodes)
    post_nodes = pre_nodes + [instance.primary_node]
    return (pre_nodes, post_nodes)
7762
7763
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance, prepare node-level locks and set up the tasklet.

    Both the node and the node resource lock levels are registered here
    with empty lists; the actual lock names are computed in
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # The node resource level must be registered as well, otherwise the
    # LEVEL_NODE_RES branch of DeclareLocks has no entry to fill in
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=self.op.cleanup,
                        failover=False,
                        fallback=self.op.allow_failover,
                        allow_runtime_changes=self.op.allow_runtime_changes,
                        ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Compute the node and node resource locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and target node are locked; for all other
    templates the instance's nodes are locked.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); the pre-hook list
        is the master node plus the instance's secondary nodes, the
        post-hook list additionally ends with the primary node

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7846
7847
7848 class LUInstanceMove(LogicalUnit):
7849   """Move an instance by data-copying.
7850
7851   """
7852   HPATH = "instance-move"
7853   HTYPE = constants.HTYPE_INSTANCE
7854   REQ_BGL = False
7855
7856   def ExpandNames(self):
7857     self._ExpandAndLockInstance()
7858     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7859     self.op.target_node = target_node
7860     self.needed_locks[locking.LEVEL_NODE] = [target_node]
7861     self.needed_locks[locking.LEVEL_NODE_RES] = []
7862     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7863
7864   def DeclareLocks(self, level):
7865     if level == locking.LEVEL_NODE:
7866       self._LockInstancesNodes(primary_only=True)
7867     elif level == locking.LEVEL_NODE_RES:
7868       # Copy node locks
7869       self.needed_locks[locking.LEVEL_NODE_RES] = \
7870         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7871
7872   def BuildHooksEnv(self):
7873     """Build hooks env.
7874
7875     This runs on master, primary and secondary nodes of the instance.
7876
7877     """
7878     env = {
7879       "TARGET_NODE": self.op.target_node,
7880       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7881       }
7882     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7883     return env
7884
7885   def BuildHooksNodes(self):
7886     """Build hooks nodes.
7887
7888     """
7889     nl = [
7890       self.cfg.GetMasterNode(),
7891       self.instance.primary_node,
7892       self.op.target_node,
7893       ]
7894     return (nl, nl)
7895
7896   def CheckPrereq(self):
7897     """Check prerequisites.
7898
7899     This checks that the instance is in the cluster.
7900
7901     """
7902     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7903     assert self.instance is not None, \
7904       "Cannot retrieve locked instance %s" % self.op.instance_name
7905
7906     node = self.cfg.GetNodeInfo(self.op.target_node)
7907     assert node is not None, \
7908       "Cannot retrieve locked node %s" % self.op.target_node
7909
7910     self.target_node = target_node = node.name
7911
7912     if target_node == instance.primary_node:
7913       raise errors.OpPrereqError("Instance %s is already on the node %s" %
7914                                  (instance.name, target_node),
7915                                  errors.ECODE_STATE)
7916
7917     bep = self.cfg.GetClusterInfo().FillBE(instance)
7918
7919     for idx, dsk in enumerate(instance.disks):
7920       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7921         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7922                                    " cannot copy" % idx, errors.ECODE_STATE)
7923
7924     _CheckNodeOnline(self, target_node)
7925     _CheckNodeNotDrained(self, target_node)
7926     _CheckNodeVmCapable(self, target_node)
7927     cluster = self.cfg.GetClusterInfo()
7928     group_info = self.cfg.GetNodeGroup(node.group)
7929     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7930     _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7931                             ignore=self.op.ignore_ipolicy)
7932
7933     if instance.admin_state == constants.ADMINST_UP:
7934       # check memory requirements on the secondary node
7935       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7936                            instance.name, bep[constants.BE_MAXMEM],
7937                            instance.hypervisor)
7938     else:
7939       self.LogInfo("Not checking memory on the secondary node as"
7940                    " instance will not be started")
7941
7942     # check bridge existance
7943     _CheckInstanceBridgesExist(self, instance, node=target_node)
7944
  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: callable handed to the configuration update
    @raise errors.OpExecError: if the instance cannot be shut down (unless
        C{ignore_consistency} is set), if the target disks cannot be
        created or the data cannot be copied, or if the instance cannot
        be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    # the node locks and the node resource locks must cover the same nodes
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        # a failed shutdown is only a warning when the caller asked to
        # ignore consistency
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # the minors are released and an error is raised whether or not
        # the disk removal succeeded
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        # stop at the first failure, the cleanup is done below
        break
      # the assemble call returns the device path on the target node
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, (disk, instance),
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # raising from the "finally" clause reports the copy errors even
        # if removing the target disks failed as well
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # the copy succeeded: record the target node as the new primary node
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        # do not leave the disks active when the instance did not start
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
8044
8045
class LUNodeMigrate(LogicalUnit):
  """Migrate all primary instances away from a node.

  The migrations are not executed by this logical unit itself; L{Exec}
  returns one migration job per instance.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the opcode arguments (nothing to verify here).

    """
    pass

  def ExpandNames(self):
    """Expand the node name and declare a shared lock on that node.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.share_locks = _ShareAll()
    self.needed_locks = {locking.LEVEL_NODE: [node_name]}

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The hooks run on the master node only, see L{BuildHooksNodes}.

    """
    env = {}
    env["NODE_NAME"] = self.op.node_name
    env["ALLOW_RUNTIME_CHANGES"] = self.op.allow_runtime_changes
    return env

  def BuildHooksNodes(self):
    """Return the hooks nodes: the master node for both phases.

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)

  def CheckPrereq(self):
    """Check prerequisites (there are none for this logical unit).

    """
    pass

  def Exec(self, feedback_fn):
    """Prepare one migration job for every primary instance of the node.

    @return: a L{ResultWithJobs} holding one single-opcode job per
        instance

    """
    runtime_changes_ok = self.op.allow_runtime_changes

    # Prepare jobs for migrating the instances
    jobs = []
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      migrate_op = opcodes.OpInstanceMigrate(
        instance_name=inst.name,
        mode=self.op.mode,
        live=self.op.live,
        iallocator=self.op.iallocator,
        target_node=self.op.target_node,
        allow_runtime_changes=runtime_changes_ok,
        ignore_ipolicy=self.op.ignore_ipolicy)
      jobs.append([migrate_op])

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
    assert owned_nodes == frozenset([self.op.node_name])

    return ResultWithJobs(jobs)
8109
8110
8111 class TLMigrateInstance(Tasklet):
8112   """Tasklet class for instance migration.
8113
8114   @type live: boolean
8115   @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
8117   @type cleanup: boolean
  @ivar cleanup: Whether we clean up from a failed migration
8119   @type iallocator: string
8120   @ivar iallocator: The iallocator used to determine target_node
8121   @type target_node: string
8122   @ivar target_node: If given, the target_node to reallocate the instance to
8123   @type failover: boolean
8124   @ivar failover: Whether operation results in failover or migration
8125   @type fallback: boolean
8126   @ivar fallback: Whether fallback to failover is allowed if migration not
8127                   possible
8128   @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between
                            source and target node
8131   @type shutdown_timeout: int
8132   @ivar shutdown_timeout: In case of failover timeout of the shutdown
8133   @type ignore_ipolicy: bool
8134   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8135
8136   """
8137
8138   # Constants
8139   _MIGRATION_POLL_INTERVAL = 1      # seconds
8140   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8141
8142   def __init__(self, lu, instance_name, cleanup=False,
8143                failover=False, fallback=False,
8144                ignore_consistency=False,
8145                allow_runtime_changes=True,
8146                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8147                ignore_ipolicy=False):
8148     """Initializes this class.
8149
8150     """
8151     Tasklet.__init__(self, lu)
8152
8153     # Parameters
8154     self.instance_name = instance_name
8155     self.cleanup = cleanup
8156     self.live = False # will be overridden later
8157     self.failover = failover
8158     self.fallback = fallback
8159     self.ignore_consistency = ignore_consistency
8160     self.shutdown_timeout = shutdown_timeout
8161     self.ignore_ipolicy = ignore_ipolicy
8162     self.allow_runtime_changes = allow_runtime_changes
8163
  def CheckPrereq(self):
    """Check prerequisites.

    Besides looking up the instance this method:
      - switches C{self.failover} on when a migration is not possible or
        not wanted (instance not marked up, instance configured to always
        fail over, instance reported as not migratable) and a fallback is
        allowed where required
      - determines the target node: for externally mirrored disk templates
        it comes from the iallocator or the opcode, otherwise it is the
        first secondary node
      - checks instance policy, free memory, bridges and the drained
        status for the target node
      - computes C{self.live} from the opcode's C{live}/C{mode} parameters
        or the hypervisor default
      - records the current memory of a running instance in
        C{self.current_mem}

    @raise errors.OpPrereqError: if the disk template is not mirrored, the
        target node is the primary node, an arbitrary target was requested
        for a template that does not allow it, or both C{live} and C{mode}
        were given
    @raise errors.ConfigurationError: if an internally mirrored instance
        has no secondary node

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance
    cluster = self.cfg.GetClusterInfo()

    # an instance that is not marked up cannot be migrated; fail over
    # instead if the caller allowed the fallback
    if (not self.cleanup and
        not instance.admin_state == constants.ADMINST_UP and
        not self.failover and self.fallback):
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                      " switching to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # Check that the target node is correct in terms of instance policy
      nodeinfo = self.cfg.GetNodeInfo(self.target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
                              ignore=self.ignore_ipolicy)

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])

    else:
      # the only possible target is the (first) secondary node
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      # NOTE(review): only the local variable is set in this branch;
      # self.target_node is presumably assigned elsewhere - confirm
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)
      nodeinfo = self.cfg.GetNodeInfo(target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
                              ignore=self.ignore_ipolicy)

    i_be = cluster.FillBE(instance)

    # check memory requirements on the secondary node
    if (not self.cleanup and
         (not self.failover or instance.admin_state == constants.ADMINST_UP)):
      # the value returned by the check is kept for the migration itself
      self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
                                               "migrating instance %s" %
                                               instance.name,
                                               i_be[constants.BE_MINMEM],
                                               instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check if failover must be forced instead of migration
    if (not self.cleanup and not self.failover and
        i_be[constants.BE_ALWAYS_FAILOVER]):
      self.lu.LogInfo("Instance configured to always failover; fallback"
                      " to failover")
      self.failover = True

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        # ask the primary node whether the instance can be migrated
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        # translate the boolean 'live' parameter into a migration mode
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = cluster.FillHV(self.instance, skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

    if not (self.failover or self.cleanup):
      # for a real migration, remember how much memory the instance uses
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking instance on node %s" %
                        instance.primary_node)
      # an empty payload is treated as "instance not running"
      instance_running = bool(remote_info.payload)
      if instance_running:
        self.current_mem = int(remote_info.payload["memory"])
8322
8323   def _RunAllocator(self):
8324     """Run the allocator based on input opcode.
8325
8326     """
8327     # FIXME: add a self.ignore_ipolicy option
8328     req = iallocator.IAReqRelocate(name=self.instance_name,
8329                                    relocate_from=[self.instance.primary_node])
8330     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8331
8332     ial.Run(self.lu.op.iallocator)
8333
8334     if not ial.success:
8335       raise errors.OpPrereqError("Can't compute nodes using"
8336                                  " iallocator '%s': %s" %
8337                                  (self.lu.op.iallocator, ial.info),
8338                                  errors.ECODE_NORES)
8339     self.target_node = ial.result[0]
8340     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8341                     self.instance_name, self.lu.op.iallocator,
8342                     utils.CommaJoin(ial.result))
8343
8344   def _WaitUntilSync(self):
8345     """Poll with custom rpc for disk sync.
8346
8347     This uses our own step-based rpc call.
8348
8349     """
8350     self.feedback_fn("* wait until resync is done")
8351     all_done = False
8352     while not all_done:
8353       all_done = True
8354       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8355                                             self.nodes_ip,
8356                                             (self.instance.disks,
8357                                              self.instance))
8358       min_percent = 100
8359       for node, nres in result.items():
8360         nres.Raise("Cannot resync disks on node %s" % node)
8361         node_done, node_percent = nres.payload
8362         all_done = all_done and node_done
8363         if node_percent is not None:
8364           min_percent = min(min_percent, node_percent)
8365       if not all_done:
8366         if min_percent < 100:
8367           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8368         time.sleep(2)
8369
8370   def _EnsureSecondary(self, node):
8371     """Demote a node to secondary.
8372
8373     """
8374     self.feedback_fn("* switching node %s to secondary mode" % node)
8375
8376     for dev in self.instance.disks:
8377       self.cfg.SetDiskID(dev, node)
8378
8379     result = self.rpc.call_blockdev_close(node, self.instance.name,
8380                                           self.instance.disks)
8381     result.Raise("Cannot change disk to secondary on node %s" % node)
8382
8383   def _GoStandalone(self):
8384     """Disconnect from the network.
8385
8386     """
8387     self.feedback_fn("* changing into standalone mode")
8388     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8389                                                self.instance.disks)
8390     for node, nres in result.items():
8391       nres.Raise("Cannot disconnect disks node %s" % node)
8392
8393   def _GoReconnect(self, multimaster):
8394     """Reconnect to the network.
8395
8396     """
8397     if multimaster:
8398       msg = "dual-master"
8399     else:
8400       msg = "single-master"
8401     self.feedback_fn("* changing disks into %s mode" % msg)
8402     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8403                                            (self.instance.disks, self.instance),
8404                                            self.instance.name, multimaster)
8405     for node, nres in result.items():
8406       nres.Raise("Cannot change disks config on node %s" % node)
8407
8408   def _ExecCleanup(self):
8409     """Try to cleanup after a failed migration.
8410
8411     The cleanup is done by:
8412       - check that the instance is running only on one node
8413         (and update the config if needed)
8414       - change disks on its secondary node to secondary
8415       - wait until disks are fully synchronized
8416       - disconnect from the network
8417       - change disks into single-master mode
8418       - wait again until disks are fully synchronized
8419
8420     """
8421     instance = self.instance
8422     target_node = self.target_node
8423     source_node = self.source_node
8424
8425     # check running on only one node
8426     self.feedback_fn("* checking where the instance actually runs"
8427                      " (if this hangs, the hypervisor might be in"
8428                      " a bad state)")
8429     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8430     for node, result in ins_l.items():
8431       result.Raise("Can't contact node %s" % node)
8432
8433     runningon_source = instance.name in ins_l[source_node].payload
8434     runningon_target = instance.name in ins_l[target_node].payload
8435
8436     if runningon_source and runningon_target:
8437       raise errors.OpExecError("Instance seems to be running on two nodes,"
8438                                " or the hypervisor is confused; you will have"
8439                                " to ensure manually that it runs only on one"
8440                                " and restart this operation")
8441
8442     if not (runningon_source or runningon_target):
8443       raise errors.OpExecError("Instance does not seem to be running at all;"
8444                                " in this case it's safer to repair by"
8445                                " running 'gnt-instance stop' to ensure disk"
8446                                " shutdown, and then restarting it")
8447
8448     if runningon_target:
8449       # the migration has actually succeeded, we need to update the config
8450       self.feedback_fn("* instance running on secondary node (%s),"
8451                        " updating config" % target_node)
8452       instance.primary_node = target_node
8453       self.cfg.Update(instance, self.feedback_fn)
8454       demoted_node = source_node
8455     else:
8456       self.feedback_fn("* instance confirmed to be running on its"
8457                        " primary node (%s)" % source_node)
8458       demoted_node = target_node
8459
8460     if instance.disk_template in constants.DTS_INT_MIRROR:
8461       self._EnsureSecondary(demoted_node)
8462       try:
8463         self._WaitUntilSync()
8464       except errors.OpExecError:
8465         # we ignore here errors, since if the device is standalone, it
8466         # won't be able to sync
8467         pass
8468       self._GoStandalone()
8469       self._GoReconnect(False)
8470       self._WaitUntilSync()
8471
8472     self.feedback_fn("* done")
8473
8474   def _RevertDiskStatus(self):
8475     """Try to revert the disk status after a failed migration.
8476
8477     """
8478     target_node = self.target_node
8479     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8480       return
8481
8482     try:
8483       self._EnsureSecondary(target_node)
8484       self._GoStandalone()
8485       self._GoReconnect(False)
8486       self._WaitUntilSync()
8487     except errors.OpExecError, err:
8488       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8489                          " please try to recover the instance manually;"
8490                          " error '%s'" % str(err))
8491
8492   def _AbortMigration(self):
8493     """Call the hypervisor code to abort a started migration.
8494
8495     """
8496     instance = self.instance
8497     target_node = self.target_node
8498     source_node = self.source_node
8499     migration_info = self.migration_info
8500
8501     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8502                                                                  instance,
8503                                                                  migration_info,
8504                                                                  False)
8505     abort_msg = abort_result.fail_msg
8506     if abort_msg:
8507       logging.error("Aborting migration failed on target node %s: %s",
8508                     target_node, abort_msg)
8509       # Don't raise an exception here, as we stil have to try to revert the
8510       # disk status, even if this step failed.
8511
8512     abort_result = self.rpc.call_instance_finalize_migration_src(
8513       source_node, instance, False, self.live)
8514     abort_msg = abort_result.fail_msg
8515     if abort_msg:
8516       logging.error("Aborting migration failed on source node %s: %s",
8517                     source_node, abort_msg)
8518
8519   def _ExecMigration(self):
8520     """Migrate an instance.
8521
8522     The migrate is done by:
8523       - change the disks into dual-master mode
8524       - wait until disks are fully synchronized again
8525       - migrate the instance
8526       - change disks on the new secondary node (the old primary) to secondary
8527       - wait until disks are fully synchronized
8528       - change disks into single-master mode
8529
8530     """
8531     instance = self.instance
8532     target_node = self.target_node
8533     source_node = self.source_node
8534
8535     # Check for hypervisor version mismatch and warn the user.
8536     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8537                                        None, [self.instance.hypervisor])
8538     for ninfo in nodeinfo.values():
8539       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8540                   ninfo.node)
8541     (_, _, (src_info, )) = nodeinfo[source_node].payload
8542     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8543
8544     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8545         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8546       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8547       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8548       if src_version != dst_version:
8549         self.feedback_fn("* warning: hypervisor version mismatch between"
8550                          " source (%s) and target (%s) node" %
8551                          (src_version, dst_version))
8552
8553     self.feedback_fn("* checking disk consistency between source and target")
8554     for (idx, dev) in enumerate(instance.disks):
8555       if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8556         raise errors.OpExecError("Disk %s is degraded or not fully"
8557                                  " synchronized on target node,"
8558                                  " aborting migration" % idx)
8559
8560     if self.current_mem > self.tgt_free_mem:
8561       if not self.allow_runtime_changes:
8562         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8563                                  " free memory to fit instance %s on target"
8564                                  " node %s (have %dMB, need %dMB)" %
8565                                  (instance.name, target_node,
8566                                   self.tgt_free_mem, self.current_mem))
8567       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8568       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8569                                                      instance,
8570                                                      self.tgt_free_mem)
8571       rpcres.Raise("Cannot modify instance runtime memory")
8572
8573     # First get the migration information from the remote node
8574     result = self.rpc.call_migration_info(source_node, instance)
8575     msg = result.fail_msg
8576     if msg:
8577       log_err = ("Failed fetching source migration information from %s: %s" %
8578                  (source_node, msg))
8579       logging.error(log_err)
8580       raise errors.OpExecError(log_err)
8581
8582     self.migration_info = migration_info = result.payload
8583
8584     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8585       # Then switch the disks to master/master mode
8586       self._EnsureSecondary(target_node)
8587       self._GoStandalone()
8588       self._GoReconnect(True)
8589       self._WaitUntilSync()
8590
8591     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8592     result = self.rpc.call_accept_instance(target_node,
8593                                            instance,
8594                                            migration_info,
8595                                            self.nodes_ip[target_node])
8596
8597     msg = result.fail_msg
8598     if msg:
8599       logging.error("Instance pre-migration failed, trying to revert"
8600                     " disk status: %s", msg)
8601       self.feedback_fn("Pre-migration failed, aborting")
8602       self._AbortMigration()
8603       self._RevertDiskStatus()
8604       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8605                                (instance.name, msg))
8606
8607     self.feedback_fn("* migrating instance to %s" % target_node)
8608     result = self.rpc.call_instance_migrate(source_node, instance,
8609                                             self.nodes_ip[target_node],
8610                                             self.live)
8611     msg = result.fail_msg
8612     if msg:
8613       logging.error("Instance migration failed, trying to revert"
8614                     " disk status: %s", msg)
8615       self.feedback_fn("Migration failed, aborting")
8616       self._AbortMigration()
8617       self._RevertDiskStatus()
8618       raise errors.OpExecError("Could not migrate instance %s: %s" %
8619                                (instance.name, msg))
8620
8621     self.feedback_fn("* starting memory transfer")
8622     last_feedback = time.time()
8623     while True:
8624       result = self.rpc.call_instance_get_migration_status(source_node,
8625                                                            instance)
8626       msg = result.fail_msg
8627       ms = result.payload   # MigrationStatus instance
8628       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8629         logging.error("Instance migration failed, trying to revert"
8630                       " disk status: %s", msg)
8631         self.feedback_fn("Migration failed, aborting")
8632         self._AbortMigration()
8633         self._RevertDiskStatus()
8634         raise errors.OpExecError("Could not migrate instance %s: %s" %
8635                                  (instance.name, msg))
8636
8637       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8638         self.feedback_fn("* memory transfer complete")
8639         break
8640
8641       if (utils.TimeoutExpired(last_feedback,
8642                                self._MIGRATION_FEEDBACK_INTERVAL) and
8643           ms.transferred_ram is not None):
8644         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8645         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8646         last_feedback = time.time()
8647
8648       time.sleep(self._MIGRATION_POLL_INTERVAL)
8649
8650     result = self.rpc.call_instance_finalize_migration_src(source_node,
8651                                                            instance,
8652                                                            True,
8653                                                            self.live)
8654     msg = result.fail_msg
8655     if msg:
8656       logging.error("Instance migration succeeded, but finalization failed"
8657                     " on the source node: %s", msg)
8658       raise errors.OpExecError("Could not finalize instance migration: %s" %
8659                                msg)
8660
8661     instance.primary_node = target_node
8662
8663     # distribute new instance config to the other nodes
8664     self.cfg.Update(instance, self.feedback_fn)
8665
8666     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8667                                                            instance,
8668                                                            migration_info,
8669                                                            True)
8670     msg = result.fail_msg
8671     if msg:
8672       logging.error("Instance migration succeeded, but finalization failed"
8673                     " on the target node: %s", msg)
8674       raise errors.OpExecError("Could not finalize instance migration: %s" %
8675                                msg)
8676
8677     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8678       self._EnsureSecondary(source_node)
8679       self._WaitUntilSync()
8680       self._GoStandalone()
8681       self._GoReconnect(False)
8682       self._WaitUntilSync()
8683
8684     # If the instance's disk template is `rbd' and there was a successful
8685     # migration, unmap the device from the source node.
8686     if self.instance.disk_template == constants.DT_RBD:
8687       disks = _ExpandCheckDisks(instance, instance.disks)
8688       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8689       for disk in disks:
8690         result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8691         msg = result.fail_msg
8692         if msg:
8693           logging.error("Migration was successful, but couldn't unmap the"
8694                         " block device %s on source node %s: %s",
8695                         disk.iv_name, source_node, msg)
8696           logging.error("You need to unmap the device %s manually on %s",
8697                         disk.iv_name, source_node)
8698
8699     self.feedback_fn("* done")
8700
  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    The steps are, in order: optional disk consistency check against the
    target node, instance shutdown on the source node, disk deactivation,
    switching the primary node in the configuration and, only for
    instances whose admin state is "up", disk activation and instance
    start on the target node.

    @raise errors.OpExecError: if a disk is degraded on the target node
        (unless the primary node is offline or C{ignore_consistency} is
        set), if the instance cannot be shut down (same exceptions), if
        its disks cannot be shut down or activated, or if it cannot be
        started on the target node

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    # remember the current primary by name; instance.primary_node is
    # overwritten with the target further down
    source_node = instance.primary_node
    target_node = self.target_node

    # the consistency check is only done for instances marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* checking disk consistency between source and target")
      for (idx, dev) in enumerate(instance.disks):
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
                                     False):
          # a degraded disk is tolerated when the primary node is offline
          # (reported to the user) or when ignore_consistency is set
          # (silently); otherwise the failover is aborted
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, idx, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % idx)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # a failed shutdown only produces a warning when consistency is
      # being ignored or the primary node is marked offline
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    # from here on the target node is the instance's primary node
    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        # undo any partial activation before reporting the failure
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        # the instance did not start, so its disks are shut down again
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
8779
8780   def Exec(self, feedback_fn):
8781     """Perform the migration.
8782
8783     """
8784     self.feedback_fn = feedback_fn
8785     self.source_node = self.instance.primary_node
8786
8787     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8788     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8789       self.target_node = self.instance.secondary_nodes[0]
8790       # Otherwise self.target_node has been populated either
8791       # directly, or through an iallocator.
8792
8793     self.all_nodes = [self.source_node, self.target_node]
8794     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8795                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8796
8797     if self.failover:
8798       feedback_fn("Failover instance %s" % self.instance.name)
8799       self._ExecFailover()
8800     else:
8801       feedback_fn("Migrating instance %s" % self.instance.name)
8802
8803       if self.cleanup:
8804         return self._ExecCleanup()
8805       else:
8806         return self._ExecMigration()
8807
8808
def _CreateBlockDev(lu, node, instance, device, force_create, info,
                    force_open):
  """Annotate the root device, then create its device tree.

  This is a wrapper around L{_CreateBlockDevInner}: the device is passed
  through L{_AnnotateDiskParams} and the single resulting disk is handed
  over together with all the other arguments, unchanged.

  """
  annotated = _AnnotateDiskParams(instance, [device], lu.cfg)
  # one device was passed in, so exactly one is expected back
  (root_dev,) = annotated
  return _CreateBlockDevInner(lu, node, instance, root_dev, force_create,
                              info, force_open)
8819
8820
def _CreateBlockDevInner(lu, node, instance, device, force_create,
                         info, force_open):
  """Create a tree of block devices on a given node.

  Children are always visited before the device itself. The device is
  only created when creation is forced, either by the caller or because
  the device reports that it has to be created on secondaries; once
  forced, the setting is inherited by all its children.

  @attention: The device has to be annotated already.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device for which
      CreateOnSecondary() is true
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  must_create = True if device.CreateOnSecondary() else force_create

  # depth-first: the children have to exist before their parent
  for child in device.children or ():
    _CreateBlockDevInner(lu, node, instance, child, must_create,
                         info, force_open)

  if must_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8863
8864
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  Children of the device are not handled here, so they must have been
  created in advance. If the device has no physical ID yet, the payload
  of the creation call is stored as its physical ID.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  creation = lu.rpc.call_blockdev_create(node, device, device.size,
                                         instance.name, force_open, info)
  failure_text = ("Can't create block device %s on node %s for instance %s" %
                  (device, node, instance.name))
  creation.Raise(failure_text)

  if device.physical_id is not None:
    return
  device.physical_id = creation.payload
8893
8894
def _GenerateUniqueNames(lu, exts):
  """Generate one unique name per requested suffix.

  For every entry of C{exts} a new unique ID is requested from the
  configuration and the entry is appended to it.

  @param lu: the logical unit on whose behalf we execute
  @param exts: the suffixes to append, one per name wanted
  @rtype: list
  @return: the generated names, in the order of C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(lu.proc.GetECId()), suffix)
          for suffix in exts]
8906
8907
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  A port and a shared secret are requested from the configuration; the
  resulting DRBD8 disk has two LV children, the data volume (of the
  requested size) and the metadata volume (of C{constants.DRBD_META_SIZE}).

  @param vgnames: the volume groups for the data and the meta volume
  @param names: the LV names for the data and the meta volume
  @return: the L{objects.Disk} describing the DRBD8 device

  """
  assert len(vgnames) == len(names) == 2
  tcp_port = lu.cfg.AllocatePort()
  secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgnames[0], names[0]),
                         params={})
  meta_lv = objects.Disk(dev_type=constants.LD_LV,
                         size=constants.DRBD_META_SIZE,
                         logical_id=(vgnames[1], names[1]),
                         params={})

  drbd_id = (primary, secondary, tcp_port, p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name, params={})
8931
8932
# Per-template prefix placed in front of the ".disk<N>" suffix when
# _GenerateDiskTemplate asks for unique disk names; for templates without
# an entry no names are generated at all
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  }
8937
8938
# Logical device type used by _GenerateDiskTemplate for the disks of each
# non-DRBD disk template
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  }
8946
8947
def _GenerateDiskTemplate(
  lu, template_name, instance_name, primary_node, secondary_nodes,
  disk_info, file_storage_dir, file_driver, base_index,
  feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
  _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  Only L{objects.Disk} descriptions are built here. For DRBD8 every disk
  becomes a DRBD device with a data and a meta LV below it; for the other
  templates every disk becomes a single device whose logical ID depends
  on the template.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template to generate the layout for
  @param instance_name: name used when allocating DRBD minors
  @param primary_node: the primary node of the instance
  @param secondary_nodes: must hold exactly one node for DRBD8 and be
      empty for all other non-diskless templates
  @param disk_info: one dict-like entry per disk, keyed by the
      C{constants.IDISK_*} values
  @param file_storage_dir: directory used in file-based logical IDs
  @param file_driver: driver used in file-based logical IDs
  @param base_index: index of the first generated disk
  @param feedback_fn: function used to send feedback back to the caller
  @param full_disk_params: disk parameters handed to
      L{objects.Disk.ComputeLDParams}
  @rtype: list
  @return: the generated disks
  @raise errors.ProgrammerError: for an unknown template or a number of
      secondary nodes that does not fit the template

  """
  #TODO: compute space requirements

  default_vg = lu.cfg.GetVGName()
  num_disks = len(disk_info)

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    peer_node = secondary_nodes[0]
    # one minor on each of the two nodes for every disk
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, peer_node] * len(disk_info), instance_name)

    ld_params = objects.Disk.ComputeLDParams(template_name, full_disk_params)
    (drbd_params, _, _) = ld_params
    fallback_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]

    suffixes = [".disk%d" % (base_index + i) for i in range(num_disks)]
    # flat list: data and meta LV name of disk 0, then of disk 1, ...
    lv_names = [stem + tail
                for stem in _GenerateUniqueNames(lu, suffixes)
                for tail in ("_data", "_meta")]

    drbd_disks = []
    for (idx, disk) in enumerate(disk_info):
      first = idx * 2
      data_vg = disk.get(constants.IDISK_VG, default_vg)
      meta_vg = disk.get(constants.IDISK_METAVG, fallback_metavg)
      branch = _GenerateDRBD8Branch(lu, primary_node, peer_node,
                                    disk[constants.IDISK_SIZE],
                                    [data_vg, meta_vg],
                                    lv_names[first:first + 2],
                                    "disk/%d" % (idx + base_index),
                                    minors[first], minors[first + 1])
      branch.mode = disk[constants.IDISK_MODE]
      drbd_disks.append(branch)
    return drbd_disks

  # everything below deals with templates that have no secondary node
  if secondary_nodes:
    raise errors.ProgrammerError("Wrong template configuration")

  if template_name == constants.DT_FILE:
    _req_file_storage()
  elif template_name == constants.DT_SHARED_FILE:
    _req_shr_file_storage()

  name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
  if name_prefix is not None:
    names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                      (name_prefix, base_index + i)
                                      for i in range(num_disks)])
  else:
    names = None

  # pick the function computing the logical ID of one disk; it is called
  # with the position in disk_info, the final disk index and the disk spec
  if template_name == constants.DT_PLAIN:
    def logical_id_fn(idx, _, disk):
      vg = disk.get(constants.IDISK_VG, default_vg)
      return (vg, names[idx])
  elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
    def logical_id_fn(_, disk_index, disk):
      return (file_driver, "%s/disk%d" % (file_storage_dir, disk_index))
  elif template_name == constants.DT_BLOCK:
    def logical_id_fn(idx, disk_index, disk):
      return (constants.BLOCKDEV_DRIVER_MANUAL, disk[constants.IDISK_ADOPT])
  elif template_name == constants.DT_RBD:
    def logical_id_fn(idx, _, disk):
      return ("rbd", names[idx])
  else:
    raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

  dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

  simple_disks = []
  for (idx, disk) in enumerate(disk_info):
    disk_index = idx + base_index
    size = disk[constants.IDISK_SIZE]
    feedback_fn("* disk %s, size %s" %
                (disk_index, utils.FormatUnit(size, "h")))
    logical_id = logical_id_fn(idx, disk_index, disk)
    simple_disks.append(objects.Disk(dev_type=dev_type, size=size,
                                     logical_id=logical_id,
                                     iv_name="disk/%d" % disk_index,
                                     mode=disk[constants.IDISK_MODE],
                                     params={}))
  return simple_disks
9041
9042
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  @param instance: the instance owning the disk; only its name is used
  @return: the instance name prefixed with "originstname+"

  """
  owner = instance.name
  return "originstname+%s" % owner
9048
9049
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  The average time needed per unit written so far is extrapolated to the
  amount that is still left.

  @param time_taken: The time taken so far
  @param written: amount written so far (must not be zero)
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  time_per_unit = time_taken / float(written)
  remaining = total_size - written
  return remaining * time_per_unit
9061
9062
def _WipeDisks(lu, instance, disks=None):
  """Wipes instance disks.

  Disk synchronization is paused on the primary node, every disk is wiped
  chunk by chunk starting at its offset, and synchronization is resumed
  again in any case, i.e. also when wiping failed. Nothing is returned;
  failures of the pause and wipe calls are reported through the C{Raise}
  method of their results, while resume failures only produce warnings.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @type disks: None or list of tuples
  @param disks: the disks to wipe as tuples of disk index, disk object
      and start offset; if None, all disks of the instance are wiped
      from offset 0

  """
  node = instance.primary_node

  if disks is None:
    disks = [(idx, disk, 0)
             for (idx, disk) in enumerate(instance.disks)]

  for (_, device, _) in disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pausing synchronization of disks of instance '%s'",
               instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (map(compat.snd, disks),
                                                   instance),
                                                  True)
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warning("Pausing synchronization of disk %s of instance '%s'"
                      " failed", idx, instance.name)

  try:
    for (idx, device, offset) in disks:
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
      wipe_chunk_size = \
        int(min(constants.MAX_WIPE_CHUNK,
                device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
      # The truncation yields 0 for very small disks; a zero-sized chunk
      # would never advance the offset and the loop below would not end
      if wipe_chunk_size < 1:
        wipe_chunk_size = 1

      size = device.size
      last_output = 0
      start_time = time.time()

      if offset == 0:
        info_text = ""
      else:
        info_text = (" (from %s to %s)" %
                     (utils.FormatUnit(offset, "h"),
                      utils.FormatUnit(size, "h")))

      lu.LogInfo("* Wiping disk %s%s", idx, info_text)

      logging.info("Wiping disk %d for instance %s on node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      while offset < size:
        # the last chunk may be shorter than the regular chunk size
        wipe_size = min(wipe_chunk_size, size - offset)

        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)

        result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
                                           wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))

        now = time.time()
        offset += wipe_size
        # report progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s",
                     offset / float(size) * 100, utils.FormatSeconds(eta))
          last_output = now
  finally:
    logging.info("Resuming synchronization of disks for instance '%s'",
                 instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                    (map(compat.snd, disks),
                                                     instance),
                                                    False)

    if result.fail_msg:
      lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                    node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
                        " failed", idx, instance.name)
9154
9155
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For file-based disk
  templates the storage directory is created first. Every disk is then
  created on all relevant nodes, with creation and opening forced only on
  the primary (or overriding target) node. Nothing is returned.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template in constants.DTS_FILEBASED:
    first_path = instance.disks[0].logical_id[1]
    file_storage_dir = os.path.dirname(first_path)
    mkdir_result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
    mkdir_result.Raise("Failed to create directory '%s' on node %s" %
                       (file_storage_dir, pnode))

  skipped = to_skip or ()

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for (idx, device) in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    #HARDCODE
    for node in all_nodes:
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
9198
9199
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  A removal failure on an offline node that is not the instance's primary
  node does not count as a failure. The ports of DRBD disks are only given
  back to the configuration if everything succeeded or if
  C{ignore_failures} is set. For file-based disk templates the storage
  directory is removed as well.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @type ignore_failures: boolean
  @param ignore_failures: whether DRBD ports are released even if some
      removals failed
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
      if result.fail_msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, result.fail_msg)
        # failures on offline non-primary nodes are tolerated
        if not (result.offline and node != instance.primary_node):
          all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # name the node the call was actually sent to, which is not the
      # primary node when target_node is given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
9258
9259
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template the disks will be created with
  @param disks: the disk specifications; each one is indexed with
      C{constants.IDISK_VG} and C{constants.IDISK_SIZE}
  @rtype: dict
  @return: the total size required in each volume group, keyed by volume
      group name; empty for templates that use no volume group
  @raise errors.ProgrammerError: if the size requirement of the disk
      template is not known

  """
  def _compute(disks, payload):
    """Sum up size plus per-disk overhead for each volume group.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # accumulate under the disk's own volume group, so that several
      # disks in the same group add up instead of overwriting each other
      vgs[vg_name] = \
        vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
9290
9291
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  The order of the remaining node names is kept.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  excluded = frozenset(lu.cfg.GetNonVmCapableNodeList())
  kept = []
  for name in nodenames:
    if name in excluded:
      continue
    kept.append(name)
  return kept
9305
9306
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify. The given parameters
  are layered on top of the cluster-level parameters of the hypervisor
  and checked on all vm-capable nodes of the list; nodes marked offline
  are skipped.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)

  cluster_defaults = lu.cfg.GetClusterInfo().hvparams.get(hvname, {})
  full_params = objects.FillDict(cluster_defaults, hvparams)

  per_node = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname,
                                                    full_params)
  for node in vm_nodes:
    node_result = per_node[node]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node)
9335
9336
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  The parameters are validated on all vm-capable nodes of the list. When
  a node returns an empty payload, a message saying that the OS was not
  found and validation was skipped is logged for it.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  per_node = lu.rpc.call_os_validate(vm_nodes, required, osname, checks,
                                     osparams)
  for (node, node_result) in per_node.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    node_result.Raise("OS Parameters validation failed on node %s" % node)
    if node_result.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node)
9365
9366
def _CreateInstanceAllocRequest(op, disks, nics, beparams):
  """Wrapper around IAReqInstanceAlloc.

  The request is filled from the opcode, the filled backend parameters and
  the computed disks and nics (the latter converted to dicts).

  @param op: The instance opcode
  @param disks: The computed disks
  @param nics: The computed nics
  @param beparams: The fully filled beparams

  @return: A filled L{iallocator.IAReqInstanceAlloc}

  """
  spindle_use = beparams[constants.BE_SPINDLE_USE]
  request = {
    "name": op.instance_name,
    "disk_template": op.disk_template,
    "tags": op.tags,
    "os": op.os_type,
    "vcpus": beparams[constants.BE_VCPUS],
    "memory": beparams[constants.BE_MAXMEM],
    "spindle_use": spindle_use,
    "disks": disks,
    "nics": [nic.ToDict() for nic in nics],
    "hypervisor": op.hypervisor,
    }
  return iallocator.IAReqInstanceAlloc(**request)
9389
9390
def _ComputeNics(op, cluster, default_ip, cfg, proc):
  """Builds the NIC objects requested by an instance creation opcode.

  For every entry of C{op.nics} the mode, IP address, MAC address and
  link are resolved and validated; MAC addresses given explicitly are
  reserved in the configuration.

  @param op: The instance opcode
  @param cluster: Cluster configuration object
  @param default_ip: The ip assigned to NICs whose ip is
      L{constants.VALUE_AUTO}
  @param cfg: An instance of the configuration object
  @param proc: The executor instance (provides the execution context id
      used for the MAC reservation)

  @return: The list of L{objects.NIC} built from the opcode
  @raise errors.OpPrereqError: on invalid or conflicting NIC settings

  """
  built_nics = []

  for (nic_idx, nic_spec) in enumerate(op.nics):
    # an unset or 'auto' mode falls back to the cluster default
    requested_mode = nic_spec.get(constants.INIC_MODE, None)
    if requested_mode is None or requested_mode == constants.VALUE_AUTO:
      mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
    else:
      mode = requested_mode
    is_routed = (mode == constants.NIC_MODE_ROUTED)

    # only the first nic of a routed instance defaults to the 'auto' ip
    if is_routed and nic_idx == 0:
      fallback_ip = constants.VALUE_AUTO
    else:
      fallback_ip = constants.VALUE_NONE

    # ip validity checks
    raw_ip = nic_spec.get(constants.INIC_IP, fallback_ip)
    if raw_ip is None or raw_ip.lower() == constants.VALUE_NONE:
      address = None
    elif raw_ip.lower() != constants.VALUE_AUTO:
      if not netutils.IPAddress.IsValid(raw_ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % raw_ip,
                                   errors.ECODE_INVAL)
      address = raw_ip
    else:
      # 'auto' relies on the address resolved by the name check
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
                                   errors.ECODE_INVAL)
      address = default_ip

    # TODO: check the ip address for uniqueness
    if is_routed and not address:
      raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                 errors.ECODE_INVAL)

    # MAC address verification; 'auto'/'generate' are passed through as-is
    mac_addr = nic_spec.get(constants.INIC_MAC, constants.VALUE_AUTO)
    if mac_addr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac_addr = utils.NormalizeAndValidateMac(mac_addr)

      try:
        # TODO: We need to factor this out
        cfg.ReserveMAC(mac_addr, proc.GetECId())
      except errors.ReservationError:
        raise errors.OpPrereqError("MAC address %s already in use"
                                   " in cluster" % mac_addr,
                                   errors.ECODE_NOTUNIQUE)

    # Build nic parameters; only explicitly requested values are stored
    nic_link = nic_spec.get(constants.INIC_LINK, None)
    if nic_link == constants.VALUE_AUTO:
      nic_link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]

    params = {}
    if requested_mode:
      params[constants.NIC_MODE] = mode
    if nic_link:
      params[constants.NIC_LINK] = nic_link

    # the syntax check is run on the parameters filled with the defaults
    objects.NIC.CheckParameterSyntax(cluster.SimpleFillNIC(params))
    built_nics.append(objects.NIC(mac=mac_addr, ip=address, nicparams=params))

  return built_nics
9465
9466
def _ComputeDisks(op, default_vg):
  """Validates the opcode disks and builds the normalized disk list.

  @param op: The instance opcode
  @param default_vg: The volume group used for disks not naming one

  @return: The computed disks, a list of dictionaries holding size, access
      mode and volume group plus, when given, the metadata volume group
      and the adoption source
  @raise errors.OpPrereqError: if a disk has an invalid access mode or a
      missing/invalid size

  """
  computed = []

  for spec in op.disks:
    access = spec.get(constants.IDISK_MODE, constants.DISK_RDWR)
    if access not in constants.DISK_ACCESS_SET:
      raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                 access, errors.ECODE_INVAL)

    raw_size = spec.get(constants.IDISK_SIZE, None)
    if raw_size is None:
      raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
    try:
      disk_size = int(raw_size)
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid disk size '%s'" % raw_size,
                                 errors.ECODE_INVAL)

    entry = {
      constants.IDISK_SIZE: disk_size,
      constants.IDISK_MODE: access,
      constants.IDISK_VG: spec.get(constants.IDISK_VG, default_vg),
      }
    # optional settings are copied only when present in the request
    for optional_key in (constants.IDISK_METAVG, constants.IDISK_ADOPT):
      if optional_key in spec:
        entry[optional_key] = spec[optional_key]

    computed.append(entry)

  return computed
9504
9505
def _ComputeFullBeParams(op, cluster):
  """Resolves the opcode beparams and fills them with cluster defaults.

  Note that C{op.beparams} is modified in place: values equal to
  L{constants.VALUE_AUTO} are replaced by the cluster default before the
  dictionary is upgraded and type-checked.

  @param op: The instance opcode
  @param cluster: The cluster config object

  @return: The fully filled beparams

  """
  cluster_defaults = cluster.beparams[constants.PP_DEFAULT]

  auto_names = [name for (name, value) in op.beparams.items()
                if value == constants.VALUE_AUTO]
  for name in auto_names:
    op.beparams[name] = cluster_defaults[name]

  objects.UpgradeBeParams(op.beparams)
  utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)

  return cluster.SimpleFillBE(op.beparams)
9522
9523
9524 class LUInstanceCreate(LogicalUnit):
9525   """Create an instance.
9526
9527   """
9528   HPATH = "instance-add"
9529   HTYPE = constants.HTYPE_INSTANCE
9530   REQ_BGL = False
9531
9532   def CheckArguments(self):
9533     """Check arguments.
9534
9535     """
9536     # do not require name_check to ease forward/backward compatibility
9537     # for tools
9538     if self.op.no_install and self.op.start:
9539       self.LogInfo("No-installation mode selected, disabling startup")
9540       self.op.start = False
9541     # validate/normalize the instance name
9542     self.op.instance_name = \
9543       netutils.Hostname.GetNormalizedName(self.op.instance_name)
9544
9545     if self.op.ip_check and not self.op.name_check:
9546       # TODO: make the ip check more flexible and not depend on the name check
9547       raise errors.OpPrereqError("Cannot do IP address check without a name"
9548                                  " check", errors.ECODE_INVAL)
9549
9550     # check nics' parameter names
9551     for nic in self.op.nics:
9552       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9553
9554     # check disks. parameter names and consistent adopt/no-adopt strategy
9555     has_adopt = has_no_adopt = False
9556     for disk in self.op.disks:
9557       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9558       if constants.IDISK_ADOPT in disk:
9559         has_adopt = True
9560       else:
9561         has_no_adopt = True
9562     if has_adopt and has_no_adopt:
9563       raise errors.OpPrereqError("Either all disks are adopted or none is",
9564                                  errors.ECODE_INVAL)
9565     if has_adopt:
9566       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9567         raise errors.OpPrereqError("Disk adoption is not supported for the"
9568                                    " '%s' disk template" %
9569                                    self.op.disk_template,
9570                                    errors.ECODE_INVAL)
9571       if self.op.iallocator is not None:
9572         raise errors.OpPrereqError("Disk adoption not allowed with an"
9573                                    " iallocator script", errors.ECODE_INVAL)
9574       if self.op.mode == constants.INSTANCE_IMPORT:
9575         raise errors.OpPrereqError("Disk adoption not allowed for"
9576                                    " instance import", errors.ECODE_INVAL)
9577     else:
9578       if self.op.disk_template in constants.DTS_MUST_ADOPT:
9579         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9580                                    " but no 'adopt' parameter given" %
9581                                    self.op.disk_template,
9582                                    errors.ECODE_INVAL)
9583
9584     self.adopt_disks = has_adopt
9585
9586     # instance name verification
9587     if self.op.name_check:
9588       self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9589       self.op.instance_name = self.hostname1.name
9590       # used in CheckPrereq for ip ping check
9591       self.check_ip = self.hostname1.ip
9592     else:
9593       self.check_ip = None
9594
9595     # file storage checks
9596     if (self.op.file_driver and
9597         not self.op.file_driver in constants.FILE_DRIVER):
9598       raise errors.OpPrereqError("Invalid file driver name '%s'" %
9599                                  self.op.file_driver, errors.ECODE_INVAL)
9600
9601     if self.op.disk_template == constants.DT_FILE:
9602       opcodes.RequireFileStorage()
9603     elif self.op.disk_template == constants.DT_SHARED_FILE:
9604       opcodes.RequireSharedFileStorage()
9605
9606     ### Node/iallocator related checks
9607     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9608
9609     if self.op.pnode is not None:
9610       if self.op.disk_template in constants.DTS_INT_MIRROR:
9611         if self.op.snode is None:
9612           raise errors.OpPrereqError("The networked disk templates need"
9613                                      " a mirror node", errors.ECODE_INVAL)
9614       elif self.op.snode:
9615         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9616                         " template")
9617         self.op.snode = None
9618
9619     self._cds = _GetClusterDomainSecret()
9620
9621     if self.op.mode == constants.INSTANCE_IMPORT:
9622       # On import force_variant must be True, because if we forced it at
9623       # initial install, our only chance when importing it back is that it
9624       # works again!
9625       self.op.force_variant = True
9626
9627       if self.op.no_install:
9628         self.LogInfo("No-installation mode has no effect during import")
9629
9630     elif self.op.mode == constants.INSTANCE_CREATE:
9631       if self.op.os_type is None:
9632         raise errors.OpPrereqError("No guest OS specified",
9633                                    errors.ECODE_INVAL)
9634       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9635         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9636                                    " installation" % self.op.os_type,
9637                                    errors.ECODE_STATE)
9638       if self.op.disk_template is None:
9639         raise errors.OpPrereqError("No disk template specified",
9640                                    errors.ECODE_INVAL)
9641
9642     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9643       # Check handshake to ensure both clusters have the same domain secret
9644       src_handshake = self.op.source_handshake
9645       if not src_handshake:
9646         raise errors.OpPrereqError("Missing source handshake",
9647                                    errors.ECODE_INVAL)
9648
9649       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9650                                                            src_handshake)
9651       if errmsg:
9652         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9653                                    errors.ECODE_INVAL)
9654
9655       # Load and check source CA
9656       self.source_x509_ca_pem = self.op.source_x509_ca
9657       if not self.source_x509_ca_pem:
9658         raise errors.OpPrereqError("Missing source X509 CA",
9659                                    errors.ECODE_INVAL)
9660
9661       try:
9662         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9663                                                     self._cds)
9664       except OpenSSL.crypto.Error, err:
9665         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9666                                    (err, ), errors.ECODE_INVAL)
9667
9668       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9669       if errcode is not None:
9670         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9671                                    errors.ECODE_INVAL)
9672
9673       self.source_x509_ca = cert
9674
9675       src_instance_name = self.op.source_instance_name
9676       if not src_instance_name:
9677         raise errors.OpPrereqError("Missing source instance name",
9678                                    errors.ECODE_INVAL)
9679
9680       self.source_instance_name = \
9681           netutils.GetHostname(name=src_instance_name).name
9682
9683     else:
9684       raise errors.OpPrereqError("Invalid instance creation mode %r" %
9685                                  self.op.mode, errors.ECODE_INVAL)
9686
9687   def ExpandNames(self):
9688     """ExpandNames for CreateInstance.
9689
9690     Figure out the right locks for instance creation.
9691
9692     """
9693     self.needed_locks = {}
9694
9695     instance_name = self.op.instance_name
9696     # this is just a preventive check, but someone might still add this
9697     # instance in the meantime, and creation will fail at lock-add time
9698     if instance_name in self.cfg.GetInstanceList():
9699       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9700                                  instance_name, errors.ECODE_EXISTS)
9701
9702     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9703
9704     if self.op.iallocator:
9705       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9706       # specifying a group on instance creation and then selecting nodes from
9707       # that group
9708       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9709       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9710     else:
9711       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9712       nodelist = [self.op.pnode]
9713       if self.op.snode is not None:
9714         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9715         nodelist.append(self.op.snode)
9716       self.needed_locks[locking.LEVEL_NODE] = nodelist
9717       # Lock resources of instance's primary and secondary nodes (copy to
9718       # prevent accidential modification)
9719       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9720
9721     # in case of import lock the source node too
9722     if self.op.mode == constants.INSTANCE_IMPORT:
9723       src_node = self.op.src_node
9724       src_path = self.op.src_path
9725
9726       if src_path is None:
9727         self.op.src_path = src_path = self.op.instance_name
9728
9729       if src_node is None:
9730         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9731         self.op.src_node = None
9732         if os.path.isabs(src_path):
9733           raise errors.OpPrereqError("Importing an instance from a path"
9734                                      " requires a source node option",
9735                                      errors.ECODE_INVAL)
9736       else:
9737         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9738         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9739           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9740         if not os.path.isabs(src_path):
9741           self.op.src_path = src_path = \
9742             utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9743
9744   def _RunAllocator(self):
9745     """Run the allocator based on input opcode.
9746
9747     """
9748     req = _CreateInstanceAllocRequest(self.op, self.disks,
9749                                       self.nics, self.be_full)
9750     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9751
9752     ial.Run(self.op.iallocator)
9753
9754     if not ial.success:
9755       raise errors.OpPrereqError("Can't compute nodes using"
9756                                  " iallocator '%s': %s" %
9757                                  (self.op.iallocator, ial.info),
9758                                  errors.ECODE_NORES)
9759     self.op.pnode = ial.result[0]
9760     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9761                  self.op.instance_name, self.op.iallocator,
9762                  utils.CommaJoin(ial.result))
9763
9764     assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9765
9766     if req.RequiredNodes() == 2:
9767       self.op.snode = ial.result[1]
9768
9769   def BuildHooksEnv(self):
9770     """Build hooks env.
9771
9772     This runs on master, primary and secondary nodes of the instance.
9773
9774     """
9775     env = {
9776       "ADD_MODE": self.op.mode,
9777       }
9778     if self.op.mode == constants.INSTANCE_IMPORT:
9779       env["SRC_NODE"] = self.op.src_node
9780       env["SRC_PATH"] = self.op.src_path
9781       env["SRC_IMAGES"] = self.src_images
9782
9783     env.update(_BuildInstanceHookEnv(
9784       name=self.op.instance_name,
9785       primary_node=self.op.pnode,
9786       secondary_nodes=self.secondaries,
9787       status=self.op.start,
9788       os_type=self.op.os_type,
9789       minmem=self.be_full[constants.BE_MINMEM],
9790       maxmem=self.be_full[constants.BE_MAXMEM],
9791       vcpus=self.be_full[constants.BE_VCPUS],
9792       nics=_NICListToTuple(self, self.nics),
9793       disk_template=self.op.disk_template,
9794       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9795              for d in self.disks],
9796       bep=self.be_full,
9797       hvp=self.hv_full,
9798       hypervisor_name=self.op.hypervisor,
9799       tags=self.op.tags,
9800     ))
9801
9802     return env
9803
9804   def BuildHooksNodes(self):
9805     """Build hooks nodes.
9806
9807     """
9808     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9809     return nl, nl
9810
9811   def _ReadExportInfo(self):
9812     """Reads the export information from disk.
9813
9814     It will override the opcode source node and path with the actual
9815     information, if these two were not specified before.
9816
9817     @return: the export information
9818
9819     """
9820     assert self.op.mode == constants.INSTANCE_IMPORT
9821
9822     src_node = self.op.src_node
9823     src_path = self.op.src_path
9824
9825     if src_node is None:
9826       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9827       exp_list = self.rpc.call_export_list(locked_nodes)
9828       found = False
9829       for node in exp_list:
9830         if exp_list[node].fail_msg:
9831           continue
9832         if src_path in exp_list[node].payload:
9833           found = True
9834           self.op.src_node = src_node = node
9835           self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
9836                                                        src_path)
9837           break
9838       if not found:
9839         raise errors.OpPrereqError("No export found for relative path %s" %
9840                                     src_path, errors.ECODE_INVAL)
9841
9842     _CheckNodeOnline(self, src_node)
9843     result = self.rpc.call_export_info(src_node, src_path)
9844     result.Raise("No export or invalid export found in dir %s" % src_path)
9845
9846     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9847     if not export_info.has_section(constants.INISECT_EXP):
9848       raise errors.ProgrammerError("Corrupted export config",
9849                                    errors.ECODE_ENVIRON)
9850
9851     ei_version = export_info.get(constants.INISECT_EXP, "version")
9852     if (int(ei_version) != constants.EXPORT_VERSION):
9853       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9854                                  (ei_version, constants.EXPORT_VERSION),
9855                                  errors.ECODE_ENVIRON)
9856     return export_info
9857
9858   def _ReadExportParams(self, einfo):
9859     """Use export parameters as defaults.
9860
9861     In case the opcode doesn't specify (as in override) some instance
9862     parameters, then try to use them from the export information, if
9863     that declares them.
9864
9865     """
9866     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9867
9868     if self.op.disk_template is None:
9869       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9870         self.op.disk_template = einfo.get(constants.INISECT_INS,
9871                                           "disk_template")
9872         if self.op.disk_template not in constants.DISK_TEMPLATES:
9873           raise errors.OpPrereqError("Disk template specified in configuration"
9874                                      " file is not one of the allowed values:"
9875                                      " %s" %
9876                                      " ".join(constants.DISK_TEMPLATES),
9877                                      errors.ECODE_INVAL)
9878       else:
9879         raise errors.OpPrereqError("No disk template specified and the export"
9880                                    " is missing the disk_template information",
9881                                    errors.ECODE_INVAL)
9882
9883     if not self.op.disks:
9884       disks = []
9885       # TODO: import the disk iv_name too
9886       for idx in range(constants.MAX_DISKS):
9887         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9888           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9889           disks.append({constants.IDISK_SIZE: disk_sz})
9890       self.op.disks = disks
9891       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9892         raise errors.OpPrereqError("No disk info specified and the export"
9893                                    " is missing the disk information",
9894                                    errors.ECODE_INVAL)
9895
9896     if not self.op.nics:
9897       nics = []
9898       for idx in range(constants.MAX_NICS):
9899         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9900           ndict = {}
9901           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9902             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9903             ndict[name] = v
9904           nics.append(ndict)
9905         else:
9906           break
9907       self.op.nics = nics
9908
9909     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9910       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9911
9912     if (self.op.hypervisor is None and
9913         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9914       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9915
9916     if einfo.has_section(constants.INISECT_HYP):
9917       # use the export parameters but do not override the ones
9918       # specified by the user
9919       for name, value in einfo.items(constants.INISECT_HYP):
9920         if name not in self.op.hvparams:
9921           self.op.hvparams[name] = value
9922
9923     if einfo.has_section(constants.INISECT_BEP):
9924       # use the parameters, without overriding
9925       for name, value in einfo.items(constants.INISECT_BEP):
9926         if name not in self.op.beparams:
9927           self.op.beparams[name] = value
9928         # Compatibility for the old "memory" be param
9929         if name == constants.BE_MEMORY:
9930           if constants.BE_MAXMEM not in self.op.beparams:
9931             self.op.beparams[constants.BE_MAXMEM] = value
9932           if constants.BE_MINMEM not in self.op.beparams:
9933             self.op.beparams[constants.BE_MINMEM] = value
9934     else:
9935       # try to read the parameters old style, from the main section
9936       for name in constants.BES_PARAMETERS:
9937         if (name not in self.op.beparams and
9938             einfo.has_option(constants.INISECT_INS, name)):
9939           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9940
9941     if einfo.has_section(constants.INISECT_OSP):
9942       # use the parameters, without overriding
9943       for name, value in einfo.items(constants.INISECT_OSP):
9944         if name not in self.op.osparams:
9945           self.op.osparams[name] = value
9946
9947   def _RevertToDefaults(self, cluster):
9948     """Revert the instance parameters to the default values.
9949
9950     """
9951     # hvparams
9952     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9953     for name in self.op.hvparams.keys():
9954       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9955         del self.op.hvparams[name]
9956     # beparams
9957     be_defs = cluster.SimpleFillBE({})
9958     for name in self.op.beparams.keys():
9959       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9960         del self.op.beparams[name]
9961     # nic params
9962     nic_defs = cluster.SimpleFillNIC({})
9963     for nic in self.op.nics:
9964       for name in constants.NICS_PARAMETERS:
9965         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9966           del nic[name]
9967     # osparams
9968     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9969     for name in self.op.osparams.keys():
9970       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9971         del self.op.osparams[name]
9972
9973   def _CalculateFileStorageDir(self):
9974     """Calculate final instance file storage dir.
9975
9976     """
9977     # file storage dir calculation/check
9978     self.instance_file_storage_dir = None
9979     if self.op.disk_template in constants.DTS_FILEBASED:
9980       # build the full file storage dir path
9981       joinargs = []
9982
9983       if self.op.disk_template == constants.DT_SHARED_FILE:
9984         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9985       else:
9986         get_fsd_fn = self.cfg.GetFileStorageDir
9987
9988       cfg_storagedir = get_fsd_fn()
9989       if not cfg_storagedir:
9990         raise errors.OpPrereqError("Cluster file storage dir not defined",
9991                                    errors.ECODE_STATE)
9992       joinargs.append(cfg_storagedir)
9993
9994       if self.op.file_storage_dir is not None:
9995         joinargs.append(self.op.file_storage_dir)
9996
9997       joinargs.append(self.op.instance_name)
9998
9999       # pylint: disable=W0142
10000       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10001
10002   def CheckPrereq(self): # pylint: disable=R0914
10003     """Check prerequisites.
10004
10005     """
10006     self._CalculateFileStorageDir()
10007
10008     if self.op.mode == constants.INSTANCE_IMPORT:
10009       export_info = self._ReadExportInfo()
10010       self._ReadExportParams(export_info)
10011       self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10012     else:
10013       self._old_instance_name = None
10014
10015     if (not self.cfg.GetVGName() and
10016         self.op.disk_template not in constants.DTS_NOT_LVM):
10017       raise errors.OpPrereqError("Cluster does not support lvm-based"
10018                                  " instances", errors.ECODE_STATE)
10019
10020     if (self.op.hypervisor is None or
10021         self.op.hypervisor == constants.VALUE_AUTO):
10022       self.op.hypervisor = self.cfg.GetHypervisorType()
10023
10024     cluster = self.cfg.GetClusterInfo()
10025     enabled_hvs = cluster.enabled_hypervisors
10026     if self.op.hypervisor not in enabled_hvs:
10027       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10028                                  " cluster (%s)" %
10029                                  (self.op.hypervisor, ",".join(enabled_hvs)),
10030                                  errors.ECODE_STATE)
10031
10032     # Check tag validity
10033     for tag in self.op.tags:
10034       objects.TaggableObject.ValidateTag(tag)
10035
10036     # check hypervisor parameter syntax (locally)
10037     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10038     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10039                                       self.op.hvparams)
10040     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10041     hv_type.CheckParameterSyntax(filled_hvp)
10042     self.hv_full = filled_hvp
10043     # check that we don't specify global parameters on an instance
10044     _CheckGlobalHvParams(self.op.hvparams)
10045
10046     # fill and remember the beparams dict
10047     self.be_full = _ComputeFullBeParams(self.op, cluster)
10048
10049     # build os parameters
10050     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10051
10052     # now that hvp/bep are in final format, let's reset to defaults,
10053     # if told to do so
10054     if self.op.identify_defaults:
10055       self._RevertToDefaults(cluster)
10056
10057     # NIC buildup
10058     self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10059                              self.proc)
10060
10061     # disk checks/pre-build
10062     default_vg = self.cfg.GetVGName()
10063     self.disks = _ComputeDisks(self.op, default_vg)
10064
10065     if self.op.mode == constants.INSTANCE_IMPORT:
10066       disk_images = []
10067       for idx in range(len(self.disks)):
10068         option = "disk%d_dump" % idx
10069         if export_info.has_option(constants.INISECT_INS, option):
10070           # FIXME: are the old os-es, disk sizes, etc. useful?
10071           export_name = export_info.get(constants.INISECT_INS, option)
10072           image = utils.PathJoin(self.op.src_path, export_name)
10073           disk_images.append(image)
10074         else:
10075           disk_images.append(False)
10076
10077       self.src_images = disk_images
10078
10079       if self.op.instance_name == self._old_instance_name:
10080         for idx, nic in enumerate(self.nics):
10081           if nic.mac == constants.VALUE_AUTO:
10082             nic_mac_ini = "nic%d_mac" % idx
10083             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10084
10085     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10086
10087     # ip ping checks (we use the same ip that was resolved in ExpandNames)
10088     if self.op.ip_check:
10089       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10090         raise errors.OpPrereqError("IP %s of instance %s already in use" %
10091                                    (self.check_ip, self.op.instance_name),
10092                                    errors.ECODE_NOTUNIQUE)
10093
10094     #### mac address generation
10095     # By generating here the mac address both the allocator and the hooks get
10096     # the real final mac address rather than the 'auto' or 'generate' value.
10097     # There is a race condition between the generation and the instance object
10098     # creation, which means that we know the mac is valid now, but we're not
10099     # sure it will be when we actually add the instance. If things go bad
10100     # adding the instance will abort because of a duplicate mac, and the
10101     # creation job will fail.
10102     for nic in self.nics:
10103       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10104         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
10105
10106     #### allocator run
10107
10108     if self.op.iallocator is not None:
10109       self._RunAllocator()
10110
10111     # Release all unneeded node locks
10112     _ReleaseLocks(self, locking.LEVEL_NODE,
10113                   keep=filter(None, [self.op.pnode, self.op.snode,
10114                                      self.op.src_node]))
10115     _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10116                   keep=filter(None, [self.op.pnode, self.op.snode,
10117                                      self.op.src_node]))
10118
10119     #### node related checks
10120
10121     # check primary node
10122     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10123     assert self.pnode is not None, \
10124       "Cannot retrieve locked node %s" % self.op.pnode
10125     if pnode.offline:
10126       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10127                                  pnode.name, errors.ECODE_STATE)
10128     if pnode.drained:
10129       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10130                                  pnode.name, errors.ECODE_STATE)
10131     if not pnode.vm_capable:
10132       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10133                                  " '%s'" % pnode.name, errors.ECODE_STATE)
10134
10135     self.secondaries = []
10136
10137     # mirror node verification
10138     if self.op.disk_template in constants.DTS_INT_MIRROR:
10139       if self.op.snode == pnode.name:
10140         raise errors.OpPrereqError("The secondary node cannot be the"
10141                                    " primary node", errors.ECODE_INVAL)
10142       _CheckNodeOnline(self, self.op.snode)
10143       _CheckNodeNotDrained(self, self.op.snode)
10144       _CheckNodeVmCapable(self, self.op.snode)
10145       self.secondaries.append(self.op.snode)
10146
10147       snode = self.cfg.GetNodeInfo(self.op.snode)
10148       if pnode.group != snode.group:
10149         self.LogWarning("The primary and secondary nodes are in two"
10150                         " different node groups; the disk parameters"
10151                         " from the first disk's node group will be"
10152                         " used")
10153
10154     nodenames = [pnode.name] + self.secondaries
10155
10156     # Verify instance specs
10157     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10158     ispec = {
10159       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10160       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10161       constants.ISPEC_DISK_COUNT: len(self.disks),
10162       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10163       constants.ISPEC_NIC_COUNT: len(self.nics),
10164       constants.ISPEC_SPINDLE_USE: spindle_use,
10165       }
10166
10167     group_info = self.cfg.GetNodeGroup(pnode.group)
10168     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10169     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10170     if not self.op.ignore_ipolicy and res:
10171       msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10172              (pnode.group, group_info.name, utils.CommaJoin(res)))
10173       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10174
10175     if not self.adopt_disks:
10176       if self.op.disk_template == constants.DT_RBD:
10177         # _CheckRADOSFreeSpace() is just a placeholder.
10178         # Any function that checks prerequisites can be placed here.
10179         # Check if there is enough space on the RADOS cluster.
10180         _CheckRADOSFreeSpace()
10181       else:
10182         # Check lv size requirements, if not adopting
10183         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10184         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10185
10186     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10187       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10188                                 disk[constants.IDISK_ADOPT])
10189                      for disk in self.disks])
10190       if len(all_lvs) != len(self.disks):
10191         raise errors.OpPrereqError("Duplicate volume names given for adoption",
10192                                    errors.ECODE_INVAL)
10193       for lv_name in all_lvs:
10194         try:
10195           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10196           # to ReserveLV uses the same syntax
10197           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10198         except errors.ReservationError:
10199           raise errors.OpPrereqError("LV named %s used by another instance" %
10200                                      lv_name, errors.ECODE_NOTUNIQUE)
10201
10202       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10203       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10204
10205       node_lvs = self.rpc.call_lv_list([pnode.name],
10206                                        vg_names.payload.keys())[pnode.name]
10207       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10208       node_lvs = node_lvs.payload
10209
10210       delta = all_lvs.difference(node_lvs.keys())
10211       if delta:
10212         raise errors.OpPrereqError("Missing logical volume(s): %s" %
10213                                    utils.CommaJoin(delta),
10214                                    errors.ECODE_INVAL)
10215       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10216       if online_lvs:
10217         raise errors.OpPrereqError("Online logical volumes found, cannot"
10218                                    " adopt: %s" % utils.CommaJoin(online_lvs),
10219                                    errors.ECODE_STATE)
10220       # update the size of disk based on what is found
10221       for dsk in self.disks:
10222         dsk[constants.IDISK_SIZE] = \
10223           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10224                                         dsk[constants.IDISK_ADOPT])][0]))
10225
10226     elif self.op.disk_template == constants.DT_BLOCK:
10227       # Normalize and de-duplicate device paths
10228       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10229                        for disk in self.disks])
10230       if len(all_disks) != len(self.disks):
10231         raise errors.OpPrereqError("Duplicate disk names given for adoption",
10232                                    errors.ECODE_INVAL)
10233       baddisks = [d for d in all_disks
10234                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10235       if baddisks:
10236         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10237                                    " cannot be adopted" %
10238                                    (", ".join(baddisks),
10239                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
10240                                    errors.ECODE_INVAL)
10241
10242       node_disks = self.rpc.call_bdev_sizes([pnode.name],
10243                                             list(all_disks))[pnode.name]
10244       node_disks.Raise("Cannot get block device information from node %s" %
10245                        pnode.name)
10246       node_disks = node_disks.payload
10247       delta = all_disks.difference(node_disks.keys())
10248       if delta:
10249         raise errors.OpPrereqError("Missing block device(s): %s" %
10250                                    utils.CommaJoin(delta),
10251                                    errors.ECODE_INVAL)
10252       for dsk in self.disks:
10253         dsk[constants.IDISK_SIZE] = \
10254           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10255
10256     # Verify instance specs
10257     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10258     ispec = {
10259       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10260       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10261       constants.ISPEC_DISK_COUNT: len(self.disks),
10262       constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10263                                   for disk in self.disks],
10264       constants.ISPEC_NIC_COUNT: len(self.nics),
10265       constants.ISPEC_SPINDLE_USE: spindle_use,
10266       }
10267
10268     group_info = self.cfg.GetNodeGroup(pnode.group)
10269     ipolicy = _CalculateGroupIPolicy(cluster, group_info)
10270     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10271     if not self.op.ignore_ipolicy and res:
10272       raise errors.OpPrereqError(("Instance allocation to group %s violates"
10273                                   " policy: %s") % (pnode.group,
10274                                                     utils.CommaJoin(res)),
10275                                   errors.ECODE_INVAL)
10276
10277     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10278
10279     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10280     # check OS parameters (remotely)
10281     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10282
10283     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10284
10285     # memory check on primary node
10286     #TODO(dynmem): use MINMEM for checking
10287     if self.op.start:
10288       _CheckNodeFreeMemory(self, self.pnode.name,
10289                            "creating instance %s" % self.op.instance_name,
10290                            self.be_full[constants.BE_MAXMEM],
10291                            self.op.hypervisor)
10292
10293     self.dry_run_result = list(nodenames)
10294
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    The steps performed, in order, are: allocate a network port if the
    hypervisor needs one, generate the disk objects, build the instance
    object, create (or, for adoption, rename) the disks, add the instance
    to the configuration, optionally wipe the disks, wait for/check the
    disk sync status, run the OS creation or import scripts and finally,
    if requested, start the instance.

    @param feedback_fn: callable invoked with progress messages
    @rtype: list
    @return: the list built from C{all_nodes} of the new instance object
    @raise errors.OpExecError: if the disks are found degraded after
      creation (the instance and its disks are removed again first)
    @raise errors.ProgrammerError: if the opcode mode is not one of the
      creation/import modes handled here

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # Every node resource lock we own must also be owned as a node lock
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Node locks differ from node resource locks"

    # Only hypervisors listed in HTS_REQ_PORT get a port from the config
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    # This is ugly but we got a chicken-egg problem here
    # We can only take the group disk parameters, as the instance
    # has no disks yet (we are generating them right here).
    node = self.cfg.GetNodeInfo(pnode_name)
    nodegroup = self.cfg.GetNodeGroup(node.group)
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn,
                                  self.cfg.GetGroupDiskParams(nodegroup))

    # The instance is created administratively down; the state is only
    # switched to "up" at the very end, and only if self.op.start is set
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_state=constants.ADMINST_DOWN,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      # When adopting, only the plain template needs any work in this
      # method; no disks are created for the other templates either
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          # remember the generated name, then point the temporary disk
          # at the existing (to be adopted) volume
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        # Try to remove whatever was created; the DRBD minors are released
        # whether or not the removal succeeds, then the error is re-raised
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    # disk_abort records any disk-level failure (wipe error or degraded
    # disks); if set, the instance is rolled back further below
    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      # Roll back: remove the disks and the configuration entry created above
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # Release all node resource locks
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      # we need to set the disks ID to the primary node, since the
      # preceding code might or might have not done it, depending on
      # disk template and other options
      for disk in iobj.disks:
        self.cfg.SetDiskID(disk, pnode_name)
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          # Sync is paused during the OS installation only for internally
          # mirrored templates and only if we did not wait for the sync
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), True)
            # a failed pause is only logged, the installation goes on
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          # The OS-add result is only checked here, i.e. after the resume
          # RPC has been issued
          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      else:
        if self.op.mode == constants.INSTANCE_IMPORT:
          feedback_fn("* running the instance OS import scripts...")

          transfers = []

          # one transfer per source image; disks without an image are skipped
          for idx, image in enumerate(self.src_images):
            if not image:
              continue

            # FIXME: pass debug option from opcode to backend
            dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                               constants.IEIO_FILE, (image, ),
                                               constants.IEIO_SCRIPT,
                                               (iobj.disks[idx], idx),
                                               None)
            transfers.append(dt)

          import_result = \
            masterd.instance.TransferInstanceData(self, feedback_fn,
                                                  self.op.src_node, pnode_name,
                                                  self.pnode.secondary_ip,
                                                  iobj, transfers)
          # failed transfers only produce a warning
          if not compat.all(import_result):
            self.LogWarning("Some disks for instance %s on node %s were not"
                            " imported successfully" % (instance, pnode_name))

          rename_from = self._old_instance_name

        elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
          feedback_fn("* preparing remote import...")
          # The source cluster will stop the instance before attempting to make
          # a connection. In some cases stopping an instance can take a long
          # time, hence the shutdown timeout is added to the connection
          # timeout.
          connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                             self.op.source_shutdown_timeout)
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          assert iobj.primary_node == self.pnode.name
          disk_results = \
            masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                          self.source_x509_ca,
                                          self._cds, timeouts)
          if not compat.all(disk_results):
            # TODO: Should the instance still be started, even if some disks
            # failed to import (valid for local imports, too)?
            self.LogWarning("Some disks for instance %s on node %s were not"
                            " imported successfully" % (instance, pnode_name))

          rename_from = self.source_instance_name

        else:
          # also checked in the prereq part
          raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                       % self.op.mode)

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   rename_from,
                                                   self.op.debug_level)
        # a failing rename script is reported as a warning only
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

    if self.op.start:
      # record the new admin state in the configuration before starting
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
10534
10535
class LUInstanceMultiAlloc(NoHooksLU):
  """Allocates multiple instances at the same time.

  The iallocator is asked where to place all the requested instances; for
  every instance that can be placed, one job creating it is submitted.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    Verifies that no instance carries its own iallocator, that either all
    or none of the instances specify their nodes, that an iallocator is
    available and that the instance names are unique.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    nodes = []
    for inst in self.op.instances:
      if inst.iallocator is not None:
        raise errors.OpPrereqError("iallocator are not allowed to be set on"
                                   " instance objects", errors.ECODE_INVAL)
      nodes.append(bool(inst.pnode))
      if inst.disk_template in constants.DTS_INT_MIRROR:
        nodes.append(bool(inst.snode))

    # all(nodes) and any(nodes) differ exactly when only some of the
    # required nodes were given
    has_nodes = compat.any(nodes)
    if compat.all(nodes) ^ has_nodes:
      raise errors.OpPrereqError("There are instance objects providing"
                                 " pnode/snode while others do not",
                                 errors.ECODE_INVAL)

    if self.op.iallocator is None:
      default_iallocator = self.cfg.GetDefaultIAllocator()
      # NOTE(review): the cluster default is adopted only when nodes were
      # given, and the error below is raised in every other case (including
      # "nodes given, no default iallocator"); this looks inverted with
      # respect to the error message and makes the non-iallocator branch of
      # ExpandNames unreachable -- confirm the intended behaviour before
      # changing it, as CheckPrereq/Exec currently always need an iallocator
      if default_iallocator and has_nodes:
        self.op.iallocator = default_iallocator
      else:
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
                                   " given and no cluster-wide default"
                                   " iallocator found; please specify either"
                                   " an iallocator or nodes on the instances"
                                   " or set a cluster-wide default iallocator",
                                   errors.ECODE_INVAL)

    dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
    if dups:
      raise errors.OpPrereqError("There are duplicate instance names: %s" %
                                 utils.CommaJoin(dups), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Calculate the locks.

    With an iallocator all node and node resource locks are requested;
    otherwise only those of the nodes named by the instances.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      nodeslist = []
      for inst in self.op.instances:
        inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
        nodeslist.append(inst.pnode)
        if inst.snode is not None:
          inst.snode = _ExpandNodeName(self.cfg, inst.snode)
          nodeslist.append(inst.snode)

      self.needed_locks[locking.LEVEL_NODE] = nodeslist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)

  def CheckPrereq(self):
    """Check prerequisite.

    Runs the iallocator for all requested instances and stores its result
    in C{self.ia_result}; for dry runs the partial result (with an empty
    job ID list) is published in C{self.dry_run_result}.

    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    cluster = self.cfg.GetClusterInfo()
    default_vg = self.cfg.GetVGName()
    insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
                                         _ComputeNics(op, cluster, None,
                                                      self.cfg, self.proc),
                                         _ComputeFullBeParams(op, cluster))
             for op in self.op.instances]
    req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.ia_result = ial.result

    if self.op.dry_run:
      # The attribute must be named "dry_run_result" (as set by the other
      # LUs in this module); it was previously misspelled and thus the
      # computed value was never stored under the expected name
      self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
        constants.JOB_IDS_KEY: [],
        })

  def _ConstructPartialResult(self):
    """Constructs the partial result.

    @rtype: dict
    @return: dictionary holding the names of the allocatable instances
      (first item of each allocatable entry) and the failed entries as
      returned by the iallocator

    """
    (allocatable, failed) = self.ia_result
    return {
      opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
        map(compat.fst, allocatable),
      opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
      }

  def Exec(self, feedback_fn):
    """Executes the opcode.

    Fills in the nodes computed by the iallocator on the opcode of each
    allocatable instance and submits every such opcode as its own job.

    @return: a L{ResultWithJobs} carrying the jobs and the partial result

    """
    op2inst = dict((op.instance_name, op) for op in self.op.instances)
    (allocatable, failed) = self.ia_result

    jobs = []
    for (name, nodes) in allocatable:
      op = op2inst.pop(name)

      # more than one node means a primary/secondary pair was returned
      if len(nodes) > 1:
        (op.pnode, op.snode) = nodes
      else:
        (op.pnode,) = nodes

      jobs.append([op])

    # whatever was not allocatable must have been reported as failed
    missing = set(op2inst.keys()) - set(failed)
    assert not missing, \
      "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)

    return ResultWithJobs(jobs, **self._ConstructPartialResult())
10665
10666
10667 def _CheckRADOSFreeSpace():
10668   """Compute disk size requirements inside the RADOS cluster.
10669
10670   """
10671   # For the RADOS cluster we assume there is always enough space.
10672   pass
10673
10674
class LUInstanceConsole(NoHooksLU):
  """Return the console information of an instance.

  This LU does not open the console itself; it returns the data computed
  by L{_GetInstanceConsole} for the instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Set up the lock sharing via L{_ShareAll} and lock the instance.

    """
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance in the configuration and verifies that its
    primary node is online.

    """
    name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Compute the console information of a running instance.

    @return: the result of L{_GetInstanceConsole}
    @raise errors.OpExecError: if the instance is not among the instances
      reported as running by its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name in running.payload:
      logging.debug("Connecting to console of %s on %s", inst.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), inst)

    # Not running: derive the state to report from the admin state, any
    # admin state other than up/down being reported as offline
    state_by_admin = {
      constants.ADMINST_UP: constants.INSTST_ERRORDOWN,
      constants.ADMINST_DOWN: constants.INSTST_ADMINDOWN,
      }
    state = state_by_admin.get(inst.admin_state,
                               constants.INSTST_ADMINOFFLINE)
    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (inst.name, state))
10724
10725
def _GetInstanceConsole(cluster, instance):
  """Compute the console information for an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: cluster object used to fill in the parameter defaults
  @type instance: L{objects.Instance}
  @param instance: the instance whose console is requested
  @rtype: dict
  @return: the console object of the instance's hypervisor, as dictionary

  """
  hv_impl = hypervisor.GetHypervisor(instance.hypervisor)
  # The filled hypervisor and backend parameters are handed over as
  # separate arguments instead of being written into the instance, so
  # that the defaults do not end up saved in the instance itself.
  result = hv_impl.GetInstanceConsole(instance,
                                      cluster.FillHV(instance),
                                      cluster.FillBE(instance))

  assert result.instance == instance.name
  assert result.Validate()

  return result.ToDict()
10745
10746
10747 class LUInstanceReplaceDisks(LogicalUnit):
10748   """Replace the disks of an instance.
10749
10750   """
10751   HPATH = "mirrors-replace"
10752   HTYPE = constants.HTYPE_INSTANCE
10753   REQ_BGL = False
10754
10755   def CheckArguments(self):
10756     """Check arguments.
10757
10758     """
10759     remote_node = self.op.remote_node
10760     ialloc = self.op.iallocator
10761     if self.op.mode == constants.REPLACE_DISK_CHG:
10762       if remote_node is None and ialloc is None:
10763         raise errors.OpPrereqError("When changing the secondary either an"
10764                                    " iallocator script must be used or the"
10765                                    " new node given", errors.ECODE_INVAL)
10766       else:
10767         _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10768
10769     elif remote_node is not None or ialloc is not None:
10770       # Not replacing the secondary
10771       raise errors.OpPrereqError("The iallocator and new node options can"
10772                                  " only be used when changing the"
10773                                  " secondary node", errors.ECODE_INVAL)
10774
10775   def ExpandNames(self):
10776     self._ExpandAndLockInstance()
10777
10778     assert locking.LEVEL_NODE not in self.needed_locks
10779     assert locking.LEVEL_NODE_RES not in self.needed_locks
10780     assert locking.LEVEL_NODEGROUP not in self.needed_locks
10781
10782     assert self.op.iallocator is None or self.op.remote_node is None, \
10783       "Conflicting options"
10784
10785     if self.op.remote_node is not None:
10786       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10787
10788       # Warning: do not remove the locking of the new secondary here
10789       # unless DRBD8.AddChildren is changed to work in parallel;
10790       # currently it doesn't since parallel invocations of
10791       # FindUnusedMinor will conflict
10792       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10793       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10794     else:
10795       self.needed_locks[locking.LEVEL_NODE] = []
10796       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10797
10798       if self.op.iallocator is not None:
10799         # iallocator will select a new node in the same group
10800         self.needed_locks[locking.LEVEL_NODEGROUP] = []
10801
10802     self.needed_locks[locking.LEVEL_NODE_RES] = []
10803
10804     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10805                                    self.op.iallocator, self.op.remote_node,
10806                                    self.op.disks, False, self.op.early_release,
10807                                    self.op.ignore_ipolicy)
10808
10809     self.tasklets = [self.replacer]
10810
10811   def DeclareLocks(self, level):
10812     if level == locking.LEVEL_NODEGROUP:
10813       assert self.op.remote_node is None
10814       assert self.op.iallocator is not None
10815       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10816
10817       self.share_locks[locking.LEVEL_NODEGROUP] = 1
10818       # Lock all groups used by instance optimistically; this requires going
10819       # via the node before it's locked, requiring verification later on
10820       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10821         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10822
10823     elif level == locking.LEVEL_NODE:
10824       if self.op.iallocator is not None:
10825         assert self.op.remote_node is None
10826         assert not self.needed_locks[locking.LEVEL_NODE]
10827
10828         # Lock member nodes of all locked groups
10829         self.needed_locks[locking.LEVEL_NODE] = \
10830             [node_name
10831              for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10832              for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10833       else:
10834         self._LockInstancesNodes()
10835     elif level == locking.LEVEL_NODE_RES:
10836       # Reuse node locks
10837       self.needed_locks[locking.LEVEL_NODE_RES] = \
10838         self.needed_locks[locking.LEVEL_NODE]
10839
10840   def BuildHooksEnv(self):
10841     """Build hooks env.
10842
10843     This runs on the master, the primary and all the secondaries.
10844
10845     """
10846     instance = self.replacer.instance
10847     env = {
10848       "MODE": self.op.mode,
10849       "NEW_SECONDARY": self.op.remote_node,
10850       "OLD_SECONDARY": instance.secondary_nodes[0],
10851       }
10852     env.update(_BuildInstanceHookEnvByObject(self, instance))
10853     return env
10854
10855   def BuildHooksNodes(self):
10856     """Build hooks nodes.
10857
10858     """
10859     instance = self.replacer.instance
10860     nl = [
10861       self.cfg.GetMasterNode(),
10862       instance.primary_node,
10863       ]
10864     if self.op.remote_node is not None:
10865       nl.append(self.op.remote_node)
10866     return nl, nl
10867
10868   def CheckPrereq(self):
10869     """Check prerequisites.
10870
10871     """
10872     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10873             self.op.iallocator is None)
10874
10875     # Verify if node group locks are still correct
10876     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10877     if owned_groups:
10878       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10879
10880     return LogicalUnit.CheckPrereq(self)
10881
10882
10883 class TLReplaceDisks(Tasklet):
10884   """Replaces disks for an instance.
10885
10886   Note: Locking is not within the scope of this class.
10887
10888   """
10889   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10890                disks, delay_iallocator, early_release, ignore_ipolicy):
10891     """Initializes this class.
10892
10893     """
10894     Tasklet.__init__(self, lu)
10895
10896     # Parameters
10897     self.instance_name = instance_name
10898     self.mode = mode
10899     self.iallocator_name = iallocator_name
10900     self.remote_node = remote_node
10901     self.disks = disks
10902     self.delay_iallocator = delay_iallocator
10903     self.early_release = early_release
10904     self.ignore_ipolicy = ignore_ipolicy
10905
10906     # Runtime data
10907     self.instance = None
10908     self.new_node = None
10909     self.target_node = None
10910     self.other_node = None
10911     self.remote_node_info = None
10912     self.node_secondary_ip = None
10913
10914   @staticmethod
10915   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10916     """Compute a new secondary node using an IAllocator.
10917
10918     """
10919     req = iallocator.IAReqRelocate(name=instance_name,
10920                                    relocate_from=list(relocate_from))
10921     ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10922
10923     ial.Run(iallocator_name)
10924
10925     if not ial.success:
10926       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10927                                  " %s" % (iallocator_name, ial.info),
10928                                  errors.ECODE_NORES)
10929
10930     remote_node_name = ial.result[0]
10931
10932     lu.LogInfo("Selected new secondary for instance '%s': %s",
10933                instance_name, remote_node_name)
10934
10935     return remote_node_name
10936
10937   def _FindFaultyDisks(self, node_name):
10938     """Wrapper for L{_FindFaultyInstanceDisks}.
10939
10940     """
10941     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10942                                     node_name, True)
10943
10944   def _CheckDisksActivated(self, instance):
10945     """Checks if the instance disks are activated.
10946
10947     @param instance: The instance to check disks
10948     @return: True if they are activated, False otherwise
10949
10950     """
10951     nodes = instance.all_nodes
10952
10953     for idx, dev in enumerate(instance.disks):
10954       for node in nodes:
10955         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10956         self.cfg.SetDiskID(dev, node)
10957
10958         result = _BlockdevFind(self, node, dev, instance)
10959
10960         if result.offline:
10961           continue
10962         elif result.fail_msg or not result.payload:
10963           return False
10964
10965     return True
10966
10967   def CheckPrereq(self):
10968     """Check prerequisites.
10969
10970     This checks that the instance is in the cluster.
10971
10972     """
10973     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10974     assert instance is not None, \
10975       "Cannot retrieve locked instance %s" % self.instance_name
10976
10977     if instance.disk_template != constants.DT_DRBD8:
10978       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10979                                  " instances", errors.ECODE_INVAL)
10980
10981     if len(instance.secondary_nodes) != 1:
10982       raise errors.OpPrereqError("The instance has a strange layout,"
10983                                  " expected one secondary but found %d" %
10984                                  len(instance.secondary_nodes),
10985                                  errors.ECODE_FAULT)
10986
10987     if not self.delay_iallocator:
10988       self._CheckPrereq2()
10989
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    Besides validating the request, this method computes the runtime data
    used by the execution phase (C{self.disks}, C{self.target_node},
    C{self.other_node}, C{self.new_node}, C{self.remote_node_info} and
    C{self.node_secondary_ip}) and releases the node, node resource and
    node group locks which are not needed for the operation.

    @raise errors.OpPrereqError: if the new node or the requested disks are
        not acceptable, or if automatic repair is not possible
    @raise errors.ProgrammerError: if the replace mode is unknown

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # The new secondary is either given explicitly or computed by the
    # iallocator; it stays None if neither was requested
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    # In automatic mode the disks are detected below, when changing the
    # secondary all disks are affected; an explicit list is refused in both
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      # The node with the faulty disks becomes the target of the replacement
      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty; Exec reports this and returns early
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        # Only this mode sets new_node, which is what makes Exec choose the
        # secondary-changing handler
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between internal
    # submitted opcode and external one. We should fix that.
    if self.remote_node_info:
      # We change the node, lets verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      cluster = self.cfg.GetClusterInfo()
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              new_group_info)
      # NOTE(review): the tasklet itself is passed as first argument here,
      # while the other helpers in this method receive self.lu; confirm that
      # _CheckTargetNodeIPolicy only uses attributes the tasklet provides
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Nodes which are actually involved in the operation
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    # (presumably FindDisk raises for an unknown index; its return value is
    # not used here -- TODO confirm)
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
11131
11132   def Exec(self, feedback_fn):
11133     """Execute disk replacement.
11134
11135     This dispatches the disk replacement to the appropriate handler.
11136
11137     """
11138     if self.delay_iallocator:
11139       self._CheckPrereq2()
11140
11141     if __debug__:
11142       # Verify owned locks before starting operation
11143       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11144       assert set(owned_nodes) == set(self.node_secondary_ip), \
11145           ("Incorrect node locks, owning %s, expected %s" %
11146            (owned_nodes, self.node_secondary_ip.keys()))
11147       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11148               self.lu.owned_locks(locking.LEVEL_NODE_RES))
11149
11150       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11151       assert list(owned_instances) == [self.instance_name], \
11152           "Instance '%s' not locked" % self.instance_name
11153
11154       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11155           "Should not own any node group lock at this point"
11156
11157     if not self.disks:
11158       feedback_fn("No disks need replacement for instance '%s'" %
11159                   self.instance.name)
11160       return
11161
11162     feedback_fn("Replacing disk(s) %s for instance '%s'" %
11163                 (utils.CommaJoin(self.disks), self.instance.name))
11164     feedback_fn("Current primary node: %s", self.instance.primary_node)
11165     feedback_fn("Current seconary node: %s",
11166                 utils.CommaJoin(self.instance.secondary_nodes))
11167
11168     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11169
11170     # Activate the instance disks if we're replacing them on a down instance
11171     if activate_disks:
11172       _StartInstanceDisks(self.lu, self.instance, True)
11173
11174     try:
11175       # Should we replace the secondary node?
11176       if self.new_node is not None:
11177         fn = self._ExecDrbd8Secondary
11178       else:
11179         fn = self._ExecDrbd8DiskOnly
11180
11181       result = fn(feedback_fn)
11182     finally:
11183       # Deactivate the instance disks if we're replacing them on a
11184       # down instance
11185       if activate_disks:
11186         _SafeShutdownInstanceDisks(self.lu, self.instance)
11187
11188     assert not self.lu.owned_locks(locking.LEVEL_NODE)
11189
11190     if __debug__:
11191       # Verify owned locks
11192       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11193       nodes = frozenset(self.node_secondary_ip)
11194       assert ((self.early_release and not owned_nodes) or
11195               (not self.early_release and not (set(owned_nodes) - nodes))), \
11196         ("Not owning the correct locks, early_release=%s, owned=%r,"
11197          " nodes=%r" % (self.early_release, owned_nodes, nodes))
11198
11199     return result
11200
11201   def _CheckVolumeGroup(self, nodes):
11202     self.lu.LogInfo("Checking volume groups")
11203
11204     vgname = self.cfg.GetVGName()
11205
11206     # Make sure volume group exists on all involved nodes
11207     results = self.rpc.call_vg_list(nodes)
11208     if not results:
11209       raise errors.OpExecError("Can't list volume groups on the nodes")
11210
11211     for node in nodes:
11212       res = results[node]
11213       res.Raise("Error checking node %s" % node)
11214       if vgname not in res.payload:
11215         raise errors.OpExecError("Volume group '%s' not found on node %s" %
11216                                  (vgname, node))
11217
11218   def _CheckDisksExistence(self, nodes):
11219     # Check disk existence
11220     for idx, dev in enumerate(self.instance.disks):
11221       if idx not in self.disks:
11222         continue
11223
11224       for node in nodes:
11225         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11226         self.cfg.SetDiskID(dev, node)
11227
11228         result = _BlockdevFind(self, node, dev, self.instance)
11229
11230         msg = result.fail_msg
11231         if msg or not result.payload:
11232           if not msg:
11233             msg = "disk not found"
11234           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11235                                    (idx, node, msg))
11236
11237   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11238     for idx, dev in enumerate(self.instance.disks):
11239       if idx not in self.disks:
11240         continue
11241
11242       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11243                       (idx, node_name))
11244
11245       if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11246                                    on_primary, ldisk=ldisk):
11247         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11248                                  " replace disks for instance %s" %
11249                                  (node_name, self.instance.name))
11250
11251   def _CreateNewStorage(self, node_name):
11252     """Create new storage on the primary or secondary node.
11253
11254     This is only used for same-node replaces, not for changing the
11255     secondary node, hence we don't want to modify the existing disk.
11256
11257     """
11258     iv_names = {}
11259
11260     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11261     for idx, dev in enumerate(disks):
11262       if idx not in self.disks:
11263         continue
11264
11265       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11266
11267       self.cfg.SetDiskID(dev, node_name)
11268
11269       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11270       names = _GenerateUniqueNames(self.lu, lv_names)
11271
11272       (data_disk, meta_disk) = dev.children
11273       vg_data = data_disk.logical_id[0]
11274       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11275                              logical_id=(vg_data, names[0]),
11276                              params=data_disk.params)
11277       vg_meta = meta_disk.logical_id[0]
11278       lv_meta = objects.Disk(dev_type=constants.LD_LV,
11279                              size=constants.DRBD_META_SIZE,
11280                              logical_id=(vg_meta, names[1]),
11281                              params=meta_disk.params)
11282
11283       new_lvs = [lv_data, lv_meta]
11284       old_lvs = [child.Copy() for child in dev.children]
11285       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11286
11287       # we pass force_create=True to force the LVM creation
11288       for new_lv in new_lvs:
11289         _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11290                              _GetInstanceInfoText(self.instance), False)
11291
11292     return iv_names
11293
11294   def _CheckDevices(self, node_name, iv_names):
11295     for name, (dev, _, _) in iv_names.iteritems():
11296       self.cfg.SetDiskID(dev, node_name)
11297
11298       result = _BlockdevFind(self, node_name, dev, self.instance)
11299
11300       msg = result.fail_msg
11301       if msg or not result.payload:
11302         if not msg:
11303           msg = "disk not found"
11304         raise errors.OpExecError("Can't find DRBD device %s: %s" %
11305                                  (name, msg))
11306
11307       if result.payload.is_degraded:
11308         raise errors.OpExecError("DRBD device %s is degraded!" % name)
11309
11310   def _RemoveOldStorage(self, node_name, iv_names):
11311     for name, (_, old_lvs, _) in iv_names.iteritems():
11312       self.lu.LogInfo("Remove logical volumes for %s" % name)
11313
11314       for lv in old_lvs:
11315         self.cfg.SetDiskID(lv, node_name)
11316
11317         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11318         if msg:
11319           self.lu.LogWarning("Can't remove old LV: %s" % msg,
11320                              hint="remove unused LVs manually")
11321
11322   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11323     """Replace a disk on the primary or secondary for DRBD 8.
11324
11325     The algorithm for replace is quite complicated:
11326
11327       1. for each disk to be replaced:
11328
11329         1. create new LVs on the target node with unique names
11330         1. detach old LVs from the drbd device
11331         1. rename old LVs to name_replaced.<time_t>
11332         1. rename new LVs to old LVs
11333         1. attach the new LVs (with the old names now) to the drbd device
11334
11335       1. wait for sync across all devices
11336
11337       1. for each modified disk:
11338
11339         1. remove old LVs (which have the name name_replaces.<time_t>)
11340
11341     Failures are not very well handled.
11342
11343     """
11344     steps_total = 6
11345
11346     # Step: check device activation
11347     self.lu.LogStep(1, steps_total, "Check device existence")
11348     self._CheckDisksExistence([self.other_node, self.target_node])
11349     self._CheckVolumeGroup([self.target_node, self.other_node])
11350
11351     # Step: check other node consistency
11352     self.lu.LogStep(2, steps_total, "Check peer consistency")
11353     self._CheckDisksConsistency(self.other_node,
11354                                 self.other_node == self.instance.primary_node,
11355                                 False)
11356
11357     # Step: create new storage
11358     self.lu.LogStep(3, steps_total, "Allocate new storage")
11359     iv_names = self._CreateNewStorage(self.target_node)
11360
11361     # Step: for each lv, detach+rename*2+attach
11362     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11363     for dev, old_lvs, new_lvs in iv_names.itervalues():
11364       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11365
11366       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11367                                                      old_lvs)
11368       result.Raise("Can't detach drbd from local storage on node"
11369                    " %s for device %s" % (self.target_node, dev.iv_name))
11370       #dev.children = []
11371       #cfg.Update(instance)
11372
11373       # ok, we created the new LVs, so now we know we have the needed
11374       # storage; as such, we proceed on the target node to rename
11375       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11376       # using the assumption that logical_id == physical_id (which in
11377       # turn is the unique_id on that node)
11378
11379       # FIXME(iustin): use a better name for the replaced LVs
11380       temp_suffix = int(time.time())
11381       ren_fn = lambda d, suff: (d.physical_id[0],
11382                                 d.physical_id[1] + "_replaced-%s" % suff)
11383
11384       # Build the rename list based on what LVs exist on the node
11385       rename_old_to_new = []
11386       for to_ren in old_lvs:
11387         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11388         if not result.fail_msg and result.payload:
11389           # device exists
11390           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11391
11392       self.lu.LogInfo("Renaming the old LVs on the target node")
11393       result = self.rpc.call_blockdev_rename(self.target_node,
11394                                              rename_old_to_new)
11395       result.Raise("Can't rename old LVs on node %s" % self.target_node)
11396
11397       # Now we rename the new LVs to the old LVs
11398       self.lu.LogInfo("Renaming the new LVs on the target node")
11399       rename_new_to_old = [(new, old.physical_id)
11400                            for old, new in zip(old_lvs, new_lvs)]
11401       result = self.rpc.call_blockdev_rename(self.target_node,
11402                                              rename_new_to_old)
11403       result.Raise("Can't rename new LVs on node %s" % self.target_node)
11404
11405       # Intermediate steps of in memory modifications
11406       for old, new in zip(old_lvs, new_lvs):
11407         new.logical_id = old.logical_id
11408         self.cfg.SetDiskID(new, self.target_node)
11409
11410       # We need to modify old_lvs so that removal later removes the
11411       # right LVs, not the newly added ones; note that old_lvs is a
11412       # copy here
11413       for disk in old_lvs:
11414         disk.logical_id = ren_fn(disk, temp_suffix)
11415         self.cfg.SetDiskID(disk, self.target_node)
11416
11417       # Now that the new lvs have the old name, we can add them to the device
11418       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11419       result = self.rpc.call_blockdev_addchildren(self.target_node,
11420                                                   (dev, self.instance), new_lvs)
11421       msg = result.fail_msg
11422       if msg:
11423         for new_lv in new_lvs:
11424           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11425                                                new_lv).fail_msg
11426           if msg2:
11427             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11428                                hint=("cleanup manually the unused logical"
11429                                      "volumes"))
11430         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11431
11432     cstep = itertools.count(5)
11433
11434     if self.early_release:
11435       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11436       self._RemoveOldStorage(self.target_node, iv_names)
11437       # TODO: Check if releasing locks early still makes sense
11438       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11439     else:
11440       # Release all resource locks except those used by the instance
11441       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11442                     keep=self.node_secondary_ip.keys())
11443
11444     # Release all node locks while waiting for sync
11445     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11446
11447     # TODO: Can the instance lock be downgraded here? Take the optional disk
11448     # shutdown in the caller into consideration.
11449
11450     # Wait for sync
11451     # This can fail as the old devices are degraded and _WaitForSync
11452     # does a combined result over all disks, so we don't check its return value
11453     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11454     _WaitForSync(self.lu, self.instance)
11455
11456     # Check all devices manually
11457     self._CheckDevices(self.instance.primary_node, iv_names)
11458
11459     # Step: remove old storage
11460     if not self.early_release:
11461       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11462       self._RemoveOldStorage(self.target_node, iv_names)
11463
11464   def _ExecDrbd8Secondary(self, feedback_fn):
11465     """Replace the secondary node for DRBD 8.
11466
11467     The algorithm for replace is quite complicated:
11468       - for all disks of the instance:
11469         - create new LVs on the new node with same names
11470         - shutdown the drbd device on the old secondary
11471         - disconnect the drbd network on the primary
11472         - create the drbd device on the new secondary
11473         - network attach the drbd on the primary, using an artifice:
11474           the drbd code for Attach() will connect to the network if it
11475           finds a device which is connected to the good local disks but
11476           not network enabled
11477       - wait for sync across all devices
11478       - remove all disks from the old secondary
11479
11480     Failures are not very well handled.
11481
11482     """
11483     steps_total = 6
11484
11485     pnode = self.instance.primary_node
11486
11487     # Step: check device activation
11488     self.lu.LogStep(1, steps_total, "Check device existence")
11489     self._CheckDisksExistence([self.instance.primary_node])
11490     self._CheckVolumeGroup([self.instance.primary_node])
11491
11492     # Step: check other node consistency
11493     self.lu.LogStep(2, steps_total, "Check peer consistency")
11494     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11495
11496     # Step: create new storage
11497     self.lu.LogStep(3, steps_total, "Allocate new storage")
11498     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11499     for idx, dev in enumerate(disks):
11500       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11501                       (self.new_node, idx))
11502       # we pass force_create=True to force LVM creation
11503       for new_lv in dev.children:
11504         _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11505                              True, _GetInstanceInfoText(self.instance), False)
11506
11507     # Step 4: dbrd minors and drbd setups changes
11508     # after this, we must manually remove the drbd minors on both the
11509     # error and the success paths
11510     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11511     minors = self.cfg.AllocateDRBDMinor([self.new_node
11512                                          for dev in self.instance.disks],
11513                                         self.instance.name)
11514     logging.debug("Allocated minors %r", minors)
11515
11516     iv_names = {}
11517     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11518       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11519                       (self.new_node, idx))
11520       # create new devices on new_node; note that we create two IDs:
11521       # one without port, so the drbd will be activated without
11522       # networking information on the new node at this stage, and one
11523       # with network, for the latter activation in step 4
11524       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11525       if self.instance.primary_node == o_node1:
11526         p_minor = o_minor1
11527       else:
11528         assert self.instance.primary_node == o_node2, "Three-node instance?"
11529         p_minor = o_minor2
11530
11531       new_alone_id = (self.instance.primary_node, self.new_node, None,
11532                       p_minor, new_minor, o_secret)
11533       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11534                     p_minor, new_minor, o_secret)
11535
11536       iv_names[idx] = (dev, dev.children, new_net_id)
11537       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11538                     new_net_id)
11539       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11540                               logical_id=new_alone_id,
11541                               children=dev.children,
11542                               size=dev.size,
11543                               params={})
11544       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11545                                              self.cfg)
11546       try:
11547         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11548                               anno_new_drbd,
11549                               _GetInstanceInfoText(self.instance), False)
11550       except errors.GenericError:
11551         self.cfg.ReleaseDRBDMinors(self.instance.name)
11552         raise
11553
11554     # We have new devices, shutdown the drbd on the old secondary
11555     for idx, dev in enumerate(self.instance.disks):
11556       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11557       self.cfg.SetDiskID(dev, self.target_node)
11558       msg = self.rpc.call_blockdev_shutdown(self.target_node,
11559                                             (dev, self.instance)).fail_msg
11560       if msg:
11561         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11562                            "node: %s" % (idx, msg),
11563                            hint=("Please cleanup this device manually as"
11564                                  " soon as possible"))
11565
11566     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11567     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11568                                                self.instance.disks)[pnode]
11569
11570     msg = result.fail_msg
11571     if msg:
11572       # detaches didn't succeed (unlikely)
11573       self.cfg.ReleaseDRBDMinors(self.instance.name)
11574       raise errors.OpExecError("Can't detach the disks from the network on"
11575                                " old node: %s" % (msg,))
11576
11577     # if we managed to detach at least one, we update all the disks of
11578     # the instance to point to the new secondary
11579     self.lu.LogInfo("Updating instance configuration")
11580     for dev, _, new_logical_id in iv_names.itervalues():
11581       dev.logical_id = new_logical_id
11582       self.cfg.SetDiskID(dev, self.instance.primary_node)
11583
11584     self.cfg.Update(self.instance, feedback_fn)
11585
11586     # Release all node locks (the configuration has been updated)
11587     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11588
11589     # and now perform the drbd attach
11590     self.lu.LogInfo("Attaching primary drbds to new secondary"
11591                     " (standalone => connected)")
11592     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11593                                             self.new_node],
11594                                            self.node_secondary_ip,
11595                                            (self.instance.disks, self.instance),
11596                                            self.instance.name,
11597                                            False)
11598     for to_node, to_result in result.items():
11599       msg = to_result.fail_msg
11600       if msg:
11601         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11602                            to_node, msg,
11603                            hint=("please do a gnt-instance info to see the"
11604                                  " status of disks"))
11605
11606     cstep = itertools.count(5)
11607
11608     if self.early_release:
11609       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11610       self._RemoveOldStorage(self.target_node, iv_names)
11611       # TODO: Check if releasing locks early still makes sense
11612       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11613     else:
11614       # Release all resource locks except those used by the instance
11615       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11616                     keep=self.node_secondary_ip.keys())
11617
11618     # TODO: Can the instance lock be downgraded here? Take the optional disk
11619     # shutdown in the caller into consideration.
11620
11621     # Wait for sync
11622     # This can fail as the old devices are degraded and _WaitForSync
11623     # does a combined result over all disks, so we don't check its return value
11624     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11625     _WaitForSync(self.lu, self.instance)
11626
11627     # Check all devices manually
11628     self._CheckDevices(self.instance.primary_node, iv_names)
11629
11630     # Step: remove old storage
11631     if not self.early_release:
11632       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11633       self._RemoveOldStorage(self.target_node, iv_names)
11634
11635
11636 class LURepairNodeStorage(NoHooksLU):
11637   """Repairs the volume group on a node.
11638
11639   """
11640   REQ_BGL = False
11641
11642   def CheckArguments(self):
11643     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11644
11645     storage_type = self.op.storage_type
11646
11647     if (constants.SO_FIX_CONSISTENCY not in
11648         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11649       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11650                                  " repaired" % storage_type,
11651                                  errors.ECODE_INVAL)
11652
11653   def ExpandNames(self):
11654     self.needed_locks = {
11655       locking.LEVEL_NODE: [self.op.node_name],
11656       }
11657
11658   def _CheckFaultyDisks(self, instance, node_name):
11659     """Ensure faulty disks abort the opcode or at least warn."""
11660     try:
11661       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11662                                   node_name, True):
11663         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11664                                    " node '%s'" % (instance.name, node_name),
11665                                    errors.ECODE_STATE)
11666     except errors.OpPrereqError, err:
11667       if self.op.ignore_consistency:
11668         self.proc.LogWarning(str(err.args[0]))
11669       else:
11670         raise
11671
11672   def CheckPrereq(self):
11673     """Check prerequisites.
11674
11675     """
11676     # Check whether any instance on this node has faulty disks
11677     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11678       if inst.admin_state != constants.ADMINST_UP:
11679         continue
11680       check_nodes = set(inst.all_nodes)
11681       check_nodes.discard(self.op.node_name)
11682       for inst_node_name in check_nodes:
11683         self._CheckFaultyDisks(inst, inst_node_name)
11684
11685   def Exec(self, feedback_fn):
11686     feedback_fn("Repairing storage unit '%s' on %s ..." %
11687                 (self.op.name, self.op.node_name))
11688
11689     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11690     result = self.rpc.call_storage_execute(self.op.node_name,
11691                                            self.op.storage_type, st_args,
11692                                            self.op.name,
11693                                            constants.SO_FIX_CONSISTENCY)
11694     result.Raise("Failed to repair storage unit '%s' on %s" %
11695                  (self.op.name, self.op.node_name))
11696
11697
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a node.

  The evacuation itself is not carried out by this logical unit: L{Exec}
  only computes the jobs (either through an iallocator or by building
  replace-disks opcodes targeting the given remote node) and returns them
  wrapped in a L{ResultWithJobs}.

  """
  REQ_BGL = False

  # Translation of the opcode's evacuation mode into the iallocator's one
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  # Make sure the table stays in sync with both sets of constants
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    """Validates the "iallocator"/"remote_node" opcode parameters.

    The actual rule lives in L{_CheckIAllocatorOrNode}; presumably exactly
    one of the two has to be set, which is what L{Exec} asserts.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expands node names and declares the (shared) locks.

    @raise errors.OpPrereqError: if the remote node is the evacuated node
        itself, or if a remote node is given for a mode other than
        L{constants.NODE_EVAC_SEC}

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks; the actual names are filled in by DeclareLocks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    @return: set containing the evacuated node plus either all members of
        its node group (iallocator case) or the requested remote node

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    @return: instance objects selected on the evacuated node according to
        the opcode's mode
    @raise errors.OpPrereqError: for L{constants.NODE_EVAC_ALL}, which is
        currently not supported

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      # Not used as long as the error below is raised unconditionally; kept
      # to show what this branch is meant to do once the TODO is resolved
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    """Fills in the lock names for the given locking level.

    All names are computed optimistically, i.e. before the respective
    locks are held; L{CheckPrereq} verifies them afterwards.

    """
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the optimistically acquired node, group and instance
    locks still match the configuration and stores the affected instances
    in C{self.instances} and C{self.instance_names}.

    @raise errors.OpPrereqError: if the needed nodes are not all locked, or
        if the remote node is the primary node of an affected instance
    @raise errors.OpExecError: if node groups or instances changed since
        the locks were acquired

    """
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      # The new secondary must not be the primary of any moved instance
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Computes the evacuation jobs.

    @return: L{ResultWithJobs} holding one job per entry; the list is empty
        if there is nothing to evacuate
    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      evac_mode = self._MODE2IALLOCATOR[self.op.mode]
      req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
                                     instances=list(self.instance_names))
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      # One single-opcode job per instance, changing its secondary node
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
11896
11897
11898 def _SetOpEarlyRelease(early_release, op):
11899   """Sets C{early_release} flag on opcodes if available.
11900
11901   """
11902   try:
11903     op.early_release = early_release
11904   except AttributeError:
11905     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11906
11907   return op
11908
11909
11910 def _NodeEvacDest(use_nodes, group, nodes):
11911   """Returns group or nodes depending on caller's choice.
11912
11913   """
11914   if use_nodes:
11915     return utils.CommaJoin(nodes)
11916   else:
11917     return group
11918
11919
11920 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11921   """Unpacks the result of change-group and node-evacuate iallocator requests.
11922
11923   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11924   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11925
11926   @type lu: L{LogicalUnit}
11927   @param lu: Logical unit instance
11928   @type alloc_result: tuple/list
11929   @param alloc_result: Result from iallocator
11930   @type early_release: bool
11931   @param early_release: Whether to release locks early if possible
11932   @type use_nodes: bool
11933   @param use_nodes: Whether to display node names instead of groups
11934
11935   """
11936   (moved, failed, jobs) = alloc_result
11937
11938   if failed:
11939     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11940                                  for (name, reason) in failed)
11941     lu.LogWarning("Unable to evacuate instances %s", failreason)
11942     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11943
11944   if moved:
11945     lu.LogInfo("Instances to be moved: %s",
11946                utils.CommaJoin("%s (to %s)" %
11947                                (name, _NodeEvacDest(use_nodes, group, nodes))
11948                                for (name, group, nodes) in moved))
11949
11950   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11951               map(opcodes.OpCode.LoadOpCode, ops))
11952           for ops in jobs]
11953
11954
11955 def _DiskSizeInBytesToMebibytes(lu, size):
11956   """Converts a disk size in bytes to mebibytes.
11957
11958   Warns and rounds up if the size isn't an even multiple of 1 MiB.
11959
11960   """
11961   (mib, remainder) = divmod(size, 1024 * 1024)
11962
11963   if remainder != 0:
11964     lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
11965                   " to not overwrite existing data (%s bytes will not be"
11966                   " wiped)", (1024 * 1024) - remainder)
11967     mib += 1
11968
11969   return mib
11970
11971
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  The amount given in the opcode is either the increment or, when the
  opcode's C{absolute} flag is set, the new total size of the disk.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance and prepares node and node resource locks.

    The node-level lock lists start out empty and are filled in by
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Computes the node locks; resource locks mirror the node locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    @return: dictionary with the opcode's disk, amount and absolute flag
        plus the generic per-instance hook environment

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      "ABSOLUTE": self.op.absolute,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two node lists (the same list is used twice), made of
        the master node and all nodes of the instance

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes are
    online, that its disk template is growable and that the requested size
    change is not negative. It sets C{self.instance}, C{self.disk},
    C{self.delta} (the increment) and C{self.target} (the new size).

    @raise errors.OpPrereqError: if the disk template does not support
        growing or the requested amount would shrink the disk

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if self.op.absolute:
      # The amount is the new total size; derive the increment from it
      self.target = self.op.amount
      self.delta = self.target - self.disk.size
      if self.delta < 0:
        raise errors.OpPrereqError("Requested size (%s) is smaller than "
                                   "current disk size (%s)" %
                                   (utils.FormatUnit(self.target, "h"),
                                    utils.FormatUnit(self.disk.size, "h")),
                                   errors.ECODE_STATE)
    else:
      # The amount is the increment; derive the new total size from it
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta
      if self.delta < 0:
        raise errors.OpPrereqError("Requested increment (%s) is negative" %
                                   utils.FormatUnit(self.delta, "h"),
                                   errors.ECODE_INVAL)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE,
                                      constants.DT_RBD):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.delta))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The grow request is first sent to all nodes in dry-run mode and only
    then executed for real. Afterwards the new size is recorded in the
    configuration, the added space is optionally wiped and, if requested,
    the disk sync is waited for.

    @raise errors.OpExecError: if the disk cannot be activated or, when
        wiping is enabled, its current size cannot be retrieved

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # NOTE(review): the two trailing booleans of call_blockdev_grow are
    # presumably "dry-run" and "backing storage", going by the comments on
    # the three call sites below -- confirm against the RPC definition

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed to node %s" % node)

    if wipe_disks:
      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      # One disk was asked for, hence exactly one size is expected back
      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      # Rounded up to whole MiB, so wiping starts after the existing data
      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
    else:
      old_disk_size = None

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real on the backing storage
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)

    # And now execute it for logical storage, on the primary node
    node = instance.primary_node
    self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)

    # Only record the new size once all grow requests went through
    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    # The old size was retrieved if and only if wiping is enabled
    assert wipe_disks ^ (old_disk_size is None)

    if wipe_disks:
      assert instance.disks[self.op.disk] == disk

      # Wipe newly added disk space
      _WipeDisks(self, instance,
                 disks=[(self.op.disk, disk, old_disk_size)])

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      # The disk was activated above; deactivate it again for instances
      # which are not supposed to be running
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    # Node resource locks and the instance lock must still be held here
    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12158
12159
12160 class LUInstanceQueryData(NoHooksLU):
12161   """Query runtime instance data.
12162
12163   """
12164   REQ_BGL = False
12165
12166   def ExpandNames(self):
12167     self.needed_locks = {}
12168
12169     # Use locking if requested or when non-static information is wanted
12170     if not (self.op.static or self.op.use_locking):
12171       self.LogWarning("Non-static data requested, locks need to be acquired")
12172       self.op.use_locking = True
12173
12174     if self.op.instances or not self.op.use_locking:
12175       # Expand instance names right here
12176       self.wanted_names = _GetWantedInstances(self, self.op.instances)
12177     else:
12178       # Will use acquired locks
12179       self.wanted_names = None
12180
12181     if self.op.use_locking:
12182       self.share_locks = _ShareAll()
12183
12184       if self.wanted_names is None:
12185         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12186       else:
12187         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12188
12189       self.needed_locks[locking.LEVEL_NODEGROUP] = []
12190       self.needed_locks[locking.LEVEL_NODE] = []
12191       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12192
12193   def DeclareLocks(self, level):
12194     if self.op.use_locking:
12195       if level == locking.LEVEL_NODEGROUP:
12196         owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12197
12198         # Lock all groups used by instances optimistically; this requires going
12199         # via the node before it's locked, requiring verification later on
12200         self.needed_locks[locking.LEVEL_NODEGROUP] = \
12201           frozenset(group_uuid
12202                     for instance_name in owned_instances
12203                     for group_uuid in
12204                       self.cfg.GetInstanceNodeGroups(instance_name))
12205
12206       elif level == locking.LEVEL_NODE:
12207         self._LockInstancesNodes()
12208
12209   def CheckPrereq(self):
12210     """Check prerequisites.
12211
12212     This only checks the optional instance list against the existing names.
12213
12214     """
12215     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12216     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12217     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12218
12219     if self.wanted_names is None:
12220       assert self.op.use_locking, "Locking was not used"
12221       self.wanted_names = owned_instances
12222
12223     instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12224
12225     if self.op.use_locking:
12226       _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12227                                 None)
12228     else:
12229       assert not (owned_instances or owned_groups or owned_nodes)
12230
12231     self.wanted_instances = instances.values()
12232
12233   def _ComputeBlockdevStatus(self, node, instance, dev):
12234     """Returns the status of a block device
12235
12236     """
12237     if self.op.static or not node:
12238       return None
12239
12240     self.cfg.SetDiskID(dev, node)
12241
12242     result = self.rpc.call_blockdev_find(node, dev)
12243     if result.offline:
12244       return None
12245
12246     result.Raise("Can't compute disk status for %s" % instance.name)
12247
12248     status = result.payload
12249     if status is None:
12250       return None
12251
12252     return (status.dev_path, status.major, status.minor,
12253             status.sync_percent, status.estimated_time,
12254             status.is_degraded, status.ldisk_status)
12255
12256   def _ComputeDiskStatus(self, instance, snode, dev):
12257     """Compute block device status.
12258
12259     """
12260     (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12261
12262     return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12263
12264   def _ComputeDiskStatusInner(self, instance, snode, dev):
12265     """Compute block device status.
12266
12267     @attention: The device has to be annotated already.
12268
12269     """
12270     if dev.dev_type in constants.LDS_DRBD:
12271       # we change the snode then (otherwise we use the one passed in)
12272       if dev.logical_id[0] == instance.primary_node:
12273         snode = dev.logical_id[1]
12274       else:
12275         snode = dev.logical_id[0]
12276
12277     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12278                                               instance, dev)
12279     dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12280
12281     if dev.children:
12282       dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12283                                         instance, snode),
12284                          dev.children)
12285     else:
12286       dev_children = []
12287
12288     return {
12289       "iv_name": dev.iv_name,
12290       "dev_type": dev.dev_type,
12291       "logical_id": dev.logical_id,
12292       "physical_id": dev.physical_id,
12293       "pstatus": dev_pstatus,
12294       "sstatus": dev_sstatus,
12295       "children": dev_children,
12296       "mode": dev.mode,
12297       "size": dev.size,
12298       }
12299
12300   def Exec(self, feedback_fn):
12301     """Gather and return data"""
12302     result = {}
12303
12304     cluster = self.cfg.GetClusterInfo()
12305
12306     node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12307     nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12308
12309     groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12310                                                  for node in nodes.values()))
12311
12312     group2name_fn = lambda uuid: groups[uuid].name
12313
12314     for instance in self.wanted_instances:
12315       pnode = nodes[instance.primary_node]
12316
12317       if self.op.static or pnode.offline:
12318         remote_state = None
12319         if pnode.offline:
12320           self.LogWarning("Primary node %s is marked offline, returning static"
12321                           " information only for instance %s" %
12322                           (pnode.name, instance.name))
12323       else:
12324         remote_info = self.rpc.call_instance_info(instance.primary_node,
12325                                                   instance.name,
12326                                                   instance.hypervisor)
12327         remote_info.Raise("Error checking node %s" % instance.primary_node)
12328         remote_info = remote_info.payload
12329         if remote_info and "state" in remote_info:
12330           remote_state = "up"
12331         else:
12332           if instance.admin_state == constants.ADMINST_UP:
12333             remote_state = "down"
12334           else:
12335             remote_state = instance.admin_state
12336
12337       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12338                   instance.disks)
12339
12340       snodes_group_uuids = [nodes[snode_name].group
12341                             for snode_name in instance.secondary_nodes]
12342
12343       result[instance.name] = {
12344         "name": instance.name,
12345         "config_state": instance.admin_state,
12346         "run_state": remote_state,
12347         "pnode": instance.primary_node,
12348         "pnode_group_uuid": pnode.group,
12349         "pnode_group_name": group2name_fn(pnode.group),
12350         "snodes": instance.secondary_nodes,
12351         "snodes_group_uuids": snodes_group_uuids,
12352         "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12353         "os": instance.os,
12354         # this happens to be the same format used for hooks
12355         "nics": _NICListToTuple(self, instance.nics),
12356         "disk_template": instance.disk_template,
12357         "disks": disks,
12358         "hypervisor": instance.hypervisor,
12359         "network_port": instance.network_port,
12360         "hv_instance": instance.hvparams,
12361         "hv_actual": cluster.FillHV(instance, skip_globals=True),
12362         "be_instance": instance.beparams,
12363         "be_actual": cluster.FillBE(instance),
12364         "os_instance": instance.osparams,
12365         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12366         "serial_no": instance.serial_no,
12367         "mtime": instance.mtime,
12368         "ctime": instance.ctime,
12369         "uuid": instance.uuid,
12370         }
12371
12372     return result
12373
12374
def PrepareContainerMods(mods, private_fn):
  """Extends every container modification with a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable invoked without arguments, once per
    modification, to build its private data field; with C{None} the field
    is set to C{None}
  @rtype: list
  @return: list of tuples; (operation, index, parameters, private data)

  """
  prepared = []

  for (op, idx, params) in mods:
    if private_fn is None:
      private = None
    else:
      private = private_fn()

    prepared.append((op, idx, params, private))

  return prepared
12392
12393
#: Type description for the changes returned by the callbacks of
#: L{ApplyContainerMods}; that function asserts it against both C{None} and
#: lists of two-element entries starting with a non-empty string
_TApplyContModsCbChanges = ht.TMaybeListOf(
  ht.TAnd(
    ht.TIsLength(2),
    ht.TItems([ht.TNonEmptyString, ht.TAny])
    )
  )
12401
12402
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Runs the modifications described by C{mods} against C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify (changed in place)
  @type chgdesc: None or list
  @param chgdesc: List collecting the applied changes; nothing is recorded
    if C{None}
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list; if C{None}, the parameters themselves become the new item
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}
  @raise IndexError: for negative indices other than -1, indices beyond the
    end of the container and indices not referring to an existing item
  @raise errors.ProgrammerError: for operations other than add, modify and
    remove

  """
  for (op, idx, params, private) in mods:
    # Translate the requested index into an absolute one; -1 stands for the
    # last item
    if idx == -1:
      absidx = len(container) - 1
    else:
      if idx < 0:
        raise IndexError("Not accepting negative indices other than -1")
      if idx > len(container):
        raise IndexError("Got %s index %s, but there are only %s" %
                         (kind, idx, len(container)))
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Position the new item is going to occupy
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is not None:
        (item, changes) = create_fn(addidx, params, private)
      else:
        item = params

      if idx != -1:
        assert idx >= 0
        assert idx <= len(container)
        # list.insert puts the item in front of the given index
        container.insert(idx, item)
      else:
        container.append(item)
    else:
      # All other operations work on an item which must exist already
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      elif op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if chgdesc is not None and changes is not None:
      chgdesc.extend(changes)
12490
12491
12492 def _UpdateIvNames(base_index, disks):
12493   """Updates the C{iv_name} attribute of disks.
12494
12495   @type disks: list of L{objects.Disk}
12496
12497   """
12498   for (idx, disk) in enumerate(disks):
12499     disk.iv_name = "disk/%s" % (base_index + idx, )
12500
12501
12502 class _InstNicModPrivate:
12503   """Data structure for network interface modifications.
12504
12505   Used by L{LUInstanceSetParams}.
12506
12507   """
12508   def __init__(self):
12509     self.params = None
12510     self.filled = None
12511
12512
12513 class LUInstanceSetParams(LogicalUnit):
12514   """Modifies an instance's parameters.
12515
12516   """
12517   HPATH = "instance-modify"
12518   HTYPE = constants.HTYPE_INSTANCE
12519   REQ_BGL = False
12520
12521   @staticmethod
12522   def _UpgradeDiskNicMods(kind, mods, verify_fn):
12523     assert ht.TList(mods)
12524     assert not mods or len(mods[0]) in (2, 3)
12525
12526     if mods and len(mods[0]) == 2:
12527       result = []
12528
12529       addremove = 0
12530       for op, params in mods:
12531         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12532           result.append((op, -1, params))
12533           addremove += 1
12534
12535           if addremove > 1:
12536             raise errors.OpPrereqError("Only one %s add or remove operation is"
12537                                        " supported at a time" % kind,
12538                                        errors.ECODE_INVAL)
12539         else:
12540           result.append((constants.DDM_MODIFY, op, params))
12541
12542       assert verify_fn(result)
12543     else:
12544       result = mods
12545
12546     return result
12547
12548   @staticmethod
12549   def _CheckMods(kind, mods, key_types, item_fn):
12550     """Ensures requested disk/NIC modifications are valid.
12551
12552     """
12553     for (op, _, params) in mods:
12554       assert ht.TDict(params)
12555
12556       utils.ForceDictType(params, key_types)
12557
12558       if op == constants.DDM_REMOVE:
12559         if params:
12560           raise errors.OpPrereqError("No settings should be passed when"
12561                                      " removing a %s" % kind,
12562                                      errors.ECODE_INVAL)
12563       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12564         item_fn(op, params)
12565       else:
12566         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12567
12568   @staticmethod
12569   def _VerifyDiskModification(op, params):
12570     """Verifies a disk modification.
12571
12572     """
12573     if op == constants.DDM_ADD:
12574       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12575       if mode not in constants.DISK_ACCESS_SET:
12576         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12577                                    errors.ECODE_INVAL)
12578
12579       size = params.get(constants.IDISK_SIZE, None)
12580       if size is None:
12581         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12582                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12583
12584       try:
12585         size = int(size)
12586       except (TypeError, ValueError), err:
12587         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12588                                    errors.ECODE_INVAL)
12589
12590       params[constants.IDISK_SIZE] = size
12591
12592     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12593       raise errors.OpPrereqError("Disk size change not possible, use"
12594                                  " grow-disk", errors.ECODE_INVAL)
12595
12596   @staticmethod
12597   def _VerifyNicModification(op, params):
12598     """Verifies a network interface modification.
12599
12600     """
12601     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12602       ip = params.get(constants.INIC_IP, None)
12603       if ip is None:
12604         pass
12605       elif ip.lower() == constants.VALUE_NONE:
12606         params[constants.INIC_IP] = None
12607       elif not netutils.IPAddress.IsValid(ip):
12608         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12609                                    errors.ECODE_INVAL)
12610
12611       bridge = params.get("bridge", None)
12612       link = params.get(constants.INIC_LINK, None)
12613       if bridge and link:
12614         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12615                                    " at the same time", errors.ECODE_INVAL)
12616       elif bridge and bridge.lower() == constants.VALUE_NONE:
12617         params["bridge"] = None
12618       elif link and link.lower() == constants.VALUE_NONE:
12619         params[constants.INIC_LINK] = None
12620
12621       if op == constants.DDM_ADD:
12622         macaddr = params.get(constants.INIC_MAC, None)
12623         if macaddr is None:
12624           params[constants.INIC_MAC] = constants.VALUE_AUTO
12625
12626       if constants.INIC_MAC in params:
12627         macaddr = params[constants.INIC_MAC]
12628         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12629           macaddr = utils.NormalizeAndValidateMac(macaddr)
12630
12631         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12632           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12633                                      " modifying an existing NIC",
12634                                      errors.ECODE_INVAL)
12635
12636   def CheckArguments(self):
12637     if not (self.op.nics or self.op.disks or self.op.disk_template or
12638             self.op.hvparams or self.op.beparams or self.op.os_name or
12639             self.op.offline is not None or self.op.runtime_mem):
12640       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12641
12642     if self.op.hvparams:
12643       _CheckGlobalHvParams(self.op.hvparams)
12644
12645     self.op.disks = self._UpgradeDiskNicMods(
12646       "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12647     self.op.nics = self._UpgradeDiskNicMods(
12648       "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12649
12650     # Check disk modifications
12651     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12652                     self._VerifyDiskModification)
12653
12654     if self.op.disks and self.op.disk_template is not None:
12655       raise errors.OpPrereqError("Disk template conversion and other disk"
12656                                  " changes not supported at the same time",
12657                                  errors.ECODE_INVAL)
12658
12659     if (self.op.disk_template and
12660         self.op.disk_template in constants.DTS_INT_MIRROR and
12661         self.op.remote_node is None):
12662       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12663                                  " one requires specifying a secondary node",
12664                                  errors.ECODE_INVAL)
12665
12666     # Check NIC modifications
12667     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12668                     self._VerifyNicModification)
12669
12670   def ExpandNames(self):
12671     self._ExpandAndLockInstance()
12672     # Can't even acquire node locks in shared mode as upcoming changes in
12673     # Ganeti 2.6 will start to modify the node object on disk conversion
12674     self.needed_locks[locking.LEVEL_NODE] = []
12675     self.needed_locks[locking.LEVEL_NODE_RES] = []
12676     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12677
12678   def DeclareLocks(self, level):
12679     # TODO: Acquire group lock in shared mode (disk parameters)
12680     if level == locking.LEVEL_NODE:
12681       self._LockInstancesNodes()
12682       if self.op.disk_template and self.op.remote_node:
12683         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12684         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12685     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12686       # Copy node locks
12687       self.needed_locks[locking.LEVEL_NODE_RES] = \
12688         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12689
12690   def BuildHooksEnv(self):
12691     """Build hooks env.
12692
12693     This runs on the master, primary and secondaries.
12694
12695     """
12696     args = dict()
12697     if constants.BE_MINMEM in self.be_new:
12698       args["minmem"] = self.be_new[constants.BE_MINMEM]
12699     if constants.BE_MAXMEM in self.be_new:
12700       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12701     if constants.BE_VCPUS in self.be_new:
12702       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12703     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12704     # information at all.
12705
12706     if self._new_nics is not None:
12707       nics = []
12708
12709       for nic in self._new_nics:
12710         nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12711         mode = nicparams[constants.NIC_MODE]
12712         link = nicparams[constants.NIC_LINK]
12713         nics.append((nic.ip, nic.mac, mode, link))
12714
12715       args["nics"] = nics
12716
12717     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12718     if self.op.disk_template:
12719       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12720     if self.op.runtime_mem:
12721       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12722
12723     return env
12724
12725   def BuildHooksNodes(self):
12726     """Build hooks nodes.
12727
12728     """
12729     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12730     return (nl, nl)
12731
12732   def _PrepareNicModification(self, params, private, old_ip, old_params,
12733                               cluster, pnode):
12734     update_params_dict = dict([(key, params[key])
12735                                for key in constants.NICS_PARAMETERS
12736                                if key in params])
12737
12738     if "bridge" in params:
12739       update_params_dict[constants.NIC_LINK] = params["bridge"]
12740
12741     new_params = _GetUpdatedParams(old_params, update_params_dict)
12742     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12743
12744     new_filled_params = cluster.SimpleFillNIC(new_params)
12745     objects.NIC.CheckParameterSyntax(new_filled_params)
12746
12747     new_mode = new_filled_params[constants.NIC_MODE]
12748     if new_mode == constants.NIC_MODE_BRIDGED:
12749       bridge = new_filled_params[constants.NIC_LINK]
12750       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12751       if msg:
12752         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12753         if self.op.force:
12754           self.warn.append(msg)
12755         else:
12756           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12757
12758     elif new_mode == constants.NIC_MODE_ROUTED:
12759       ip = params.get(constants.INIC_IP, old_ip)
12760       if ip is None:
12761         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12762                                    " on a routed NIC", errors.ECODE_INVAL)
12763
12764     if constants.INIC_MAC in params:
12765       mac = params[constants.INIC_MAC]
12766       if mac is None:
12767         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12768                                    errors.ECODE_INVAL)
12769       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12770         # otherwise generate the MAC address
12771         params[constants.INIC_MAC] = \
12772           self.cfg.GenerateMAC(self.proc.GetECId())
12773       else:
12774         # or validate/reserve the current one
12775         try:
12776           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12777         except errors.ReservationError:
12778           raise errors.OpPrereqError("MAC address '%s' already in use"
12779                                      " in cluster" % mac,
12780                                      errors.ECODE_NOTUNIQUE)
12781
12782     private.params = new_params
12783     private.filled = new_filled_params
12784
12785   def CheckPrereq(self):
12786     """Check prerequisites.
12787
12788     This only checks the instance list against the existing names.
12789
12790     """
12791     # checking the new params on the primary/secondary nodes
12792
12793     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12794     cluster = self.cluster = self.cfg.GetClusterInfo()
12795     assert self.instance is not None, \
12796       "Cannot retrieve locked instance %s" % self.op.instance_name
12797     pnode = instance.primary_node
12798     nodelist = list(instance.all_nodes)
12799     pnode_info = self.cfg.GetNodeInfo(pnode)
12800     self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12801
12802     # Prepare disk/NIC modifications
12803     self.diskmod = PrepareContainerMods(self.op.disks, None)
12804     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12805
12806     # OS change
12807     if self.op.os_name and not self.op.force:
12808       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12809                       self.op.force_variant)
12810       instance_os = self.op.os_name
12811     else:
12812       instance_os = instance.os
12813
12814     assert not (self.op.disk_template and self.op.disks), \
12815       "Can't modify disk template and apply disk changes at the same time"
12816
12817     if self.op.disk_template:
12818       if instance.disk_template == self.op.disk_template:
12819         raise errors.OpPrereqError("Instance already has disk template %s" %
12820                                    instance.disk_template, errors.ECODE_INVAL)
12821
12822       if (instance.disk_template,
12823           self.op.disk_template) not in self._DISK_CONVERSIONS:
12824         raise errors.OpPrereqError("Unsupported disk template conversion from"
12825                                    " %s to %s" % (instance.disk_template,
12826                                                   self.op.disk_template),
12827                                    errors.ECODE_INVAL)
12828       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12829                           msg="cannot change disk template")
12830       if self.op.disk_template in constants.DTS_INT_MIRROR:
12831         if self.op.remote_node == pnode:
12832           raise errors.OpPrereqError("Given new secondary node %s is the same"
12833                                      " as the primary node of the instance" %
12834                                      self.op.remote_node, errors.ECODE_STATE)
12835         _CheckNodeOnline(self, self.op.remote_node)
12836         _CheckNodeNotDrained(self, self.op.remote_node)
12837         # FIXME: here we assume that the old instance type is DT_PLAIN
12838         assert instance.disk_template == constants.DT_PLAIN
12839         disks = [{constants.IDISK_SIZE: d.size,
12840                   constants.IDISK_VG: d.logical_id[0]}
12841                  for d in instance.disks]
12842         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12843         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12844
12845         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12846         snode_group = self.cfg.GetNodeGroup(snode_info.group)
12847         ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12848                                                                 snode_group)
12849         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12850                                 ignore=self.op.ignore_ipolicy)
12851         if pnode_info.group != snode_info.group:
12852           self.LogWarning("The primary and secondary nodes are in two"
12853                           " different node groups; the disk parameters"
12854                           " from the first disk's node group will be"
12855                           " used")
12856
12857     # hvparams processing
12858     if self.op.hvparams:
12859       hv_type = instance.hypervisor
12860       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12861       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12862       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12863
12864       # local check
12865       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12866       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12867       self.hv_proposed = self.hv_new = hv_new # the new actual values
12868       self.hv_inst = i_hvdict # the new dict (without defaults)
12869     else:
12870       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12871                                               instance.hvparams)
12872       self.hv_new = self.hv_inst = {}
12873
12874     # beparams processing
12875     if self.op.beparams:
12876       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12877                                    use_none=True)
12878       objects.UpgradeBeParams(i_bedict)
12879       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12880       be_new = cluster.SimpleFillBE(i_bedict)
12881       self.be_proposed = self.be_new = be_new # the new actual values
12882       self.be_inst = i_bedict # the new dict (without defaults)
12883     else:
12884       self.be_new = self.be_inst = {}
12885       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12886     be_old = cluster.FillBE(instance)
12887
12888     # CPU param validation -- checking every time a parameter is
12889     # changed to cover all cases where either CPU mask or vcpus have
12890     # changed
12891     if (constants.BE_VCPUS in self.be_proposed and
12892         constants.HV_CPU_MASK in self.hv_proposed):
12893       cpu_list = \
12894         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12895       # Verify mask is consistent with number of vCPUs. Can skip this
12896       # test if only 1 entry in the CPU mask, which means same mask
12897       # is applied to all vCPUs.
12898       if (len(cpu_list) > 1 and
12899           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12900         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12901                                    " CPU mask [%s]" %
12902                                    (self.be_proposed[constants.BE_VCPUS],
12903                                     self.hv_proposed[constants.HV_CPU_MASK]),
12904                                    errors.ECODE_INVAL)
12905
12906       # Only perform this test if a new CPU mask is given
12907       if constants.HV_CPU_MASK in self.hv_new:
12908         # Calculate the largest CPU number requested
12909         max_requested_cpu = max(map(max, cpu_list))
12910         # Check that all of the instance's nodes have enough physical CPUs to
12911         # satisfy the requested CPU mask
12912         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12913                                 max_requested_cpu + 1, instance.hypervisor)
12914
12915     # osparams processing
12916     if self.op.osparams:
12917       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12918       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12919       self.os_inst = i_osdict # the new dict (without defaults)
12920     else:
12921       self.os_inst = {}
12922
12923     self.warn = []
12924
12925     #TODO(dynmem): do the appropriate check involving MINMEM
12926     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12927         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12928       mem_check_list = [pnode]
12929       if be_new[constants.BE_AUTO_BALANCE]:
12930         # either we changed auto_balance to yes or it was from before
12931         mem_check_list.extend(instance.secondary_nodes)
12932       instance_info = self.rpc.call_instance_info(pnode, instance.name,
12933                                                   instance.hypervisor)
12934       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12935                                          [instance.hypervisor])
12936       pninfo = nodeinfo[pnode]
12937       msg = pninfo.fail_msg
12938       if msg:
12939         # Assume the primary node is unreachable and go ahead
12940         self.warn.append("Can't get info from primary node %s: %s" %
12941                          (pnode, msg))
12942       else:
12943         (_, _, (pnhvinfo, )) = pninfo.payload
12944         if not isinstance(pnhvinfo.get("memory_free", None), int):
12945           self.warn.append("Node data from primary node %s doesn't contain"
12946                            " free memory information" % pnode)
12947         elif instance_info.fail_msg:
12948           self.warn.append("Can't get instance runtime information: %s" %
12949                            instance_info.fail_msg)
12950         else:
12951           if instance_info.payload:
12952             current_mem = int(instance_info.payload["memory"])
12953           else:
12954             # Assume instance not running
12955             # (there is a slight race condition here, but it's not very
12956             # probable, and we have no other way to check)
12957             # TODO: Describe race condition
12958             current_mem = 0
12959           #TODO(dynmem): do the appropriate check involving MINMEM
12960           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12961                       pnhvinfo["memory_free"])
12962           if miss_mem > 0:
12963             raise errors.OpPrereqError("This change will prevent the instance"
12964                                        " from starting, due to %d MB of memory"
12965                                        " missing on its primary node" %
12966                                        miss_mem, errors.ECODE_NORES)
12967
12968       if be_new[constants.BE_AUTO_BALANCE]:
12969         for node, nres in nodeinfo.items():
12970           if node not in instance.secondary_nodes:
12971             continue
12972           nres.Raise("Can't get info from secondary node %s" % node,
12973                      prereq=True, ecode=errors.ECODE_STATE)
12974           (_, _, (nhvinfo, )) = nres.payload
12975           if not isinstance(nhvinfo.get("memory_free", None), int):
12976             raise errors.OpPrereqError("Secondary node %s didn't return free"
12977                                        " memory information" % node,
12978                                        errors.ECODE_STATE)
12979           #TODO(dynmem): do the appropriate check involving MINMEM
12980           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12981             raise errors.OpPrereqError("This change will prevent the instance"
12982                                        " from failover to its secondary node"
12983                                        " %s, due to not enough memory" % node,
12984                                        errors.ECODE_STATE)
12985
12986     if self.op.runtime_mem:
12987       remote_info = self.rpc.call_instance_info(instance.primary_node,
12988                                                 instance.name,
12989                                                 instance.hypervisor)
12990       remote_info.Raise("Error checking node %s" % instance.primary_node)
12991       if not remote_info.payload: # not running already
12992         raise errors.OpPrereqError("Instance %s is not running" %
12993                                    instance.name, errors.ECODE_STATE)
12994
12995       current_memory = remote_info.payload["memory"]
12996       if (not self.op.force and
12997            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12998             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12999         raise errors.OpPrereqError("Instance %s must have memory between %d"
13000                                    " and %d MB of memory unless --force is"
13001                                    " given" %
13002                                    (instance.name,
13003                                     self.be_proposed[constants.BE_MINMEM],
13004                                     self.be_proposed[constants.BE_MAXMEM]),
13005                                    errors.ECODE_INVAL)
13006
13007       delta = self.op.runtime_mem - current_memory
13008       if delta > 0:
13009         _CheckNodeFreeMemory(self, instance.primary_node,
13010                              "ballooning memory for instance %s" %
13011                              instance.name, delta, instance.hypervisor)
13012
13013     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13014       raise errors.OpPrereqError("Disk operations not supported for"
13015                                  " diskless instances", errors.ECODE_INVAL)
13016
13017     def _PrepareNicCreate(_, params, private):
13018       self._PrepareNicModification(params, private, None, {}, cluster, pnode)
13019       return (None, None)
13020
13021     def _PrepareNicMod(_, nic, params, private):
13022       self._PrepareNicModification(params, private, nic.ip,
13023                                    nic.nicparams, cluster, pnode)
13024       return None
13025
13026     # Verify NIC changes (operating on copy)
13027     nics = instance.nics[:]
13028     ApplyContainerMods("NIC", nics, None, self.nicmod,
13029                        _PrepareNicCreate, _PrepareNicMod, None)
13030     if len(nics) > constants.MAX_NICS:
13031       raise errors.OpPrereqError("Instance has too many network interfaces"
13032                                  " (%d), cannot add more" % constants.MAX_NICS,
13033                                  errors.ECODE_STATE)
13034
13035     # Verify disk changes (operating on a copy)
13036     disks = instance.disks[:]
13037     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13038     if len(disks) > constants.MAX_DISKS:
13039       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13040                                  " more" % constants.MAX_DISKS,
13041                                  errors.ECODE_STATE)
13042
13043     if self.op.offline is not None:
13044       if self.op.offline:
13045         msg = "can't change to offline"
13046       else:
13047         msg = "can't change to online"
13048       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13049
13050     # Pre-compute NIC changes (necessary to use result in hooks)
13051     self._nic_chgdesc = []
13052     if self.nicmod:
13053       # Operate on copies as this is still in prereq
13054       nics = [nic.Copy() for nic in instance.nics]
13055       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13056                          self._CreateNewNic, self._ApplyNicMods, None)
13057       self._new_nics = nics
13058     else:
13059       self._new_nics = None
13060
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    The steps are, in order: generate the new disk objects, create the
    volumes which do not exist yet, rename the instance's current volumes
    on the primary node to the logical IDs of the first child of each new
    disk, create the DRBD devices on both nodes, write the new disk layout
    to the configuration and finally wait for the disks to sync.

    @param feedback_fn: function used to report progress messages
    @raise errors.OpExecError: if L{_WaitForSync} reports a failure; the
        rename RPC result is also checked through its C{Raise} method

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    # the secondary node of the converted instance comes from the opcode
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    # (size, mode and the first element of the logical ID are copied from
    # the existing disks)
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    # the annotated copies are only used for the creation calls; the
    # configuration receives "new_disks" further down
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      # On the primary node only the second child is created; the first
      # child is obtained below by renaming the existing volume
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      # the secondary node has nothing yet, hence all children are created
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    # pairs of (existing disk, logical ID of the new disk's first child)
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in anno_disks:
      for node in [pnode, snode]:
        # the last argument of _CreateSingleBlockDev is true only for the
        # primary node (presumably a "force create" flag -- TODO confirm)
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    # ("oneshot" is requested when the opcode does not ask to wait)
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller
13122
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    The first child of every DRBD disk becomes the new (plain) disk of the
    instance. After the configuration has been updated, the old disks are
    removed from the secondary node and the second child of every old disk
    (called "metadata" in the warning below) is removed from the primary
    node. Removal failures are only logged as warnings.

    @param feedback_fn: function used to report progress messages

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    # annotated copies of the current disks, used for the port release and
    # the removal calls below
    old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
    # the first child of each DRBD disk is kept as the new disk
    new_disks = [d.children[0] for d in instance.disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      # the port is the third element of the DRBD logical ID
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        # best effort: the conversion has already been committed
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      # only the second child is removed here; the first one is now the
      # instance's plain disk
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
13174
13175   def _CreateNewDisk(self, idx, params, _):
13176     """Creates a new disk.
13177
13178     """
13179     instance = self.instance
13180
13181     # add a new disk
13182     if instance.disk_template in constants.DTS_FILEBASED:
13183       (file_driver, file_path) = instance.disks[0].logical_id
13184       file_path = os.path.dirname(file_path)
13185     else:
13186       file_driver = file_path = None
13187
13188     disk = \
13189       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13190                             instance.primary_node, instance.secondary_nodes,
13191                             [params], file_path, file_driver, idx,
13192                             self.Log, self.diskparams)[0]
13193
13194     info = _GetInstanceInfoText(instance)
13195
13196     logging.info("Creating volume %s for instance %s",
13197                  disk.iv_name, instance.name)
13198     # Note: this needs to be kept in sync with _CreateDisks
13199     #HARDCODE
13200     for node in instance.all_nodes:
13201       f_create = (node == instance.primary_node)
13202       try:
13203         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13204       except errors.OpExecError, err:
13205         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13206                         disk.iv_name, disk, node, err)
13207
13208     return (disk, [
13209       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13210       ])
13211
13212   @staticmethod
13213   def _ModifyDisk(idx, disk, params, _):
13214     """Modifies a disk.
13215
13216     """
13217     disk.mode = params[constants.IDISK_MODE]
13218
13219     return [
13220       ("disk.mode/%d" % idx, disk.mode),
13221       ]
13222
13223   def _RemoveDisk(self, idx, root, _):
13224     """Removes a disk.
13225
13226     """
13227     (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13228     for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13229       self.cfg.SetDiskID(disk, node)
13230       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13231       if msg:
13232         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13233                         " continuing anyway", idx, node, msg)
13234
13235     # if this is a DRBD disk, return its port to the pool
13236     if root.dev_type in constants.LDS_DRBD:
13237       self.cfg.AddTcpUdpPort(root.logical_id[2])
13238
13239   @staticmethod
13240   def _CreateNewNic(idx, params, private):
13241     """Creates data structure for a new network interface.
13242
13243     """
13244     mac = params[constants.INIC_MAC]
13245     ip = params.get(constants.INIC_IP, None)
13246     nicparams = private.params
13247
13248     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
13249       ("nic.%d" % idx,
13250        "add:mac=%s,ip=%s,mode=%s,link=%s" %
13251        (mac, ip, private.filled[constants.NIC_MODE],
13252        private.filled[constants.NIC_LINK])),
13253       ])
13254
13255   @staticmethod
13256   def _ApplyNicMods(idx, nic, params, private):
13257     """Modifies a network interface.
13258
13259     """
13260     changes = []
13261
13262     for key in [constants.INIC_MAC, constants.INIC_IP]:
13263       if key in params:
13264         changes.append(("nic.%s/%d" % (key, idx), params[key]))
13265         setattr(nic, key, params[key])
13266
13267     if private.params:
13268       nic.nicparams = private.params
13269
13270       for (key, val) in params.items():
13271         changes.append(("nic.%s/%d" % (key, idx), val))
13272
13273     return changes
13274
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    The changes are applied in this order: runtime memory, disk
    modifications, disk template conversion, NICs, hypervisor parameters,
    backend parameters, OS name, OS parameters and the offline/online
    state. The configuration is written at the end.

    @param feedback_fn: function used to report warnings and progress
    @return: list of (name, new value) tuples describing the changes made
    @raise errors.OpExecError: if the instance's disks can't be shut down
        before a disk template conversion

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    # Node resource locks must be owned if and only if a disk template
    # conversion was requested
    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    # (the callbacks add, modify and remove disks; their change
    # descriptions are collected in "result")
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      if __debug__:
        # Verify that all nodes involved in the conversion are locked at
        # both the node and the node resource level
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      # the conversion function is looked up by (current, requested)
      # template
      mode = (instance.disk_template, self.op.disk_template)
      try:
        # the table holds plain functions, hence "self" is passed explicitly
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # The bare except is acceptable here as the exception is always
        # re-raised after releasing the DRBD minors of the instance
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    # (self._new_nics is pre-computed in CheckPrereq and is None if no NIC
    # modification was requested)
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    # NOTE: unlike the other changes, no entry is added to the result list
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    # write all attribute changes made above to the configuration
    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result
13385
  # Supported disk template conversions, indexed by a tuple of (current
  # template, requested template). The values are the plain functions
  # defined above (not bound methods), so Exec passes "self" explicitly
  # when calling them.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
13390
13391
class LUInstanceChangeGroup(LogicalUnit):
  """Changes the node group of an instance.

  The request is validated here; the actual moves are computed by an
  iallocator and returned as jobs.

  """
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declares the initial locks and resolves the target groups.

    """
    self.share_locks = _ShareAll()
    # Group and node locks are computed in DeclareLocks
    self.needed_locks = {
      locking.LEVEL_NODE: [],
      locking.LEVEL_NODEGROUP: [],
      }

    self._ExpandAndLockInstance()

    requested_groups = self.op.target_groups
    if requested_groups:
      self.req_target_uuids = [self.cfg.LookupNodeGroup(group)
                               for group in requested_groups]
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    """Computes the node group and node locks.

    @param level: locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if not self.req_target_uuids:
        # No target groups, need to lock all of them
        wanted_groups = locking.ALL_SET
      else:
        wanted_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires
        # going via the node before it's locked, requiring verification
        # later on
        wanted_groups.update(
          self.cfg.GetInstanceNodeGroups(self.op.instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups
      return

    if level != locking.LEVEL_NODE:
      return

    if not self.req_target_uuids:
      # Lock all nodes as all groups are potential targets
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      return

    # Lock all nodes used by instances
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    self._LockInstancesNodes()

    # Lock all nodes in all potential target groups
    locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    current_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
    target_nodes = []
    for group_uuid in locked_groups - current_groups:
      target_nodes.extend(self.cfg.GetNodeGroup(group_uuid).members)
    self.needed_locks[locking.LEVEL_NODE].extend(target_nodes)

  def CheckPrereq(self):
    """Verifies the acquired locks and computes the target groups.

    @raise errors.OpPrereqError: if a target group is already used by the
        instance or if no target group is left

    """
    inst_name = self.op.instance_name

    locked_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    locked_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            locked_groups.issuperset(self.req_target_uuids))
    assert locked_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(inst_name)

    # The instance's nodes must not have changed since the locks were
    # computed
    assert locked_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    current_groups = _CheckInstanceNodeGroups(self.cfg, inst_name,
                                              locked_groups)

    if not self.req_target_uuids:
      # All groups except those used by the instance are potential targets
      self.target_uuids = locked_groups - current_groups
    else:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)

    overlap = self.target_uuids & current_groups
    if overlap:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(overlap), inst_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    The target group UUIDs are exported in addition to the instance's
    environment.

    """
    assert self.target_uuids

    env = {}
    env["TARGET_GROUPS"] = " ".join(self.target_uuids)
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node only.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Runs the iallocator and returns the jobs it computed.

    @raise errors.OpPrereqError: if the iallocator fails
    @return: L{ResultWithJobs} holding the jobs to be submitted

    """
    locked_instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert locked_instances == [self.op.instance_name], "Instance not locked"

    request = iallocator.IAReqGroupChange(instances=locked_instances,
                                          target_groups=list(self.target_uuids))
    allocator = iallocator.IAllocator(self.cfg, self.rpc, request)

    allocator.Run(self.op.iallocator)

    if not allocator.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  allocator.info), errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, allocator.result, self.op.early_release,
                               False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
13531
13532
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  The work is delegated to an L{_ExportQuery} object; this class converts
  its result to the old-style dictionary.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the query object for the requested nodes.

    """
    node_filter = qlang.MakeSimpleFilter("node", self.op.nodes)
    self.expq = _ExportQuery(node_filter, ["node", "export"],
                             self.op.use_locking)

  def ExpandNames(self):
    """Delegates to the query object.

    """
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates to the query object.

    """
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Computes the exports found on each node.

    @return: dictionary indexed by node name; the value is C{False} for
        nodes reported without an export name, otherwise the list of
        export names

    """
    exports_by_node = {}

    for (node_name, export_name) in self.expq.OldStyleQuery(self):
      if export_name is not None:
        exports_by_node.setdefault(node_name, []).append(export_name)
        continue
      exports_by_node[node_name] = False

    return exports_by_node
13559
13560
class _ExportQuery(_QueryBase):
  """Query implementation listing the exports found on nodes.

  """
  FIELDS = query.EXPORT_FIELDS

  #: The node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    """Computes the wanted nodes and, if locking is used, their locks.

    @param lu: the logical unit on whose behalf the query runs

    """
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = self.use_locking

    if not self.do_locking:
      return

    lu.share_locks = _ShareAll()
    lu.needed_locks = {
      locking.LEVEL_NODE: self.wanted,
      }

  def DeclareLocks(self, lu, level):
    """Does nothing, no further locks are declared.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    @param lu: the logical unit on whose behalf the query runs
    @return: list of (node name, export name) tuples; the export name is
        C{None} for nodes whose export list call failed

    """
    # Locking is not used
    # TODO
    assert (not compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) and
            not self.do_locking and not self.use_locking)

    node_names = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    rows = []

    export_lists = lu.rpc.call_export_list(node_names)
    for (node_name, node_result) in export_lists.items():
      if not node_result.fail_msg:
        rows.extend([(node_name, export_name)
                     for export_name in node_result.payload])
      else:
        rows.append((node_name, None))

    return rows
13609
13610
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance's primary node must be online; the cluster domain secret
    is read here for use in L{Exec}.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @return: C{None} unless the export mode is remote; for remote exports
        a dictionary with the keys C{handshake}, C{x509_key_name} and
        C{x509_ca}

    """
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    pnode = self.instance.primary_node
    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % pnode)
    cert_result = self.rpc.call_x509_cert_create(pnode,
                                                 constants.RIE_CERT_VALIDITY)
    cert_result.Raise("Can't create X509 key and certificate on %s" %
                      cert_result.node)

    (key_name, cert_pem) = cert_result.payload

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # The key name and the certificate are authenticated using the cluster
    # domain secret
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, key_name, salt=salt)
    signed_cert = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_name_hmac, salt),
      "x509_ca": signed_cert,
      }
13660
13661
13662 class LUBackupExport(LogicalUnit):
13663   """Export an instance to an image in the cluster.
13664
13665   """
13666   HPATH = "instance-export"
13667   HTYPE = constants.HTYPE_INSTANCE
13668   REQ_BGL = False
13669
13670   def CheckArguments(self):
13671     """Check the arguments.
13672
13673     """
13674     self.x509_key_name = self.op.x509_key_name
13675     self.dest_x509_ca_pem = self.op.destination_x509_ca
13676
13677     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13678       if not self.x509_key_name:
13679         raise errors.OpPrereqError("Missing X509 key name for encryption",
13680                                    errors.ECODE_INVAL)
13681
13682       if not self.dest_x509_ca_pem:
13683         raise errors.OpPrereqError("Missing destination X509 CA",
13684                                    errors.ECODE_INVAL)
13685
13686   def ExpandNames(self):
13687     self._ExpandAndLockInstance()
13688
13689     # Lock all nodes for local exports
13690     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13691       # FIXME: lock only instance primary and destination node
13692       #
13693       # Sad but true, for now we have do lock all nodes, as we don't know where
13694       # the previous export might be, and in this LU we search for it and
13695       # remove it from its current node. In the future we could fix this by:
13696       #  - making a tasklet to search (share-lock all), then create the
13697       #    new one, then one to remove, after
13698       #  - removing the removal operation altogether
13699       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13700
  def DeclareLocks(self, level):
    """Last minute lock declaration.

    Intentionally a no-op.

    @param level: locking level being declared (unused)

    """
    # All nodes are locked anyway, so nothing to do here.
    # (ExpandNames requests all node locks for local exports; it requests
    # no node locks for other modes)
13704
13705   def BuildHooksEnv(self):
13706     """Build hooks env.
13707
13708     This will run on the master, primary node and target node.
13709
13710     """
13711     env = {
13712       "EXPORT_MODE": self.op.mode,
13713       "EXPORT_NODE": self.op.target_node,
13714       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13715       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13716       # TODO: Generic function for boolean env variables
13717       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13718       }
13719
13720     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13721
13722     return env
13723
13724   def BuildHooksNodes(self):
13725     """Build hooks nodes.
13726
13727     """
13728     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13729
13730     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13731       nl.append(self.op.target_node)
13732
13733     return (nl, nl)
13734
13735   def CheckPrereq(self):
13736     """Check prerequisites.
13737
13738     This checks that the instance and node names are valid.
13739
13740     """
13741     instance_name = self.op.instance_name
13742
13743     self.instance = self.cfg.GetInstanceInfo(instance_name)
13744     assert self.instance is not None, \
13745           "Cannot retrieve locked instance %s" % self.op.instance_name
13746     _CheckNodeOnline(self, self.instance.primary_node)
13747
13748     if (self.op.remove_instance and
13749         self.instance.admin_state == constants.ADMINST_UP and
13750         not self.op.shutdown):
13751       raise errors.OpPrereqError("Can not remove instance without shutting it"
13752                                  " down before", errors.ECODE_STATE)
13753
13754     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13755       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13756       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13757       assert self.dst_node is not None
13758
13759       _CheckNodeOnline(self, self.dst_node.name)
13760       _CheckNodeNotDrained(self, self.dst_node.name)
13761
13762       self._cds = None
13763       self.dest_disk_info = None
13764       self.dest_x509_ca = None
13765
13766     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13767       self.dst_node = None
13768
13769       if len(self.op.target_node) != len(self.instance.disks):
13770         raise errors.OpPrereqError(("Received destination information for %s"
13771                                     " disks, but instance %s has %s disks") %
13772                                    (len(self.op.target_node), instance_name,
13773                                     len(self.instance.disks)),
13774                                    errors.ECODE_INVAL)
13775
13776       cds = _GetClusterDomainSecret()
13777
13778       # Check X509 key name
13779       try:
13780         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13781       except (TypeError, ValueError), err:
13782         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13783                                    errors.ECODE_INVAL)
13784
13785       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13786         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13787                                    errors.ECODE_INVAL)
13788
13789       # Load and verify CA
13790       try:
13791         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13792       except OpenSSL.crypto.Error, err:
13793         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13794                                    (err, ), errors.ECODE_INVAL)
13795
13796       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13797       if errcode is not None:
13798         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13799                                    (msg, ), errors.ECODE_INVAL)
13800
13801       self.dest_x509_ca = cert
13802
13803       # Verify target information
13804       disk_info = []
13805       for idx, disk_data in enumerate(self.op.target_node):
13806         try:
13807           (host, port, magic) = \
13808             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13809         except errors.GenericError, err:
13810           raise errors.OpPrereqError("Target info for disk %s: %s" %
13811                                      (idx, err), errors.ECODE_INVAL)
13812
13813         disk_info.append((host, port, magic))
13814
13815       assert len(disk_info) == len(self.op.target_node)
13816       self.dest_disk_info = disk_info
13817
13818     else:
13819       raise errors.ProgrammerError("Unhandled export mode %r" %
13820                                    self.op.mode)
13821
13822     # instance disk type verification
13823     # TODO: Implement export support for file-based disks
13824     for disk in self.instance.disks:
13825       if disk.dev_type == constants.LD_FILE:
13826         raise errors.OpPrereqError("Export not supported for instances with"
13827                                    " file-based disks", errors.ECODE_INVAL)
13828
13829   def _CleanupExports(self, feedback_fn):
13830     """Removes exports of current instance from all other nodes.
13831
13832     If an instance in a cluster with nodes A..D was exported to node C, its
13833     exports will be removed from the nodes A, B and D.
13834
13835     """
13836     assert self.op.mode != constants.EXPORT_MODE_REMOTE
13837
13838     nodelist = self.cfg.GetNodeList()
13839     nodelist.remove(self.dst_node.name)
13840
13841     # on one-node clusters nodelist will be empty after the removal
13842     # if we proceed the backup would be removed because OpBackupQuery
13843     # substitutes an empty list with the full cluster node list.
13844     iname = self.instance.name
13845     if nodelist:
13846       feedback_fn("Removing old exports for instance %s" % iname)
13847       exportlist = self.rpc.call_export_list(nodelist)
13848       for node in exportlist:
13849         if exportlist[node].fail_msg:
13850           continue
13851         if iname in exportlist[node].payload:
13852           msg = self.rpc.call_export_remove(node, iname).fail_msg
13853           if msg:
13854             self.LogWarning("Could not remove older export for instance %s"
13855                             " on node %s: %s", iname, node, msg)
13856
  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    The steps are, in order: optionally shut down the instance, activate
    its disks if it is not administratively up, snapshot the disks, restart
    the instance if it was shut down only for the export, and finally run
    the local or remote export through the export helper. On success the
    instance is removed if requested and, for local exports, exports of the
    instance on other nodes are cleaned up.

    @param feedback_fn: function called with progress messages
    @rtype: tuple
    @return: C{(fin_resu, dresults)} as returned by the export helper; the
      second element holds one boolean per instance disk (asserted below)
    @raise errors.OpExecError: if the instance can not be restarted after
      the snapshot, or if finalization or any disk export failed

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    # Based on the configured (admin) state, not on the shutdown done above:
    # disks are only brought up here for instances which are marked as not up
    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    # Outer try/finally: disks activated above are always deactivated again
    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      # Inner try/finally: the helper is cleaned up whatever happens after
      # the snapshots have been created
      try:
        # Restart the instance right after snapshotting, i.e. before the
        # data transfer, if we stopped it ourselves and it is not going to
        # be removed afterwards
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          # Only the key name is needed here, the remaining two elements of
          # the tuple are ignored
          (key_name, _, _) = self.x509_key_name

          # The helper is handed the destination CA in PEM format
          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    # Any failed disk or a failed finalization makes the whole export fail;
    # the error message lists which parts went wrong
    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        # Indices of the disks whose export result is false
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    # Older exports on other nodes are only cleaned up for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
13958
13959
class LUBackupRemove(NoHooksLU):
  """Logical unit removing all exports of a given instance.

  The instance does not have to exist in the configuration anymore; in that
  case the name passed in the opcode is used as-is.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Requests locks on all nodes.

    """
    # Removing exports requires all nodes to be locked. The instance itself
    # is not locked: nothing happens to it, and exports can also be removed
    # for an instance which does not exist anymore.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove the instance's exports from every locked node.

    Nodes which can not be queried are skipped with a warning; a failed
    removal on a node is logged and does not abort the operation.

    @param feedback_fn: function called with messages for the user

    """
    given_name = self.op.instance_name
    expanded_name = self.cfg.ExpandInstanceName(given_name)

    # An unknown instance is looked up under the name which was passed in.
    # This can only match if that name already was an FQDN.
    unknown_instance = not expanded_name
    if unknown_instance:
      export_name = given_name
    else:
      export_name = expanded_name

    node_names = self.owned_locks(locking.LEVEL_NODE)
    listings = self.rpc.call_export_list(node_names)

    export_seen = False
    for node in listings:
      listing = listings[node]

      list_err = listing.fail_msg
      if list_err:
        self.LogWarning("Failed to query node %s (continuing): %s", node,
                        list_err)
        continue

      if export_name not in listing.payload:
        continue

      export_seen = True
      removal_err = self.rpc.call_export_remove(node, export_name).fail_msg
      if removal_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", export_name, node, removal_err)

    if unknown_instance and not export_seen:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
14005
14006
14007 class LUGroupAdd(LogicalUnit):
14008   """Logical unit for creating node groups.
14009
14010   """
14011   HPATH = "group-add"
14012   HTYPE = constants.HTYPE_GROUP
14013   REQ_BGL = False
14014
14015   def ExpandNames(self):
14016     # We need the new group's UUID here so that we can create and acquire the
14017     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14018     # that it should not check whether the UUID exists in the configuration.
14019     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14020     self.needed_locks = {}
14021     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14022
14023   def CheckPrereq(self):
14024     """Check prerequisites.
14025
14026     This checks that the given group name is not an existing node group
14027     already.
14028
14029     """
14030     try:
14031       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14032     except errors.OpPrereqError:
14033       pass
14034     else:
14035       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14036                                  " node group (UUID: %s)" %
14037                                  (self.op.group_name, existing_uuid),
14038                                  errors.ECODE_EXISTS)
14039
14040     if self.op.ndparams:
14041       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14042
14043     if self.op.hv_state:
14044       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14045     else:
14046       self.new_hv_state = None
14047
14048     if self.op.disk_state:
14049       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14050     else:
14051       self.new_disk_state = None
14052
14053     if self.op.diskparams:
14054       for templ in constants.DISK_TEMPLATES:
14055         if templ in self.op.diskparams:
14056           utils.ForceDictType(self.op.diskparams[templ],
14057                               constants.DISK_DT_TYPES)
14058       self.new_diskparams = self.op.diskparams
14059       try:
14060         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14061       except errors.OpPrereqError, err:
14062         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14063                                    errors.ECODE_INVAL)
14064     else:
14065       self.new_diskparams = {}
14066
14067     if self.op.ipolicy:
14068       cluster = self.cfg.GetClusterInfo()
14069       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14070       try:
14071         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14072       except errors.ConfigurationError, err:
14073         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14074                                    errors.ECODE_INVAL)
14075
14076   def BuildHooksEnv(self):
14077     """Build hooks env.
14078
14079     """
14080     return {
14081       "GROUP_NAME": self.op.group_name,
14082       }
14083
14084   def BuildHooksNodes(self):
14085     """Build hooks nodes.
14086
14087     """
14088     mn = self.cfg.GetMasterNode()
14089     return ([mn], [mn])
14090
14091   def Exec(self, feedback_fn):
14092     """Add the node group to the cluster.
14093
14094     """
14095     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14096                                   uuid=self.group_uuid,
14097                                   alloc_policy=self.op.alloc_policy,
14098                                   ndparams=self.op.ndparams,
14099                                   diskparams=self.new_diskparams,
14100                                   ipolicy=self.op.ipolicy,
14101                                   hv_state_static=self.new_hv_state,
14102                                   disk_state_static=self.new_disk_state)
14103
14104     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14105     del self.remove_locks[locking.LEVEL_NODEGROUP]
14106
14107
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit which moves a set of nodes into a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves group and node names and declares the initial locks.

    """
    # Both calls raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # All affected nodes and groups are to be locked. At this point only the
    # node list and the *destination* group are known; the "source" groups
    # need node information and are added later, in DeclareLocks.
    locks = {}
    locks[locking.LEVEL_NODEGROUP] = set([self.group_uuid])
    locks[locking.LEVEL_NODE] = self.op.nodes
    self.needed_locks = locks

  def DeclareLocks(self, level):
    """Adds the current groups of the nodes to the node group locks.

    """
    if level != locking.LEVEL_NODEGROUP:
      return

    assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

    # This lookup happens without holding group or node locks yet, hence
    # the result has to be verified later on (see CheckPrereq).
    source_groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

    self.needed_locks[locking.LEVEL_NODEGROUP].update(source_groups)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the node groups locked earlier still match the groups the
    nodes belong to, and that the change does not newly split instances
    across groups unless the force option was given.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    current_groups = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    locked_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if locked_groups != current_groups:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(current_groups),
                                utils.CommaJoin(locked_groups)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    all_instances = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(node, self.group_uuid) for node in self.op.nodes]
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            all_instances)

    if not new_splits:
      return

    new_splits_text = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               new_splits_text)

    self.LogWarning("This operation will split the following instances: %s",
                    new_splits_text)

    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    target_uuid_of = [(name, self.group_uuid) for name in self.op.nodes]
    self.cfg.AssignGroupNodes(target_uuid_of)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Computes which instances are split after a set of node assignments.

    All assignments in C{changes} are treated as a single atomic operation.
    An instance counts as split if its primary and secondary nodes do not
    all belong to the same group. Instances whose disk template is not in
    constants.DTS_INT_MIRROR are ignored.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances which were fine before and become split
      through this change, and a list of instances which were split before
      and stay split after the change

    """
    # Only assignments which actually move a node to another group matter
    new_group_of = {}
    for (node, group) in changes:
      if node_data[node].group != group:
        new_group_of[node] = group

    split_after = set()
    split_before = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      inst_nodes = [inst.primary_node] + list(inst.secondary_nodes)

      groups_before = set(node_data[node].group for node in inst_nodes)
      if len(groups_before) > 1:
        split_before.add(inst.name)

      groups_after = set(new_group_of.get(node, node_data[node].group)
                         for node in inst_nodes)
      if len(groups_after) > 1:
        split_after.add(inst.name)

    return (list(split_after - split_before),
            list(split_before & split_after))
14239
14240
class _GroupQuery(_QueryBase):
  """Query implementation for node groups, built on L{_QueryBase}.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Resolves the requested groups to a list of UUIDs (C{self.wanted}).

    Without requested names, all groups are selected, ordered by name. No
    locks are requested.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if a requested group is neither a known
      group name nor a known group UUID

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()

    uuid_by_name = dict((group.name, group.uuid)
                        for group in self._all_groups.values())

    if not self.names:
      sorted_names = utils.NiceSort(uuid_by_name.keys())
      self.wanted = [uuid_by_name[name] for name in sorted_names]
      return

    # Each requested item may be a group name or a group UUID
    unknown = []
    self.wanted = []
    known_uuids = frozenset(self._all_groups.keys())

    for item in self.names:
      if item in known_uuids:
        self.wanted.append(item)
      elif item in uuid_by_name:
        self.wanted.append(uuid_by_name[item])
      else:
        unknown.append(item)

    if unknown:
      raise errors.OpPrereqError("Some groups do not exist: %s" %
                                 utils.CommaJoin(unknown),
                                 errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Does nothing, no locks are declared by this query.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    The group->nodes and group->instances mappings are only computed when
    the requested data asks for them, otherwise C{None} is passed on.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.GroupQueryData} object for the wanted groups

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    nodes_by_group = None
    instances_by_group = None

    # GQ_NODE needs group->[nodes], GQ_INST needs group->[instances]. The
    # first mapping can be derived from GetAllNodesInfo() alone. The second
    # one has to go through instance->node, which is why nodes are processed
    # even if only instance information was requested.
    if want_nodes or want_instances:
      nodes_by_group = dict((uuid, []) for uuid in self.wanted)
      group_of_node = {}

      for node in lu.cfg.GetAllNodesInfo().values():
        if node.group not in nodes_by_group:
          continue
        nodes_by_group[node.group].append(node.name)
        group_of_node[node.name] = node.group

      if want_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        instances_by_group = dict((uuid, []) for uuid in self.wanted)

        # An instance is listed under the group of its primary node
        for inst in all_instances.values():
          pnode = inst.primary_node
          if pnode in group_of_node:
            instances_by_group[group_of_node[pnode]].append(inst.name)

        if not want_nodes:
          # Do not pass on node information if it was not requested.
          nodes_by_group = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]
    want_diskparams = query.GQ_DISKPARAMS in self.requested_data

    return query.GroupQueryData(self._cluster, wanted_groups,
                                nodes_by_group, instances_by_group,
                                want_diskparams)
14319
14320
class LUGroupQuery(NoHooksLU):
  """Logical unit answering node group queries.

  All the work is delegated to a L{_GroupQuery} object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the query object from the opcode's names and fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Lets the query object expand the names.

    """
    group_query = self.gq
    group_query.ExpandNames(self)

  def DeclareLocks(self, level):
    """Lets the query object declare locks for the given level.

    """
    group_query = self.gq
    group_query.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    answer = self.gq.OldStyleQuery(self)
    return answer
14339
14340
14341 class LUGroupSetParams(LogicalUnit):
14342   """Modifies the parameters of a node group.
14343
14344   """
14345   HPATH = "group-modify"
14346   HTYPE = constants.HTYPE_GROUP
14347   REQ_BGL = False
14348
14349   def CheckArguments(self):
14350     all_changes = [
14351       self.op.ndparams,
14352       self.op.diskparams,
14353       self.op.alloc_policy,
14354       self.op.hv_state,
14355       self.op.disk_state,
14356       self.op.ipolicy,
14357       ]
14358
14359     if all_changes.count(None) == len(all_changes):
14360       raise errors.OpPrereqError("Please pass at least one modification",
14361                                  errors.ECODE_INVAL)
14362
14363   def ExpandNames(self):
14364     # This raises errors.OpPrereqError on its own:
14365     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14366
14367     self.needed_locks = {
14368       locking.LEVEL_INSTANCE: [],
14369       locking.LEVEL_NODEGROUP: [self.group_uuid],
14370       }
14371
14372     self.share_locks[locking.LEVEL_INSTANCE] = 1
14373
14374   def DeclareLocks(self, level):
14375     if level == locking.LEVEL_INSTANCE:
14376       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14377
14378       # Lock instances optimistically, needs verification once group lock has
14379       # been acquired
14380       self.needed_locks[locking.LEVEL_INSTANCE] = \
14381           self.cfg.GetNodeGroupInstances(self.group_uuid)
14382
14383   @staticmethod
14384   def _UpdateAndVerifyDiskParams(old, new):
14385     """Updates and verifies disk parameters.
14386
14387     """
14388     new_params = _GetUpdatedParams(old, new)
14389     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14390     return new_params
14391
14392   def CheckPrereq(self):
14393     """Check prerequisites.
14394
14395     """
14396     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14397
14398     # Check if locked instances are still correct
14399     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14400
14401     self.group = self.cfg.GetNodeGroup(self.group_uuid)
14402     cluster = self.cfg.GetClusterInfo()
14403
14404     if self.group is None:
14405       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14406                                (self.op.group_name, self.group_uuid))
14407
14408     if self.op.ndparams:
14409       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14410       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14411       self.new_ndparams = new_ndparams
14412
14413     if self.op.diskparams:
14414       diskparams = self.group.diskparams
14415       uavdp = self._UpdateAndVerifyDiskParams
14416       # For each disktemplate subdict update and verify the values
14417       new_diskparams = dict((dt,
14418                              uavdp(diskparams.get(dt, {}),
14419                                    self.op.diskparams[dt]))
14420                             for dt in constants.DISK_TEMPLATES
14421                             if dt in self.op.diskparams)
14422       # As we've all subdicts of diskparams ready, lets merge the actual
14423       # dict with all updated subdicts
14424       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14425       try:
14426         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14427       except errors.OpPrereqError, err:
14428         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14429                                    errors.ECODE_INVAL)
14430
14431     if self.op.hv_state:
14432       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14433                                                  self.group.hv_state_static)
14434
14435     if self.op.disk_state:
14436       self.new_disk_state = \
14437         _MergeAndVerifyDiskState(self.op.disk_state,
14438                                  self.group.disk_state_static)
14439
14440     if self.op.ipolicy:
14441       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14442                                             self.op.ipolicy,
14443                                             group_policy=True)
14444
14445       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14446       inst_filter = lambda inst: inst.name in owned_instances
14447       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14448       gmi = ganeti.masterd.instance
14449       violations = \
14450           _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14451                                                                   self.group),
14452                                         new_ipolicy, instances)
14453
14454       if violations:
14455         self.LogWarning("After the ipolicy change the following instances"
14456                         " violate them: %s",
14457                         utils.CommaJoin(violations))
14458
14459   def BuildHooksEnv(self):
14460     """Build hooks env.
14461
14462     """
14463     return {
14464       "GROUP_NAME": self.op.group_name,
14465       "NEW_ALLOC_POLICY": self.op.alloc_policy,
14466       }
14467
14468   def BuildHooksNodes(self):
14469     """Build hooks nodes.
14470
14471     """
14472     mn = self.cfg.GetMasterNode()
14473     return ([mn], [mn])
14474
14475   def Exec(self, feedback_fn):
14476     """Modifies the node group.
14477
14478     """
14479     result = []
14480
14481     if self.op.ndparams:
14482       self.group.ndparams = self.new_ndparams
14483       result.append(("ndparams", str(self.group.ndparams)))
14484
14485     if self.op.diskparams:
14486       self.group.diskparams = self.new_diskparams
14487       result.append(("diskparams", str(self.group.diskparams)))
14488
14489     if self.op.alloc_policy:
14490       self.group.alloc_policy = self.op.alloc_policy
14491
14492     if self.op.hv_state:
14493       self.group.hv_state_static = self.new_hv_state
14494
14495     if self.op.disk_state:
14496       self.group.disk_state_static = self.new_disk_state
14497
14498     if self.op.ipolicy:
14499       self.group.ipolicy = self.new_ipolicy
14500
14501     self.cfg.Update(self.group, feedback_fn)
14502     return result
14503
14504
class LUGroupRemove(LogicalUnit):
  """Logical unit removing an empty node group from the cluster.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the group name and requests the lock of the group.

    """
    # LookupNodeGroup raises errors.OpPrereqError on its own:
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.group_uuid = group_uuid
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    The group must not contain any node, and it must not be the only node
    group of the cluster.

    """
    # Collect the nodes which are still members of the group
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        members.append(node.name)

    if members:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(members))),
                                 errors.ECODE_STATE)

    # Removing the only group would leave the cluster without any group
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
                                 " removed" % self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment holding only the name of the group

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two lists, each containing just the master node

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    @raise errors.OpExecError: if the configuration reports an error while
      removing the group

    """
    uuid = self.group_uuid

    try:
      self.cfg.RemoveNodeGroup(uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = uuid
14569
14570
class LUGroupRename(LogicalUnit):
  """Logical unit giving an existing node group a new name.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the current group name and request the lock of that group.

    """
    # The lookup raises errors.OpPrereqError on its own
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {locking.LEVEL_NODEGROUP: [self.group_uuid]}

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    @raise errors.OpPrereqError: if a node group with the new name exists

    """
    try:
      clash_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The lookup failed, hence the new name is still free
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (self.op.new_name, clash_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary with the old and the new group name

    """
    env = {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list twice; it holds the master node followed by the
        names of the other nodes belonging to the renamed group

    """
    master = self.cfg.GetMasterNode()

    # Drop the master's entry, the master is listed explicitly below
    candidates = self.cfg.GetAllNodesInfo()
    candidates.pop(master, None)

    hook_nodes = [master]
    for node in candidates.values():
      if node.group == self.group_uuid:
        hook_nodes.append(node.name)

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @return: the new name of the group
    @raise errors.OpExecError: if the group can not be retrieved anymore

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)
    if group is None:
      details = (self.op.group_name, self.group_uuid)
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               details)

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
14638
14639
class LUGroupEvacuate(LogicalUnit):
  """Logical unit evacuating the instances of a node group.

  The instances locked for the group are handed to an iallocator together
  with the target groups; the jobs derived from its answer are returned for
  submission.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve group names and prepare the (still empty) lock declarations.

    @raise errors.OpPrereqError: if the group to be evacuated was also
        requested as a target group

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    # NOTE(review): presumably requests all lock levels in shared mode;
    # confirm against _ShareAll
    self.share_locks = _ShareAll()
    # The actual lock names are filled in level by level in DeclareLocks
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given level.

    @param level: Locking level for which the needed locks are computed

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Re-verifies the optimistically acquired locks and determines the list of
    target groups, stored in C{self.target_uuids}.

    @raise errors.OpPrereqError: if no target groups were requested and no
        group other than the evacuated one is locked

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary with the group name and the space-separated UUIDs of
        the target groups

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list twice; it holds the master node followed by the
        members of the group to be evacuated

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Ask the iallocator for an evacuation plan and return it as jobs.

    @return: L{ResultWithJobs} wrapping the jobs computed from the
        iallocator result
    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=self.target_uuids)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
14787
14788
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Common base for the logical units working on tags.

  This class is abstract; it only implements the name expansion, locking and
  target lookup shared by the concrete tags LUs.

  """
  def ExpandNames(self):
    """Expand the object name and request the lock protecting the object.

    Nothing is locked for kinds other than node, instance and node group,
    or when the opcode carries a false C{use_locking} attribute.

    """
    self.group_uuid = None
    self.needed_locks = {}

    kind = self.op.kind
    lock_level = None
    lock_name = None

    if kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_NODE
      lock_name = self.op.name
    elif kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_INSTANCE
      lock_name = self.op.name
    elif kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      lock_level = locking.LEVEL_NODEGROUP
      lock_name = self.group_uuid

    if lock_level:
      # Opcodes without a "use_locking" attribute always lock
      if getattr(self.op, "use_locking", True):
        self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the object to operate on and stores it in C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is not a known one

    """
    kind = self.op.kind

    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)

    self.target = target
14836
14837
class LUTagsGet(TagsLU):
  """Logical unit returning the tags of one object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names like the base class does and share the locks.

    """
    TagsLU.ExpandNames(self)

    # Reading tags does not modify anything, hence shared locks suffice
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @return: list with the tags of the target object

    """
    tags = self.target.GetTags()
    return list(tags)
14855
14856
14857 class LUTagsSearch(NoHooksLU):
14858   """Searches the tags for a given pattern.
14859
14860   """
14861   REQ_BGL = False
14862
14863   def ExpandNames(self):
14864     self.needed_locks = {}
14865
14866   def CheckPrereq(self):
14867     """Check prerequisites.
14868
14869     This checks the pattern passed for validity by compiling it.
14870
14871     """
14872     try:
14873       self.re = re.compile(self.op.pattern)
14874     except re.error, err:
14875       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14876                                  (self.op.pattern, err), errors.ECODE_INVAL)
14877
14878   def Exec(self, feedback_fn):
14879     """Returns the tag list.
14880
14881     """
14882     cfg = self.cfg
14883     tgts = [("/cluster", cfg.GetClusterInfo())]
14884     ilist = cfg.GetAllInstancesInfo().values()
14885     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14886     nlist = cfg.GetAllNodesInfo().values()
14887     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14888     tgts.extend(("/nodegroup/%s" % n.name, n)
14889                 for n in cfg.GetAllNodeGroupsInfo().values())
14890     results = []
14891     for path, target in tgts:
14892       for tag in target.GetTags():
14893         if self.re.search(tag):
14894           results.append((path, tag))
14895     return results
14896
14897
14898 class LUTagsSet(TagsLU):
14899   """Sets a tag on a given object.
14900
14901   """
14902   REQ_BGL = False
14903
14904   def CheckPrereq(self):
14905     """Check prerequisites.
14906
14907     This checks the type and length of the tag name and value.
14908
14909     """
14910     TagsLU.CheckPrereq(self)
14911     for tag in self.op.tags:
14912       objects.TaggableObject.ValidateTag(tag)
14913
14914   def Exec(self, feedback_fn):
14915     """Sets the tag.
14916
14917     """
14918     try:
14919       for tag in self.op.tags:
14920         self.target.AddTag(tag)
14921     except errors.TagError, err:
14922       raise errors.OpExecError("Error while setting tag: %s" % str(err))
14923     self.cfg.Update(self.target, feedback_fn)
14924
14925
class LUTagsDel(TagsLU):
  """Logical unit removing a list of tags from one object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Every requested tag is validated and must currently be set on the
    target object.

    @raise errors.OpPrereqError: if at least one tag is not set on the object

    """
    TagsLU.CheckPrereq(self)

    validate = objects.TaggableObject.ValidateTag
    for tag in self.op.tags:
      validate(tag)

    requested = frozenset(self.op.tags)
    present = self.target.GetTags()

    # Tags which should be removed but are not set on the object
    missing = requested - present
    if not missing:
      return

    quoted = ["'%s'" % name for name in sorted(missing)]
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags and write the modified object to the configuration.

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)

    self.cfg.Update(target, feedback_fn)
14958
14959
class LUTestDelay(NoHooksLU):
  """Logical unit sleeping for a given amount of time.

  The delay can be executed on the master, on a list of nodes or on both,
  and it can be repeated.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    If nodes were given, their names are expanded and their locks requested.

    """
    self.needed_locks = {}

    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Run one delay, first on the master and then on the nodes.

    @raise errors.OpExecError: if the delay on the master fails

    """
    if self.op.on_master and not utils.TestDelay(self.op.duration):
      raise errors.OpExecError("Error during master delay test")

    if not self.op.on_nodes:
      return

    responses = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
    for (node, response) in responses.items():
      response.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero runs the delay exactly once, without logging the
    iteration.

    """
    if self.op.repeat == 0:
      self._TestDelay()
      return

    last_idx = self.op.repeat - 1
    for idx in range(self.op.repeat):
      self.LogInfo("Test delay iteration %d/%d" % (idx, last_idx))
      self._TestDelay()
15006
15007
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  At selected points of its life cycle the LU notifies the client through a
  temporary Unix socket and waits for the client's confirmation.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  # Timeout used while waiting for data from the connected client
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    After the client has connected, this function waits until the client
    sends data or closes the connection.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors
    @raise errcls: if the client does not connect or confirm before the
        respective timeout expires

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        # The listening socket is not needed once accept returned or failed
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    The notification is emitted as a log entry of type
    L{constants.ELOG_JQUEUE_TEST}.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @raise errors.OpPrereqError: on notification failures if C{prereq} is
        true
    @raise errors.OpExecError: on notification failures if C{prereq} is
        false

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    # The socket path is supplied by _NotifyUsingSocket as last argument
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    """Count the calls to this method and reset the ExpandNames counter.

    """
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    """Verify the call order, optionally notify the client and request locks.

    @raise errors.ProgrammerError: if L{CheckArguments} was not called before

    """
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    """Send the requested notifications and log messages.

    @return: C{True} unless a failure was requested
    @raise errors.ProgrammerError: if L{ExpandNames} was not called before
    @raise errors.OpExecError: if the opcode requested a failure

    """
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      # Announce the number of messages before sending them one by one
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
15142
15143
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU builds an iallocator request from the opcode parameters and either
  returns the generated input text or runs the allocator and returns its
  output text.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if a parameter is missing or invalid for the
        requested mode or direction

    """
    if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
                        constants.IALLOCATOR_MODE_MULTI_ALLOC):
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # Allocation tests must use a name which is not taken yet
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # Every disk needs an integer size and a valid access mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      # Running the allocator requires knowing which one to run
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def _InstanceAllocRequest(self, name):
    """Build a single-instance allocation request from the opcode.

    @type name: string
    @param name: Name of the instance to be allocated
    @return: L{iallocator.IAReqInstanceAlloc} request using the opcode's
        instance parameters

    """
    return iallocator.IAReqInstanceAlloc(name=name,
                                         memory=self.op.memory,
                                         disks=self.op.disks,
                                         disk_template=self.op.disk_template,
                                         os=self.op.os,
                                         tags=self.op.tags,
                                         nics=self.op.nics,
                                         vcpus=self.op.vcpus,
                                         spindle_use=self.op.spindle_use,
                                         hypervisor=self.op.hypervisor)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @return: the allocator input text for the "in" direction, otherwise the
        output text of the allocator run
    @raise errors.ProgrammerError: if the mode is not handled here although
        it passed L{CheckPrereq}

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      req = self._InstanceAllocRequest(self.op.name)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      req = iallocator.IAReqRelocate(name=self.op.name,
                                     relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      req = iallocator.IAReqGroupChange(instances=self.op.instances,
                                        target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      req = iallocator.IAReqNodeEvac(instances=self.op.instances,
                                     evac_mode=self.op.evac_mode)
    elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
      # The instance names are the opcode's name with the index appended
      insts = [self._InstanceAllocRequest("%s%s" % (self.op.name, idx))
               for idx in range(self.op.count)]
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    else:
      # The mode has to be interpolated here; passing it as a separate
      # argument would leave the "%s" placeholder in the message
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
15254
15255
#: Query type implementations, keyed by query resource name
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

# The table must cover exactly the resources listed in constants.QR_VIA_OP
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15267
15268
def _GetQueryImplementation(name):
  """Looks up the implementation registered for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the entry of L{_QUERY_IMPL} registered for C{name}
  @raise errors.OpPrereqError: if no implementation is registered for C{name}

  """
  if name not in _QUERY_IMPL:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

  return _QUERY_IMPL[name]