4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti.masterd import iallocator
64 import ganeti.masterd.instance # pylint: disable=W0611
68 INSTANCE_DOWN = [constants.ADMINST_DOWN]
69 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
70 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
72 #: Instance status in which an instance can be marked as offline/online
73 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
74 constants.ADMINST_OFFLINE,
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensuring the
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separately is better because:
173 - ExpandNames is left as purely a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possibly waited for them)
177 The function is allowed to change the self.op attribute so that
178 later methods need no longer worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that
197 level (note that in this case C{DeclareLocks} won't be called
198 at all for that level)
199 - if you need locks at a level, but you can't calculate it in
200 this function, initialise that level with an empty list and do
201 further processing in L{LogicalUnit.DeclareLocks} (see that
202 function's docstring)
203 - don't put anything for the BGL level
204 - if you want all locks at a level use L{locking.ALL_SET} as a value
206 If you need to share locks (rather than acquire them exclusively) at one
207 level you can modify self.share_locks, setting a true value (usually 1) for
208 that level. By default locks are not shared.
210 This function can also define a list of tasklets, which then will be
211 executed in order instead of the usual LU-level CheckPrereq and Exec
212 functions, if those are not defined by the LU.
216 # Acquire all nodes and one instance
217 self.needed_locks = {
218 locking.LEVEL_NODE: locking.ALL_SET,
219 locking.LEVEL_INSTANCE: ['instance1.example.com'],
221 # Acquire just two nodes
222 self.needed_locks = {
223 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
226 self.needed_locks = {} # No, you can't leave it to the default value None
229 # The implementation of this method is mandatory only if the new LU is
230 # concurrent, so that old LUs don't need to be changed all at the same time.
233 self.needed_locks = {} # Exclusive LUs don't need locks.
235 raise NotImplementedError
237 def DeclareLocks(self, level):
238 """Declare LU locking needs for a level
240 While most LUs can just declare their locking needs at ExpandNames time,
241 sometimes there's the need to calculate some locks after having acquired
242 the ones before. This function is called just before acquiring locks at a
243 particular level, but after acquiring the ones at lower levels, and permits
244 such calculations. It can be used to modify self.needed_locks, and by
245 default it does nothing.
247 This function is only called if you have something already set in
248 self.needed_locks for the level.
250 @param level: Locking level which is going to be locked
251 @type level: member of L{ganeti.locking.LEVELS}
255 def CheckPrereq(self):
256 """Check prerequisites for this LU.
258 This method should check that the prerequisites for the execution
259 of this LU are fulfilled. It can do internode communication, but
260 it should be idempotent - no cluster or system changes are allowed.
263 The method should raise errors.OpPrereqError in case something is
264 not fulfilled. Its return value is ignored.
266 This method should also update all the parameters of the opcode to
267 their canonical form if it hasn't been done by ExpandNames before.
270 if self.tasklets is not None:
271 for (idx, tl) in enumerate(self.tasklets):
272 logging.debug("Checking prerequisites for tasklet %s/%s",
273 idx + 1, len(self.tasklets))
278 def Exec(self, feedback_fn):
281 This method should implement the actual work. It should raise
282 errors.OpExecError for failures that are somewhat dealt with in
286 if self.tasklets is not None:
287 for (idx, tl) in enumerate(self.tasklets):
288 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
291 raise NotImplementedError
293 def BuildHooksEnv(self):
294 """Build hooks environment for this LU.
297 @return: Dictionary containing the environment that will be used for
298 running the hooks for this LU. The keys of the dict must not be prefixed
299 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
300 will extend the environment with additional variables. If no environment
301 should be defined, an empty dictionary should be returned (not C{None}).
302 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
306 raise NotImplementedError
308 def BuildHooksNodes(self):
309 """Build list of nodes to run LU's hooks.
311 @rtype: tuple; (list, list)
312 @return: Tuple containing a list of node names on which the hook
313 should run before the execution and a list of node names on which the
314 hook should run after the execution. If there are no nodes, an empty
315 list should be returned (and not None).
316 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
320 raise NotImplementedError
322 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
323 """Notify the LU about the results of its hooks.
325 This method is called every time a hooks phase is executed, and notifies
326 the Logical Unit about the hooks' result. The LU can then use it to alter
327 its result based on the hooks. By default the method does nothing and the
328 previous result is passed back unchanged but any LU can define it if it
329 wants to use the local cluster hook-scripts somehow.
331 @param phase: one of L{constants.HOOKS_PHASE_POST} or
332 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
333 @param hook_results: the results of the multi-node hooks rpc call
334 @param feedback_fn: function used to send feedback back to the caller
335 @param lu_result: the previous Exec result this LU had, or None
337 @return: the new Exec result, based on the previous result
341 # API must be kept, thus we ignore the "unused argument" and "could
342 # be a function" warnings
343 # pylint: disable=W0613,R0201
346 def _ExpandAndLockInstance(self):
347 """Helper function to expand and lock an instance.
349 Many LUs that work on an instance take its name in self.op.instance_name
350 and need to expand it and then declare the expanded name for locking. This
351 function does it, and then updates self.op.instance_name to the expanded
352 name. It also initializes needed_locks as a dict, if this hasn't been done
356 if self.needed_locks is None:
357 self.needed_locks = {}
359 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
360 "_ExpandAndLockInstance called with instance-level locks set"
361 self.op.instance_name = _ExpandInstanceName(self.cfg,
362 self.op.instance_name)
363 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
365 def _LockInstancesNodes(self, primary_only=False,
366 level=locking.LEVEL_NODE):
367 """Helper function to declare instances' nodes for locking.
369 This function should be called after locking one or more instances to lock
370 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
371 with all primary or secondary nodes for instances already locked and
372 present in self.needed_locks[locking.LEVEL_INSTANCE].
374 It should be called from DeclareLocks, and for safety only works if
375 self.recalculate_locks[locking.LEVEL_NODE] is set.
377 In the future it may grow parameters to just lock some instance's nodes, or
378 to just lock primaries or secondary nodes, if needed.
380 It should be called in DeclareLocks in a way similar to::
382 if level == locking.LEVEL_NODE:
383 self._LockInstancesNodes()
385 @type primary_only: boolean
386 @param primary_only: only lock primary nodes of locked instances
387 @param level: Which lock level to use for locking nodes
390 assert level in self.recalculate_locks, \
391 "_LockInstancesNodes helper function called with no nodes to recalculate"
393 # TODO: check if we've really been called with the instance locks held
395 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
396 # future we might want to have different behaviors depending on the value
397 # of self.recalculate_locks[locking.LEVEL_NODE]
399 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
400 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
401 wanted_nodes.append(instance.primary_node)
403 wanted_nodes.extend(instance.secondary_nodes)
405 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
406 self.needed_locks[level] = wanted_nodes
407 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
408 self.needed_locks[level].extend(wanted_nodes)
410 raise errors.ProgrammerError("Unknown recalculation mode")
412 del self.recalculate_locks[level]
415 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
416 """Simple LU which runs no hooks.
418 This LU is intended as a parent for other LogicalUnits which will
419 run no hooks, in order to reduce duplicate code.
425 def BuildHooksEnv(self):
426 """Empty BuildHooksEnv for NoHooksLu.
428 This just raises an error.
431 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
433 def BuildHooksNodes(self):
434 """Empty BuildHooksNodes for NoHooksLU.
437 raise AssertionError("BuildHooksNodes called for NoHooksLU")
441 """Tasklet base class.
443 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
444 they can mix legacy code with tasklets. Locking needs to be done in the LU,
445 tasklets know nothing about locks.
447 Subclasses must follow these rules:
448 - Implement CheckPrereq
452 def __init__(self, lu):
459 def CheckPrereq(self):
460 """Check prerequisites for this tasklets.
462 This method should check whether the prerequisites for the execution of
463 this tasklet are fulfilled. It can do internode communication, but it
464 should be idempotent - no cluster or system changes are allowed.
466 The method should raise errors.OpPrereqError in case something is not
467 fulfilled. Its return value is ignored.
469 This method should also update all parameters to their canonical form if it
470 hasn't been done before.
475 def Exec(self, feedback_fn):
476 """Execute the tasklet.
478 This method should implement the actual work. It should raise
479 errors.OpExecError for failures that are somewhat dealt with in code, or
483 raise NotImplementedError
487 """Base for query utility classes.
490 #: Attribute holding field definitions
496 def __init__(self, qfilter, fields, use_locking):
497 """Initializes this class.
500 self.use_locking = use_locking
502 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
503 namefield=self.SORT_FIELD)
504 self.requested_data = self.query.RequestedData()
505 self.names = self.query.RequestedNames()
507 # Sort only if no names were requested
508 self.sort_by_name = not self.names
510 self.do_locking = None
513 def _GetNames(self, lu, all_names, lock_level):
514 """Helper function to determine names asked for in the query.
518 names = lu.owned_locks(lock_level)
522 if self.wanted == locking.ALL_SET:
523 assert not self.names
524 # caller didn't specify names, so ordering is not important
525 return utils.NiceSort(names)
527 # caller specified names and we must keep the same order
529 assert not self.do_locking or lu.glm.is_owned(lock_level)
531 missing = set(self.wanted).difference(names)
533 raise errors.OpExecError("Some items were removed before retrieving"
534 " their data: %s" % missing)
536 # Return expanded names
539 def ExpandNames(self, lu):
540 """Expand names for this query.
542 See L{LogicalUnit.ExpandNames}.
545 raise NotImplementedError()
547 def DeclareLocks(self, lu, level):
548 """Declare locks for this query.
550 See L{LogicalUnit.DeclareLocks}.
553 raise NotImplementedError()
555 def _GetQueryData(self, lu):
556 """Collects all data for this query.
558 @return: Query data object
561 raise NotImplementedError()
563 def NewStyleQuery(self, lu):
564 """Collect data and execute query.
567 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
568 sort_by_name=self.sort_by_name)
570 def OldStyleQuery(self, lu):
571 """Collect data and execute query.
574 return self.query.OldStyleQuery(self._GetQueryData(lu),
575 sort_by_name=self.sort_by_name)
579 """Returns a dict declaring all lock levels shared.
582 return dict.fromkeys(locking.LEVELS, 1)
585 def _AnnotateDiskParams(instance, devs, cfg):
586 """Little helper wrapper to the rpc annotation method.
588 @param instance: The instance object
589 @type devs: List of L{objects.Disk}
590 @param devs: The root devices (not any of its children!)
591 @param cfg: The config object
592 @return: The annotated disk copies
593 @see: L{rpc.AnnotateDiskParams}
596 return rpc.AnnotateDiskParams(instance.disk_template, devs,
597 cfg.GetInstanceDiskParams(instance))
600 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
602 """Checks if node groups for locked instances are still correct.
604 @type cfg: L{config.ConfigWriter}
605 @param cfg: Cluster configuration
606 @type instances: dict; string as key, L{objects.Instance} as value
607 @param instances: Dictionary, instance name as key, instance object as value
608 @type owned_groups: iterable of string
609 @param owned_groups: List of owned groups
610 @type owned_nodes: iterable of string
611 @param owned_nodes: List of owned nodes
612 @type cur_group_uuid: string or None
613 @param cur_group_uuid: Optional group UUID to check against instance's groups
616 for (name, inst) in instances.items():
617 assert owned_nodes.issuperset(inst.all_nodes), \
618 "Instance %s's nodes changed while we kept the lock" % name
620 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
622 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
623 "Instance %s has no node in group %s" % (name, cur_group_uuid)
626 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
628 """Checks if the owned node groups are still correct for an instance.
630 @type cfg: L{config.ConfigWriter}
631 @param cfg: The cluster configuration
632 @type instance_name: string
633 @param instance_name: Instance name
634 @type owned_groups: set or frozenset
635 @param owned_groups: List of currently owned node groups
636 @type primary_only: boolean
637 @param primary_only: Whether to check node groups for only the primary node
640 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
642 if not owned_groups.issuperset(inst_groups):
643 raise errors.OpPrereqError("Instance %s's node groups changed since"
644 " locks were acquired, current groups are"
645 " are '%s', owning groups '%s'; retry the"
648 utils.CommaJoin(inst_groups),
649 utils.CommaJoin(owned_groups)),
655 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
656 """Checks if the instances in a node group are still correct.
658 @type cfg: L{config.ConfigWriter}
659 @param cfg: The cluster configuration
660 @type group_uuid: string
661 @param group_uuid: Node group UUID
662 @type owned_instances: set or frozenset
663 @param owned_instances: List of currently owned instances
666 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
667 if owned_instances != wanted_instances:
668 raise errors.OpPrereqError("Instances in node group '%s' changed since"
669 " locks were acquired, wanted '%s', have '%s';"
670 " retry the operation" %
672 utils.CommaJoin(wanted_instances),
673 utils.CommaJoin(owned_instances)),
676 return wanted_instances
679 def _SupportsOob(cfg, node):
680 """Tells if node supports OOB.
682 @type cfg: L{config.ConfigWriter}
683 @param cfg: The cluster configuration
684 @type node: L{objects.Node}
685 @param node: The node
686 @return: The OOB script if supported or an empty string otherwise
689 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
692 def _GetWantedNodes(lu, nodes):
693 """Returns list of checked and expanded node names.
695 @type lu: L{LogicalUnit}
696 @param lu: the logical unit on whose behalf we execute
698 @param nodes: list of node names or None for all nodes
700 @return: the list of nodes, sorted
701 @raise errors.ProgrammerError: if the nodes parameter is wrong type
705 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
707 return utils.NiceSort(lu.cfg.GetNodeList())
710 def _GetWantedInstances(lu, instances):
711 """Returns list of checked and expanded instance names.
713 @type lu: L{LogicalUnit}
714 @param lu: the logical unit on whose behalf we execute
715 @type instances: list
716 @param instances: list of instance names or None for all instances
718 @return: the list of instances, sorted
719 @raise errors.OpPrereqError: if the instances parameter is wrong type
720 @raise errors.OpPrereqError: if any of the passed instances is not found
724 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
726 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
730 def _GetUpdatedParams(old_params, update_dict,
731 use_default=True, use_none=False):
732 """Return the new version of a parameter dictionary.
734 @type old_params: dict
735 @param old_params: old parameters
736 @type update_dict: dict
737 @param update_dict: dict containing new parameter values, or
738 constants.VALUE_DEFAULT to reset the parameter to its default
740 @type use_default: boolean
741 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
742 values as 'to be deleted' values
743 @type use_none: boolean
744 @param use_none: whether to recognise C{None} values as 'to be
747 @return: the new parameter dictionary
750 params_copy = copy.deepcopy(old_params)
751 for key, val in update_dict.iteritems():
752 if ((use_default and val == constants.VALUE_DEFAULT) or
753 (use_none and val is None)):
759 params_copy[key] = val
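# Illustrative sketch (made-up parameter names and values): how callers use
# _GetUpdatedParams to merge opcode-supplied overrides into stored parameters.
# Passing constants.VALUE_DEFAULT for a key drops the override so the
# cluster-level default applies again.
def _ExampleGetUpdatedParamsUsage():
  """Example only; demonstrates the intended semantics of _GetUpdatedParams."""
  old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
  update = {"root_path": "/dev/vda2", "kernel_path": constants.VALUE_DEFAULT}
  # expected result (with use_default=True): {"root_path": "/dev/vda2"}
  return _GetUpdatedParams(old, update, use_default=True)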
763 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
764 """Return the new version of a instance policy.
766 @param group_policy: whether this policy applies to a group and thus
767 we should support removal of policy entries
770 use_none = use_default = group_policy
771 ipolicy = copy.deepcopy(old_ipolicy)
772 for key, value in new_ipolicy.items():
773 if key not in constants.IPOLICY_ALL_KEYS:
774 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
776 if key in constants.IPOLICY_ISPECS:
777 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
778 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
780 use_default=use_default)
782 if (not value or value == [constants.VALUE_DEFAULT] or
783 value == constants.VALUE_DEFAULT):
787 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
788 " on the cluster'" % key,
791 if key in constants.IPOLICY_PARAMETERS:
792 # FIXME: we assume all such values are float
794 ipolicy[key] = float(value)
795 except (TypeError, ValueError), err:
796 raise errors.OpPrereqError("Invalid value for attribute"
797 " '%s': '%s', error: %s" %
798 (key, value, err), errors.ECODE_INVAL)
800 # FIXME: we assume all others are lists; this should be redone
802 ipolicy[key] = list(value)
804 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
805 except errors.ConfigurationError, err:
806 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
811 def _UpdateAndVerifySubDict(base, updates, type_check):
812 """Updates and verifies a dict with sub dicts of the same type.
814 @param base: The dict with the old data
815 @param updates: The dict with the new data
816 @param type_check: Dict suitable to ForceDictType to verify correct types
817 @returns: A new dict with updated and verified values
821 new = _GetUpdatedParams(old, value)
822 utils.ForceDictType(new, type_check)
825 ret = copy.deepcopy(base)
826 ret.update(dict((key, fn(base.get(key, {}), value))
827 for key, value in updates.items()))
831 def _MergeAndVerifyHvState(op_input, obj_input):
832 """Combines the hv state from an opcode with the one of the object
834 @param op_input: The input dict from the opcode
835 @param obj_input: The input dict from the objects
836 @return: The verified and updated dict
840 invalid_hvs = set(op_input) - constants.HYPER_TYPES
842 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
843 " %s" % utils.CommaJoin(invalid_hvs),
845 if obj_input is None:
847 type_check = constants.HVSTS_PARAMETER_TYPES
848 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
853 def _MergeAndVerifyDiskState(op_input, obj_input):
854 """Combines the disk state from an opcode with the one of the object
856 @param op_input: The input dict from the opcode
857 @param obj_input: The input dict from the objects
858 @return: The verified and updated dict
861 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
863 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
864 utils.CommaJoin(invalid_dst),
866 type_check = constants.DSS_PARAMETER_TYPES
867 if obj_input is None:
869 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
871 for key, value in op_input.items())
876 def _ReleaseLocks(lu, level, names=None, keep=None):
877 """Releases locks owned by an LU.
879 @type lu: L{LogicalUnit}
880 @param level: Lock level
881 @type names: list or None
882 @param names: Names of locks to release
883 @type keep: list or None
884 @param keep: Names of locks to retain
887 assert not (keep is not None and names is not None), \
888 "Only one of the 'names' and the 'keep' parameters can be given"
890 if names is not None:
891 should_release = names.__contains__
893 should_release = lambda name: name not in keep
895 should_release = None
897 owned = lu.owned_locks(level)
899 # Not owning any lock at this level, do nothing
906 # Determine which locks to release
908 if should_release(name):
913 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
915 # Release just some locks
916 lu.glm.release(level, names=release)
918 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
921 lu.glm.release(level)
923 assert not lu.glm.is_owned(level), "No locks should be owned"
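# A minimal usage sketch (hypothetical LU with a "target_node" opcode slot):
# once a LU knows which node it will actually touch, the remaining node locks
# can be handed back early to reduce contention.
def _ExampleReleaseUnneededLocks(lu):
  """Example only: keep a single node lock and drop all instance locks."""
  # retain only the lock of the target node at the node level
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[lu.op.target_node])
  # release every lock held at the instance level
  _ReleaseLocks(lu, locking.LEVEL_INSTANCE)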
926 def _MapInstanceDisksToNodes(instances):
927 """Creates a map from (node, volume) to instance name.
929 @type instances: list of L{objects.Instance}
930 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
933 return dict(((node, vol), inst.name)
934 for inst in instances
935 for (node, vols) in inst.MapLVsByNode().items()
939 def _RunPostHook(lu, node_name):
940 """Runs the post-hook for an opcode on a single node.
943 hm = lu.proc.BuildHooksManager(lu)
945 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
946 except Exception, err: # pylint: disable=W0703
947 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
950 def _CheckOutputFields(static, dynamic, selected):
951 """Checks whether all selected fields are valid.
953 @type static: L{utils.FieldSet}
954 @param static: static fields set
955 @type dynamic: L{utils.FieldSet}
956 @param dynamic: dynamic fields set
963 delta = f.NonMatching(selected)
965 raise errors.OpPrereqError("Unknown output fields selected: %s"
966 % ",".join(delta), errors.ECODE_INVAL)
969 def _CheckGlobalHvParams(params):
970 """Validates that given hypervisor params are not global ones.
972 This will ensure that instances don't get customised versions of
976 used_globals = constants.HVC_GLOBALS.intersection(params)
978 msg = ("The following hypervisor parameters are global and cannot"
979 " be customized at instance level, please modify them at"
980 " cluster level: %s" % utils.CommaJoin(used_globals))
981 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
984 def _CheckNodeOnline(lu, node, msg=None):
985 """Ensure that a given node is online.
987 @param lu: the LU on behalf of which we make the check
988 @param node: the node to check
989 @param msg: if passed, should be a message to replace the default one
990 @raise errors.OpPrereqError: if the node is offline
994 msg = "Can't use offline node"
995 if lu.cfg.GetNodeInfo(node).offline:
996 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
999 def _CheckNodeNotDrained(lu, node):
1000 """Ensure that a given node is not drained.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @raise errors.OpPrereqError: if the node is drained
1007 if lu.cfg.GetNodeInfo(node).drained:
1008 raise errors.OpPrereqError("Can't use drained node %s" % node,
1012 def _CheckNodeVmCapable(lu, node):
1013 """Ensure that a given node is vm capable.
1015 @param lu: the LU on behalf of which we make the check
1016 @param node: the node to check
1017 @raise errors.OpPrereqError: if the node is not vm capable
1020 if not lu.cfg.GetNodeInfo(node).vm_capable:
1021 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1025 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1026 """Ensure that a node supports a given OS.
1028 @param lu: the LU on behalf of which we make the check
1029 @param node: the node to check
1030 @param os_name: the OS to query about
1031 @param force_variant: whether to ignore variant errors
1032 @raise errors.OpPrereqError: if the node does not support the OS
1035 result = lu.rpc.call_os_get(node, os_name)
1036 result.Raise("OS '%s' not in supported OS list for node %s" %
1038 prereq=True, ecode=errors.ECODE_INVAL)
1039 if not force_variant:
1040 _CheckOSVariant(result.payload, os_name)
1043 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1044 """Ensure that a node has the given secondary ip.
1046 @type lu: L{LogicalUnit}
1047 @param lu: the LU on behalf of which we make the check
1049 @param node: the node to check
1050 @type secondary_ip: string
1051 @param secondary_ip: the ip to check
1052 @type prereq: boolean
1053 @param prereq: whether to throw a prerequisite or an execute error
1054 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1055 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1058 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1059 result.Raise("Failure checking secondary ip on node %s" % node,
1060 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1061 if not result.payload:
1062 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1063 " please fix and re-run this command" % secondary_ip)
1065 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1067 raise errors.OpExecError(msg)
1070 def _GetClusterDomainSecret():
1071 """Reads the cluster domain secret.
1074 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1078 def _CheckInstanceState(lu, instance, req_states, msg=None):
1079 """Ensure that an instance is in one of the required states.
1081 @param lu: the LU on behalf of which we make the check
1082 @param instance: the instance to check
1083 @param msg: if passed, should be a message to replace the default one
1084 @raise errors.OpPrereqError: if the instance is not in the required state
1088 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1089 if instance.admin_state not in req_states:
1090 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1091 (instance.name, instance.admin_state, msg),
1094 if constants.ADMINST_UP not in req_states:
1095 pnode = instance.primary_node
1096 if not lu.cfg.GetNodeInfo(pnode).offline:
1097 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1098 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1099 prereq=True, ecode=errors.ECODE_ENVIRON)
1100 if instance.name in ins_l.payload:
1101 raise errors.OpPrereqError("Instance %s is running, %s" %
1102 (instance.name, msg), errors.ECODE_STATE)
1104 lu.LogWarning("Primary node offline, ignoring check that instance"
1108 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1109 """Computes if value is in the desired range.
1111 @param name: name of the parameter for which we perform the check
1112 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1114 @param ipolicy: dictionary containing min, max and std values
1115 @param value: actual value that we want to use
1116 @return: None or element not meeting the criteria
1120 if value in [None, constants.VALUE_AUTO]:
1122 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1123 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1124 if value > max_v or min_v > value:
1126 fqn = "%s/%s" % (name, qualifier)
1129 return ("%s value %s is not in range [%s, %s]" %
1130 (fqn, value, min_v, max_v))
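# Illustrative sketch (invented ipolicy numbers): with a 512-4096 MB memory
# range, checking 8192 returns a violation message, while a value inside the
# range (or None/"auto") returns None.
def _ExampleMinMaxSpecCheck():
  """Example only; exercises _ComputeMinMaxSpec with made-up limits."""
  ipolicy = {
    constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 512},
    constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096},
    }
  # returns a "... is not in range [512, 4096]" message string
  return _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "mem", ipolicy, 8192)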
1134 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1135 nic_count, disk_sizes, spindle_use,
1136 _compute_fn=_ComputeMinMaxSpec):
1137 """Verifies ipolicy against provided specs.
1140 @param ipolicy: The ipolicy
1142 @param mem_size: The memory size
1143 @type cpu_count: int
1144 @param cpu_count: Used cpu cores
1145 @type disk_count: int
1146 @param disk_count: Number of disks used
1147 @type nic_count: int
1148 @param nic_count: Number of nics used
1149 @type disk_sizes: list of ints
1150 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1151 @type spindle_use: int
1152 @param spindle_use: The number of spindles this instance uses
1153 @param _compute_fn: The compute function (unittest only)
1154 @return: A list of violations, or an empty list if no violations are found
1157 assert disk_count == len(disk_sizes)
1160 (constants.ISPEC_MEM_SIZE, "", mem_size),
1161 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1162 (constants.ISPEC_DISK_COUNT, "", disk_count),
1163 (constants.ISPEC_NIC_COUNT, "", nic_count),
1164 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1165 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1166 for idx, d in enumerate(disk_sizes)]
1169 (_compute_fn(name, qualifier, ipolicy, value)
1170 for (name, qualifier, value) in test_settings))
1173 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1174 _compute_fn=_ComputeIPolicySpecViolation):
1175 """Compute if instance meets the specs of ipolicy.
1178 @param ipolicy: The ipolicy to verify against
1179 @type instance: L{objects.Instance}
1180 @param instance: The instance to verify
1181 @param _compute_fn: The function to verify ipolicy (unittest only)
1182 @see: L{_ComputeIPolicySpecViolation}
1185 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1186 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1187 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1188 disk_count = len(instance.disks)
1189 disk_sizes = [disk.size for disk in instance.disks]
1190 nic_count = len(instance.nics)
1192 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1193 disk_sizes, spindle_use)
1196 def _ComputeIPolicyInstanceSpecViolation(
1197 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1198 """Compute if instance specs meets the specs of ipolicy.
1201 @param ipolicy: The ipolicy to verify against
1202 @type instance_spec: dict
1203 @param instance_spec: The instance spec to verify
1204 @param _compute_fn: The function to verify ipolicy (unittest only)
1205 @see: L{_ComputeIPolicySpecViolation}
1208 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1209 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1210 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1211 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1212 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1213 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1215 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1216 disk_sizes, spindle_use)
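# Illustrative sketch (hand-written spec, invented sizes): the spec dict uses
# the same ISPEC_* keys that the function above reads; the ipolicy argument is
# whatever policy the caller wants to validate against.
def _ExampleSpecViolationCheck(ipolicy):
  """Example only: validate a hand-written instance spec against an ipolicy."""
  spec = {
    constants.ISPEC_MEM_SIZE: 2048,
    constants.ISPEC_CPU_COUNT: 2,
    constants.ISPEC_DISK_COUNT: 1,
    constants.ISPEC_DISK_SIZE: [10240],
    constants.ISPEC_NIC_COUNT: 1,
    constants.ISPEC_SPINDLE_USE: 1,
    }
  # returns a list of human-readable violations (empty if the spec fits)
  return _ComputeIPolicyInstanceSpecViolation(ipolicy, spec)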
1219 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1221 _compute_fn=_ComputeIPolicyInstanceViolation):
1222 """Compute if instance meets the specs of the new target group.
1224 @param ipolicy: The ipolicy to verify
1225 @param instance: The instance object to verify
1226 @param current_group: The current group of the instance
1227 @param target_group: The new group of the instance
1228 @param _compute_fn: The function to verify ipolicy (unittest only)
1229 @see: L{_ComputeIPolicySpecViolation}
1232 if current_group == target_group:
1235 return _compute_fn(ipolicy, instance)
1238 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1239 _compute_fn=_ComputeIPolicyNodeViolation):
1240 """Checks that the target node is correct in terms of instance policy.
1242 @param ipolicy: The ipolicy to verify
1243 @param instance: The instance object to verify
1244 @param node: The new node to relocate
1245 @param ignore: Ignore violations of the ipolicy
1246 @param _compute_fn: The function to verify ipolicy (unittest only)
1247 @see: L{_ComputeIPolicySpecViolation}
1250 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1251 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1254 msg = ("Instance does not meet target node group's (%s) instance"
1255 " policy: %s") % (node.group, utils.CommaJoin(res))
1259 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1262 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1263 """Computes a set of any instances that would violate the new ipolicy.
1265 @param old_ipolicy: The current (still in-place) ipolicy
1266 @param new_ipolicy: The new (to become) ipolicy
1267 @param instances: List of instances to verify
1268 @return: A list of instances which violate the new ipolicy but
1272 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1273 _ComputeViolatingInstances(old_ipolicy, instances))
1276 def _ExpandItemName(fn, name, kind):
1277 """Expand an item name.
1279 @param fn: the function to use for expansion
1280 @param name: requested item name
1281 @param kind: text description ('Node' or 'Instance')
1282 @return: the resolved (full) name
1283 @raise errors.OpPrereqError: if the item is not found
1286 full_name = fn(name)
1287 if full_name is None:
1288 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1293 def _ExpandNodeName(cfg, name):
1294 """Wrapper over L{_ExpandItemName} for nodes."""
1295 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1298 def _ExpandInstanceName(cfg, name):
1299 """Wrapper over L{_ExpandItemName} for instance."""
1300 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1303 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1304 minmem, maxmem, vcpus, nics, disk_template, disks,
1305 bep, hvp, hypervisor_name, tags):
1306 """Builds instance related env variables for hooks
1308 This builds the hook environment from individual variables.
1311 @param name: the name of the instance
1312 @type primary_node: string
1313 @param primary_node: the name of the instance's primary node
1314 @type secondary_nodes: list
1315 @param secondary_nodes: list of secondary nodes as strings
1316 @type os_type: string
1317 @param os_type: the name of the instance's OS
1318 @type status: string
1319 @param status: the desired status of the instance
1320 @type minmem: string
1321 @param minmem: the minimum memory size of the instance
1322 @type maxmem: string
1323 @param maxmem: the maximum memory size of the instance
1325 @param vcpus: the count of VCPUs the instance has
1327 @param nics: list of tuples (ip, mac, mode, link) representing
1328 the NICs the instance has
1329 @type disk_template: string
1330 @param disk_template: the disk template of the instance
1332 @param disks: the list of (size, mode) pairs
1334 @param bep: the backend parameters for the instance
1336 @param hvp: the hypervisor parameters for the instance
1337 @type hypervisor_name: string
1338 @param hypervisor_name: the hypervisor for the instance
1340 @param tags: list of instance tags as strings
1342 @return: the hook environment for this instance
1347 "INSTANCE_NAME": name,
1348 "INSTANCE_PRIMARY": primary_node,
1349 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1350 "INSTANCE_OS_TYPE": os_type,
1351 "INSTANCE_STATUS": status,
1352 "INSTANCE_MINMEM": minmem,
1353 "INSTANCE_MAXMEM": maxmem,
1354 # TODO(2.7) remove deprecated "memory" value
1355 "INSTANCE_MEMORY": maxmem,
1356 "INSTANCE_VCPUS": vcpus,
1357 "INSTANCE_DISK_TEMPLATE": disk_template,
1358 "INSTANCE_HYPERVISOR": hypervisor_name,
1361 nic_count = len(nics)
1362 for idx, (ip, mac, mode, link) in enumerate(nics):
1365 env["INSTANCE_NIC%d_IP" % idx] = ip
1366 env["INSTANCE_NIC%d_MAC" % idx] = mac
1367 env["INSTANCE_NIC%d_MODE" % idx] = mode
1368 env["INSTANCE_NIC%d_LINK" % idx] = link
1369 if mode == constants.NIC_MODE_BRIDGED:
1370 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1374 env["INSTANCE_NIC_COUNT"] = nic_count
1377 disk_count = len(disks)
1378 for idx, (size, mode) in enumerate(disks):
1379 env["INSTANCE_DISK%d_SIZE" % idx] = size
1380 env["INSTANCE_DISK%d_MODE" % idx] = mode
1384 env["INSTANCE_DISK_COUNT"] = disk_count
1389 env["INSTANCE_TAGS"] = " ".join(tags)
1391 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1392 for key, value in source.items():
1393 env["INSTANCE_%s_%s" % (kind, key)] = value
1398 def _NICListToTuple(lu, nics):
1399 """Build a list of nic information tuples.
1401 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1402 value in LUInstanceQueryData.
1404 @type lu: L{LogicalUnit}
1405 @param lu: the logical unit on whose behalf we execute
1406 @type nics: list of L{objects.NIC}
1407 @param nics: list of nics to convert to hooks tuples
1411 cluster = lu.cfg.GetClusterInfo()
1415 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1416 mode = filled_params[constants.NIC_MODE]
1417 link = filled_params[constants.NIC_LINK]
1418 hooks_nics.append((ip, mac, mode, link))
1422 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1423 """Builds instance related env variables for hooks from an object.
1425 @type lu: L{LogicalUnit}
1426 @param lu: the logical unit on whose behalf we execute
1427 @type instance: L{objects.Instance}
1428 @param instance: the instance for which we should build the
1430 @type override: dict
1431 @param override: dictionary with key/values that will override
1434 @return: the hook environment dictionary
1437 cluster = lu.cfg.GetClusterInfo()
1438 bep = cluster.FillBE(instance)
1439 hvp = cluster.FillHV(instance)
1441 "name": instance.name,
1442 "primary_node": instance.primary_node,
1443 "secondary_nodes": instance.secondary_nodes,
1444 "os_type": instance.os,
1445 "status": instance.admin_state,
1446 "maxmem": bep[constants.BE_MAXMEM],
1447 "minmem": bep[constants.BE_MINMEM],
1448 "vcpus": bep[constants.BE_VCPUS],
1449 "nics": _NICListToTuple(lu, instance.nics),
1450 "disk_template": instance.disk_template,
1451 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1454 "hypervisor_name": instance.hypervisor,
1455 "tags": instance.tags,
1458 args.update(override)
1459 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
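# A shorter variant (the override value is hypothetical): the same environment
# built from an existing Instance object, with the reported status forced to
# the "admin down" state.
def _ExampleInstanceHookEnvByObject(lu, instance):
  """Example only; overrides one field of the auto-generated arguments."""
  return _BuildInstanceHookEnvByObject(
    lu, instance, override={"status": constants.ADMINST_DOWN})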
1462 def _AdjustCandidatePool(lu, exceptions):
1463 """Adjust the candidate pool after node operations.
1466 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1468 lu.LogInfo("Promoted nodes to master candidate role: %s",
1469 utils.CommaJoin(node.name for node in mod_list))
1470 for name in mod_list:
1471 lu.context.ReaddNode(name)
1472 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1474 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1478 def _DecideSelfPromotion(lu, exceptions=None):
1479 """Decide whether I should promote myself as a master candidate.
1482 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1483 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1484 # the new node will increase mc_max by one, so:
1485 mc_should = min(mc_should + 1, cp_size)
1486 return mc_now < mc_should
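# Worked example with invented numbers: if candidate_pool_size is 10 and
# GetMasterCandidateStats reports mc_now = 7 with mc_should = 8, then the
# target becomes min(8 + 1, 10) = 9; since 7 < 9, the new node should promote
# itself to master candidate.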
1489 def _ComputeViolatingInstances(ipolicy, instances):
1490 """Computes a set of instances who violates given ipolicy.
1492 @param ipolicy: The ipolicy to verify
1493 @type instances: list of L{objects.Instance}
1494 @param instances: List of instances to verify
1495 @return: A frozenset of instance names violating the ipolicy
1498 return frozenset([inst.name for inst in instances
1499 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1502 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1503 """Check that the brigdes needed by a list of nics exist.
1506 cluster = lu.cfg.GetClusterInfo()
1507 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1508 brlist = [params[constants.NIC_LINK] for params in paramslist
1509 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1511 result = lu.rpc.call_bridges_exist(target_node, brlist)
1512 result.Raise("Error checking bridges on destination node '%s'" %
1513 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1516 def _CheckInstanceBridgesExist(lu, instance, node=None):
1517 """Check that the brigdes needed by an instance exist.
1521 node = instance.primary_node
1522 _CheckNicsBridgesExist(lu, instance.nics, node)
1525 def _CheckOSVariant(os_obj, name):
1526 """Check whether an OS name conforms to the os variants specification.
1528 @type os_obj: L{objects.OS}
1529 @param os_obj: OS object to check
1531 @param name: OS name passed by the user, to check for validity
1534 variant = objects.OS.GetVariant(name)
1535 if not os_obj.supported_variants:
1537 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1538 " passed)" % (os_obj.name, variant),
1542 raise errors.OpPrereqError("OS name must include a variant",
1545 if variant not in os_obj.supported_variants:
1546 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1549 def _GetNodeInstancesInner(cfg, fn):
1550 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1553 def _GetNodeInstances(cfg, node_name):
1554 """Returns a list of all primary and secondary instances on a node.
1558 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1561 def _GetNodePrimaryInstances(cfg, node_name):
1562 """Returns primary instances on a node.
1565 return _GetNodeInstancesInner(cfg,
1566 lambda inst: node_name == inst.primary_node)
1569 def _GetNodeSecondaryInstances(cfg, node_name):
1570 """Returns secondary instances on a node.
1573 return _GetNodeInstancesInner(cfg,
1574 lambda inst: node_name in inst.secondary_nodes)
1577 def _GetStorageTypeArgs(cfg, storage_type):
1578 """Returns the arguments for a storage type.
1581 # Special case for file storage
1582 if storage_type == constants.ST_FILE:
1583 # storage.FileStorage wants a list of storage directories
1584 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1589 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1592 for dev in instance.disks:
1593 cfg.SetDiskID(dev, node_name)
1595 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1597 result.Raise("Failed to get disk status from node %s" % node_name,
1598 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1600 for idx, bdev_status in enumerate(result.payload):
1601 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1607 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1608 """Check the sanity of iallocator and node arguments and use the
1609 cluster-wide iallocator if appropriate.
1611 Check that at most one of (iallocator, node) is specified. If none is
1612 specified, then the LU's opcode's iallocator slot is filled with the
1613 cluster-wide default iallocator.
1615 @type iallocator_slot: string
1616 @param iallocator_slot: the name of the opcode iallocator slot
1617 @type node_slot: string
1618 @param node_slot: the name of the opcode target node slot
1621 node = getattr(lu.op, node_slot, None)
1622 ialloc = getattr(lu.op, iallocator_slot, None)
1624 if node is not None and ialloc is not None:
1625 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1627 elif node is None and ialloc is None:
1628 default_iallocator = lu.cfg.GetDefaultIAllocator()
1629 if default_iallocator:
1630 setattr(lu.op, iallocator_slot, default_iallocator)
1632 raise errors.OpPrereqError("No iallocator or node given and no"
1633 " cluster-wide default iallocator found;"
1634 " please specify either an iallocator or a"
1635 " node, or set a cluster-wide default"
1636 " iallocator", errors.ECODE_INVAL)
1639 def _GetDefaultIAllocator(cfg, ialloc):
1640 """Decides on which iallocator to use.
1642 @type cfg: L{config.ConfigWriter}
1643 @param cfg: Cluster configuration object
1644 @type ialloc: string or None
1645 @param ialloc: Iallocator specified in opcode
1647 @return: Iallocator name
1651 # Use default iallocator
1652 ialloc = cfg.GetDefaultIAllocator()
1655 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1656 " opcode nor as a cluster-wide default",
1662 class LUClusterPostInit(LogicalUnit):
1663 """Logical unit for running hooks after cluster initialization.
1666 HPATH = "cluster-init"
1667 HTYPE = constants.HTYPE_CLUSTER
1669 def BuildHooksEnv(self):
1674 "OP_TARGET": self.cfg.GetClusterName(),
1677 def BuildHooksNodes(self):
1678 """Build hooks nodes.
1681 return ([], [self.cfg.GetMasterNode()])
1683 def Exec(self, feedback_fn):
1690 class LUClusterDestroy(LogicalUnit):
1691 """Logical unit for destroying the cluster.
1694 HPATH = "cluster-destroy"
1695 HTYPE = constants.HTYPE_CLUSTER
1697 def BuildHooksEnv(self):
1702 "OP_TARGET": self.cfg.GetClusterName(),
1705 def BuildHooksNodes(self):
1706 """Build hooks nodes.
1711 def CheckPrereq(self):
1712 """Check prerequisites.
1714 This checks whether the cluster is empty.
1716 Any errors are signaled by raising errors.OpPrereqError.
1719 master = self.cfg.GetMasterNode()
1721 nodelist = self.cfg.GetNodeList()
1722 if len(nodelist) != 1 or nodelist[0] != master:
1723 raise errors.OpPrereqError("There are still %d node(s) in"
1724 " this cluster." % (len(nodelist) - 1),
1726 instancelist = self.cfg.GetInstanceList()
1728 raise errors.OpPrereqError("There are still %d instance(s) in"
1729 " this cluster." % len(instancelist),
1732 def Exec(self, feedback_fn):
1733 """Destroys the cluster.
1736 master_params = self.cfg.GetMasterNetworkParameters()
1738 # Run post hooks on master node before it's removed
1739 _RunPostHook(self, master_params.name)
1741 ems = self.cfg.GetUseExternalMipScript()
1742 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1745 self.LogWarning("Error disabling the master IP address: %s",
1748 return master_params.name
1751 def _VerifyCertificate(filename):
1752 """Verifies a certificate for L{LUClusterVerifyConfig}.
1754 @type filename: string
1755 @param filename: Path to PEM file
1759 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1760 utils.ReadFile(filename))
1761 except Exception, err: # pylint: disable=W0703
1762 return (LUClusterVerifyConfig.ETYPE_ERROR,
1763 "Failed to load X509 certificate %s: %s" % (filename, err))
1766 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1767 constants.SSL_CERT_EXPIRATION_ERROR)
1770 fnamemsg = "While verifying %s: %s" % (filename, msg)
1775 return (None, fnamemsg)
1776 elif errcode == utils.CERT_WARNING:
1777 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1778 elif errcode == utils.CERT_ERROR:
1779 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1781 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1784 def _GetAllHypervisorParameters(cluster, instances):
1785 """Compute the set of all hypervisor parameters.
1787 @type cluster: L{objects.Cluster}
1788 @param cluster: the cluster object
1789 @type instances: list of L{objects.Instance}
1790 @param instances: additional instances from which to obtain parameters
1791 @rtype: list of (origin, hypervisor, parameters)
1792 @return: a list with all parameters found, indicating the hypervisor they
1793 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1798 for hv_name in cluster.enabled_hypervisors:
1799 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1801 for os_name, os_hvp in cluster.os_hvp.items():
1802 for hv_name, hv_params in os_hvp.items():
1804 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1805 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1807 # TODO: collapse identical parameter values in a single one
1808 for instance in instances:
1809 if instance.hvparams:
1810 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1811 cluster.FillHV(instance)))
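# Shape of the collected data (values invented for illustration):
#   [("cluster", "xen-pvm", {...cluster defaults...}),
#    ("os debian-8", "xen-pvm", {...OS overrides on top of the defaults...}),
#    ("instance inst1.example.com", "kvm", {...fully filled parameters...})]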
1816 class _VerifyErrors(object):
1817 """Mix-in for cluster/group verify LUs.
1819 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1820 self.op and self._feedback_fn to be available.)
1824 ETYPE_FIELD = "code"
1825 ETYPE_ERROR = "ERROR"
1826 ETYPE_WARNING = "WARNING"
1828 def _Error(self, ecode, item, msg, *args, **kwargs):
1829 """Format an error message.
1831 Based on the opcode's error_codes parameter, either format a
1832 parseable error code, or a simpler error string.
1834 This must be called only from Exec and functions called from Exec.
1837 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1838 itype, etxt, _ = ecode
1839 # first complete the msg
1842 # then format the whole message
1843 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1844 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1850 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1851 # and finally report it via the feedback_fn
1852 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1854 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1855 """Log an error message if the passed condition is True.
1859 or self.op.debug_simulate_errors) # pylint: disable=E1101
1861 # If the error code is in the list of ignored errors, demote the error to a
1863 (_, etxt, _) = ecode
1864 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1865 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1868 self._Error(ecode, *args, **kwargs)
1870 # do not mark the operation as failed for WARN cases only
1871 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1872 self.bad = self.bad or cond
1875 class LUClusterVerify(NoHooksLU):
1876 """Submits all jobs necessary to verify the cluster.
1881 def ExpandNames(self):
1882 self.needed_locks = {}
1884 def Exec(self, feedback_fn):
1887 if self.op.group_name:
1888 groups = [self.op.group_name]
1889 depends_fn = lambda: None
1891 groups = self.cfg.GetNodeGroupList()
1893 # Verify global configuration
1895 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1898 # Always depend on global verification
1899 depends_fn = lambda: [(-len(jobs), [])]
1902 [opcodes.OpClusterVerifyGroup(group_name=group,
1903 ignore_errors=self.op.ignore_errors,
1904 depends=depends_fn())]
1905 for group in groups)
1907 # Fix up all parameters
1908 for op in itertools.chain(*jobs): # pylint: disable=W0142
1909 op.debug_simulate_errors = self.op.debug_simulate_errors
1910 op.verbose = self.op.verbose
1911 op.error_codes = self.op.error_codes
1913 op.skip_checks = self.op.skip_checks
1914 except AttributeError:
1915 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1917 return ResultWithJobs(jobs)
1920 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1921 """Verifies the cluster config.
1926 def _VerifyHVP(self, hvp_data):
1927 """Verifies locally the syntax of the hypervisor parameters.
1930 for item, hv_name, hv_params in hvp_data:
1931 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1934 hv_class = hypervisor.GetHypervisor(hv_name)
1935 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1936 hv_class.CheckParameterSyntax(hv_params)
1937 except errors.GenericError, err:
1938 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1940 def ExpandNames(self):
1941 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1942 self.share_locks = _ShareAll()
1944 def CheckPrereq(self):
1945 """Check prerequisites.
1948 # Retrieve all information
1949 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1950 self.all_node_info = self.cfg.GetAllNodesInfo()
1951 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1953 def Exec(self, feedback_fn):
1954 """Verify integrity of cluster, performing various test on nodes.
1958 self._feedback_fn = feedback_fn
1960 feedback_fn("* Verifying cluster config")
1962 for msg in self.cfg.VerifyConfig():
1963 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1965 feedback_fn("* Verifying cluster certificate files")
1967 for cert_filename in constants.ALL_CERT_FILES:
1968 (errcode, msg) = _VerifyCertificate(cert_filename)
1969 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1971 feedback_fn("* Verifying hypervisor parameters")
1973 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1974 self.all_inst_info.values()))
1976 feedback_fn("* Verifying all nodes belong to an existing group")
1978 # We do this verification here because, should this bogus circumstance
1979 # occur, it would never be caught by VerifyGroup, which only acts on
1980 # nodes/instances reachable from existing node groups.
1982 dangling_nodes = set(node.name for node in self.all_node_info.values()
1983 if node.group not in self.all_group_info)
1985 dangling_instances = {}
1986 no_node_instances = []
1988 for inst in self.all_inst_info.values():
1989 if inst.primary_node in dangling_nodes:
1990 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1991 elif inst.primary_node not in self.all_node_info:
1992 no_node_instances.append(inst.name)
1997 utils.CommaJoin(dangling_instances.get(node.name,
1999 for node in dangling_nodes]
2001 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2003 "the following nodes (and their instances) belong to a non"
2004 " existing group: %s", utils.CommaJoin(pretty_dangling))
2006 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2008 "the following instances have a non-existing primary-node:"
2009 " %s", utils.CommaJoin(no_node_instances))
2014 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2015 """Verifies the status of a node group.
2018 HPATH = "cluster-verify"
2019 HTYPE = constants.HTYPE_CLUSTER
2022 _HOOKS_INDENT_RE = re.compile("^", re.M)
2024 class NodeImage(object):
2025 """A class representing the logical and physical status of a node.
2028 @ivar name: the node name to which this object refers
2029 @ivar volumes: a structure as returned from
2030 L{ganeti.backend.GetVolumeList} (runtime)
2031 @ivar instances: a list of running instances (runtime)
2032 @ivar pinst: list of configured primary instances (config)
2033 @ivar sinst: list of configured secondary instances (config)
2034 @ivar sbp: dictionary of {primary-node: list of instances} for all
2035 instances for which this node is secondary (config)
2036 @ivar mfree: free memory, as reported by hypervisor (runtime)
2037 @ivar dfree: free disk, as reported by the node (runtime)
2038 @ivar offline: the offline status (config)
2039 @type rpc_fail: boolean
2040 @ivar rpc_fail: whether the RPC verify call failed (overall,
2041 not whether the individual keys were correct) (runtime)
2042 @type lvm_fail: boolean
2043 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2044 @type hyp_fail: boolean
2045 @ivar hyp_fail: whether the RPC call didn't return the instance list
2046 @type ghost: boolean
2047 @ivar ghost: whether this is a known node or not (config)
2048 @type os_fail: boolean
2049 @ivar os_fail: whether the RPC call didn't return valid OS data
2051 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2052 @type vm_capable: boolean
2053 @ivar vm_capable: whether the node can host instances
2056 def __init__(self, offline=False, name=None, vm_capable=True):
2065 self.offline = offline
2066 self.vm_capable = vm_capable
2067 self.rpc_fail = False
2068 self.lvm_fail = False
2069 self.hyp_fail = False
2071 self.os_fail = False
2074 def ExpandNames(self):
2075 # This raises errors.OpPrereqError on its own:
2076 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2078 # Get instances in node group; this is unsafe and needs verification later
2080 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2082 self.needed_locks = {
2083 locking.LEVEL_INSTANCE: inst_names,
2084 locking.LEVEL_NODEGROUP: [self.group_uuid],
2085 locking.LEVEL_NODE: [],
2088 self.share_locks = _ShareAll()
2090 def DeclareLocks(self, level):
2091 if level == locking.LEVEL_NODE:
2092 # Get members of node group; this is unsafe and needs verification later
2093 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2095 all_inst_info = self.cfg.GetAllInstancesInfo()
2097 # In Exec(), we warn about mirrored instances that have primary and
2098 # secondary living in separate node groups. To fully verify that
2099 # volumes for these instances are healthy, we will need to do an
2100 # extra call to their secondaries. We ensure here those nodes will
2102 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2103 # Important: access only the instances whose lock is owned
2104 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2105 nodes.update(all_inst_info[inst].secondary_nodes)
2107 self.needed_locks[locking.LEVEL_NODE] = nodes
2109 def CheckPrereq(self):
2110 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2111 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2113 group_nodes = set(self.group_info.members)
2115 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2118 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2120 unlocked_instances = \
2121 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2124 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2125 utils.CommaJoin(unlocked_nodes),
2128 if unlocked_instances:
2129 raise errors.OpPrereqError("Missing lock for instances: %s" %
2130 utils.CommaJoin(unlocked_instances),
2133 self.all_node_info = self.cfg.GetAllNodesInfo()
2134 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2136 self.my_node_names = utils.NiceSort(group_nodes)
2137 self.my_inst_names = utils.NiceSort(group_instances)
2139 self.my_node_info = dict((name, self.all_node_info[name])
2140 for name in self.my_node_names)
2142 self.my_inst_info = dict((name, self.all_inst_info[name])
2143 for name in self.my_inst_names)
2145 # We detect here the nodes that will need the extra RPC calls for verifying
2146 # split LV volumes; they should be locked.
2147 extra_lv_nodes = set()
2149 for inst in self.my_inst_info.values():
2150 if inst.disk_template in constants.DTS_INT_MIRROR:
2151 for nname in inst.all_nodes:
2152 if self.all_node_info[nname].group != self.group_uuid:
2153 extra_lv_nodes.add(nname)
2155 unlocked_lv_nodes = \
2156 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2158 if unlocked_lv_nodes:
2159 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2160 utils.CommaJoin(unlocked_lv_nodes),
2162 self.extra_lv_nodes = list(extra_lv_nodes)
2164 def _VerifyNode(self, ninfo, nresult):
2165 """Perform some basic validation on data returned from a node.
2167 - check the result data structure is well formed and has all the expected fields
2169 - check ganeti version
2171 @type ninfo: L{objects.Node}
2172 @param ninfo: the node to check
2173 @param nresult: the results from the node
2175 @return: whether overall this call was successful (and we can expect
2176 reasonable values in the response)
2180 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2182 # main result, nresult should be a non-empty dict
2183 test = not nresult or not isinstance(nresult, dict)
2184 _ErrorIf(test, constants.CV_ENODERPC, node,
2185 "unable to verify node: no data returned")
2189 # compares ganeti version
2190 local_version = constants.PROTOCOL_VERSION
2191 remote_version = nresult.get("version", None)
2192 test = not (remote_version and
2193 isinstance(remote_version, (list, tuple)) and
2194 len(remote_version) == 2)
2195 _ErrorIf(test, constants.CV_ENODERPC, node,
2196 "connection to node returned invalid data")
2200 test = local_version != remote_version[0]
2201 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2202 "incompatible protocol versions: master %s,"
2203 " node %s", local_version, remote_version[0])
2207 # node seems compatible, we can actually try to look into its results
2209 # full package version
2210 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2211 constants.CV_ENODEVERSION, node,
2212 "software version mismatch: master %s, node %s",
2213 constants.RELEASE_VERSION, remote_version[1],
2214 code=self.ETYPE_WARNING)
2216 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2217 if ninfo.vm_capable and isinstance(hyp_result, dict):
2218 for hv_name, hv_result in hyp_result.iteritems():
2219 test = hv_result is not None
2220 _ErrorIf(test, constants.CV_ENODEHV, node,
2221 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2223 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2224 if ninfo.vm_capable and isinstance(hvp_result, list):
2225 for item, hv_name, hv_result in hvp_result:
2226 _ErrorIf(True, constants.CV_ENODEHV, node,
2227 "hypervisor %s parameter verify failure (source %s): %s",
2228 hv_name, item, hv_result)
2230 test = nresult.get(constants.NV_NODESETUP,
2231 ["Missing NODESETUP results"])
2232 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2237 def _VerifyNodeTime(self, ninfo, nresult,
2238 nvinfo_starttime, nvinfo_endtime):
2239 """Check the node time.
2241 @type ninfo: L{objects.Node}
2242 @param ninfo: the node to check
2243 @param nresult: the remote results for the node
2244 @param nvinfo_starttime: the start time of the RPC call
2245 @param nvinfo_endtime: the end time of the RPC call
2249 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2251 ntime = nresult.get(constants.NV_TIME, None)
2253 ntime_merged = utils.MergeTime(ntime)
2254 except (ValueError, TypeError):
2255 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2258 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2259 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2260 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2261 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2265 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2266 "Node time diverges by at least %s from master node time",
2269 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2270 """Check the node LVM results.
2272 @type ninfo: L{objects.Node}
2273 @param ninfo: the node to check
2274 @param nresult: the remote results for the node
2275 @param vg_name: the configured VG name
2282 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2284 # checks vg existence and size > 20G
2285 vglist = nresult.get(constants.NV_VGLIST, None)
2287 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2289 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2290 constants.MIN_VG_SIZE)
2291 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2294 pvlist = nresult.get(constants.NV_PVLIST, None)
2295 test = pvlist is None
2296 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2298 # check that ':' is not present in PV names, since it's a
2299 # special character for lvcreate (denotes the range of PEs to use on this PV)
2301 for _, pvname, owner_vg in pvlist:
2302 test = ":" in pvname
2303 _ErrorIf(test, constants.CV_ENODELVM, node,
2304 "Invalid character ':' in PV '%s' of VG '%s'",
2307 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2308 """Check the node bridges.
2310 @type ninfo: L{objects.Node}
2311 @param ninfo: the node to check
2312 @param nresult: the remote results for the node
2313 @param bridges: the expected list of bridges
2320 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2322 missing = nresult.get(constants.NV_BRIDGES, None)
2323 test = not isinstance(missing, list)
2324 _ErrorIf(test, constants.CV_ENODENET, node,
2325 "did not return valid bridge information")
2327 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2328 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2330 def _VerifyNodeUserScripts(self, ninfo, nresult):
2331 """Check the results of user scripts presence and executability on the node
2333 @type ninfo: L{objects.Node}
2334 @param ninfo: the node to check
2335 @param nresult: the remote results for the node
2340 test = constants.NV_USERSCRIPTS not in nresult
2341 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2342 "did not return user scripts information")
2344 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2346 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2347 "user scripts not present or not executable: %s" %
2348 utils.CommaJoin(sorted(broken_scripts)))
2350 def _VerifyNodeNetwork(self, ninfo, nresult):
2351 """Check the node network connectivity results.
2353 @type ninfo: L{objects.Node}
2354 @param ninfo: the node to check
2355 @param nresult: the remote results for the node
2359 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2361 test = constants.NV_NODELIST not in nresult
2362 _ErrorIf(test, constants.CV_ENODESSH, node,
2363 "node hasn't returned node ssh connectivity data")
2365 if nresult[constants.NV_NODELIST]:
2366 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2367 _ErrorIf(True, constants.CV_ENODESSH, node,
2368 "ssh communication with node '%s': %s", a_node, a_msg)
2370 test = constants.NV_NODENETTEST not in nresult
2371 _ErrorIf(test, constants.CV_ENODENET, node,
2372 "node hasn't returned node tcp connectivity data")
2374 if nresult[constants.NV_NODENETTEST]:
2375 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2377 _ErrorIf(True, constants.CV_ENODENET, node,
2378 "tcp communication with node '%s': %s",
2379 anode, nresult[constants.NV_NODENETTEST][anode])
2381 test = constants.NV_MASTERIP not in nresult
2382 _ErrorIf(test, constants.CV_ENODENET, node,
2383 "node hasn't returned node master IP reachability data")
2385 if not nresult[constants.NV_MASTERIP]:
2386 if node == self.master_node:
2387 msg = "the master node cannot reach the master IP (not configured?)"
2389 msg = "cannot reach the master IP"
2390 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2392 def _VerifyInstance(self, instance, instanceconfig, node_image,
2394 """Verify an instance.
2396 This function checks to see if the required block devices are
2397 available on the instance's nodes.
2400 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2401 node_current = instanceconfig.primary_node
2403 node_vol_should = {}
2404 instanceconfig.MapLVsByNode(node_vol_should)
2406 cluster = self.cfg.GetClusterInfo()
2407 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2409 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2410 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2412 for node in node_vol_should:
2413 n_img = node_image[node]
2414 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2415 # ignore missing volumes on offline or broken nodes
2417 for volume in node_vol_should[node]:
2418 test = volume not in n_img.volumes
2419 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2420 "volume %s missing on node %s", volume, node)
2422 if instanceconfig.admin_state == constants.ADMINST_UP:
2423 pri_img = node_image[node_current]
2424 test = instance not in pri_img.instances and not pri_img.offline
2425 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2426 "instance not running on its primary node %s",
2429 diskdata = [(nname, success, status, idx)
2430 for (nname, disks) in diskstatus.items()
2431 for idx, (success, status) in enumerate(disks)]
2433 for nname, success, bdev_status, idx in diskdata:
2434 # the 'ghost node' construction in Exec() ensures that we have a
2436 snode = node_image[nname]
2437 bad_snode = snode.ghost or snode.offline
2438 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2439 not success and not bad_snode,
2440 constants.CV_EINSTANCEFAULTYDISK, instance,
2441 "couldn't retrieve status for disk/%s on %s: %s",
2442 idx, nname, bdev_status)
2443 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2444 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2445 constants.CV_EINSTANCEFAULTYDISK, instance,
2446 "disk/%s on %s is faulty", idx, nname)
2448 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2449 """Verify if there are any unknown volumes in the cluster.
2451 The .os, .swap and backup volumes are ignored. All other volumes are
2452 reported as unknown.
2454 @type reserved: L{ganeti.utils.FieldSet}
2455 @param reserved: a FieldSet of reserved volume names
2458 for node, n_img in node_image.items():
2459 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2460 self.all_node_info[node].group != self.group_uuid):
2461 # skip non-healthy nodes
2463 for volume in n_img.volumes:
2464 test = ((node not in node_vol_should or
2465 volume not in node_vol_should[node]) and
2466 not reserved.Matches(volume))
2467 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2468 "volume %s is unknown", volume)
2470 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2471 """Verify N+1 Memory Resilience.
2473 Check that if one single node dies we can still start all the
2474 instances it was primary for.
2477 cluster_info = self.cfg.GetClusterInfo()
2478 for node, n_img in node_image.items():
2479 # This code checks that every node which is now listed as
2480 # secondary has enough memory to host all instances it is
2481 # supposed to, should a single other node in the cluster fail.
2482 # FIXME: not ready for failover to an arbitrary node
2483 # FIXME: does not support file-backed instances
2484 # WARNING: we currently take into account down instances as well
2485 # as up ones, considering that even if they're down someone
2486 # might want to start them even in the event of a node failure.
2487 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2488 # we're skipping nodes marked offline and nodes in other groups from
2489 # the N+1 warning, since most likely we don't have good memory
2490 # information from them; we already list instances living on such
2491 # nodes, and that's enough warning
2493 #TODO(dynmem): also consider ballooning out other instances
2494 for prinode, instances in n_img.sbp.items():
2496 for instance in instances:
2497 bep = cluster_info.FillBE(instance_cfg[instance])
2498 if bep[constants.BE_AUTO_BALANCE]:
2499 needed_mem += bep[constants.BE_MINMEM]
2500 test = n_img.mfree < needed_mem
2501 self._ErrorIf(test, constants.CV_ENODEN1, node,
2502 "not enough memory to accomodate instance failovers"
2503 " should node %s fail (%dMiB needed, %dMiB available)",
2504 prinode, needed_mem, n_img.mfree)
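# Worked example with hypothetical figures (not from the original module): if
# this node is secondary for two auto-balanced instances of primary node P with
# BE_MINMEM values of 512 and 1024 MiB, needed_mem is 1536 MiB; with only
# 1024 MiB reported free (n_img.mfree), the check above reports an ENODEN1
# error naming P as the node whose failure could not be absorbed.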
2507 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2508 (files_all, files_opt, files_mc, files_vm)):
2509 """Verifies file checksums collected from all nodes.
2511 @param errorif: Callback for reporting errors
2512 @param nodeinfo: List of L{objects.Node} objects
2513 @param master_node: Name of master node
2514 @param all_nvinfo: RPC results
2517 # Define functions determining which nodes to consider for a file
2520 (files_mc, lambda node: (node.master_candidate or
2521 node.name == master_node)),
2522 (files_vm, lambda node: node.vm_capable),
2525 # Build mapping from filename to list of nodes which should have the file
2527 for (files, fn) in files2nodefn:
2529 filenodes = nodeinfo
2531 filenodes = filter(fn, nodeinfo)
2532 nodefiles.update((filename,
2533 frozenset(map(operator.attrgetter("name"), filenodes)))
2534 for filename in files)
2536 assert set(nodefiles) == (files_all | files_mc | files_vm)
2538 fileinfo = dict((filename, {}) for filename in nodefiles)
2539 ignore_nodes = set()
2541 for node in nodeinfo:
2543 ignore_nodes.add(node.name)
2546 nresult = all_nvinfo[node.name]
2548 if nresult.fail_msg or not nresult.payload:
2551 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2553 test = not (node_files and isinstance(node_files, dict))
2554 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2555 "Node did not return file checksum data")
2557 ignore_nodes.add(node.name)
2560 # Build per-checksum mapping from filename to nodes having it
2561 for (filename, checksum) in node_files.items():
2562 assert filename in nodefiles
2563 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2565 for (filename, checksums) in fileinfo.items():
2566 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2568 # Nodes having the file
2569 with_file = frozenset(node_name
2570 for nodes in fileinfo[filename].values()
2571 for node_name in nodes) - ignore_nodes
2573 expected_nodes = nodefiles[filename] - ignore_nodes
2575 # Nodes missing file
2576 missing_file = expected_nodes - with_file
2578 if filename in files_opt:
2580 errorif(missing_file and missing_file != expected_nodes,
2581 constants.CV_ECLUSTERFILECHECK, None,
2582 "File %s is optional, but it must exist on all or no"
2583 " nodes (not found on %s)",
2584 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2586 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2587 "File %s is missing from node(s) %s", filename,
2588 utils.CommaJoin(utils.NiceSort(missing_file)))
2590 # Warn if a node has a file it shouldn't
2591 unexpected = with_file - expected_nodes
2593 constants.CV_ECLUSTERFILECHECK, None,
2594 "File %s should not exist on node(s) %s",
2595 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2597 # See if there are multiple versions of the file
2598 test = len(checksums) > 1
2600 variants = ["variant %s on %s" %
2601 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2602 for (idx, (checksum, nodes)) in
2603 enumerate(sorted(checksums.items()))]
2607 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2608 "File %s found with %s different checksums (%s)",
2609 filename, len(checksums), "; ".join(variants))
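# Illustrative sketch (not part of the original module), with hypothetical file
# names and nodes: the two main structures built above are shaped like
#
#   nodefiles = {"/path/to/file": frozenset(["node1", "node2"]), ...}
#   fileinfo  = {"/path/to/file": {"<checksum>": set(["node1"]), ...}, ...}
#
# from which missing files, unexpected files and checksum variants are derived.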
2611 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2613 """Verifies and the node DRBD status.
2615 @type ninfo: L{objects.Node}
2616 @param ninfo: the node to check
2617 @param nresult: the remote results for the node
2618 @param instanceinfo: the dict of instances
2619 @param drbd_helper: the configured DRBD usermode helper
2620 @param drbd_map: the DRBD map as returned by
2621 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2625 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2628 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2629 test = (helper_result is None)
2630 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2631 "no drbd usermode helper returned")
2633 status, payload = helper_result
2635 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2636 "drbd usermode helper check unsuccessful: %s", payload)
2637 test = status and (payload != drbd_helper)
2638 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2639 "wrong drbd usermode helper: %s", payload)
2641 # compute the DRBD minors
2643 for minor, instance in drbd_map[node].items():
2644 test = instance not in instanceinfo
2645 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2646 "ghost instance '%s' in temporary DRBD map", instance)
2647 # ghost instance should not be running, but otherwise we
2648 # don't give double warnings (both ghost instance and
2649 # unallocated minor in use)
2651 node_drbd[minor] = (instance, False)
2653 instance = instanceinfo[instance]
2654 node_drbd[minor] = (instance.name,
2655 instance.admin_state == constants.ADMINST_UP)
2657 # and now check them
2658 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2659 test = not isinstance(used_minors, (tuple, list))
2660 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2661 "cannot parse drbd status file: %s", str(used_minors))
2663 # we cannot check drbd status
2666 for minor, (iname, must_exist) in node_drbd.items():
2667 test = minor not in used_minors and must_exist
2668 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2669 "drbd minor %d of instance %s is not active", minor, iname)
2670 for minor in used_minors:
2671 test = minor not in node_drbd
2672 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2673 "unallocated drbd minor %d is in use", minor)
2675 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2676 """Builds the node OS structures.
2678 @type ninfo: L{objects.Node}
2679 @param ninfo: the node to check
2680 @param nresult: the remote results for the node
2681 @param nimg: the node image object
2685 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2687 remote_os = nresult.get(constants.NV_OSLIST, None)
2688 test = (not isinstance(remote_os, list) or
2689 not compat.all(isinstance(v, list) and len(v) == 7
2690 for v in remote_os))
2692 _ErrorIf(test, constants.CV_ENODEOS, node,
2693 "node hasn't returned valid OS data")
2702 for (name, os_path, status, diagnose,
2703 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2705 if name not in os_dict:
2708 # parameters is a list of lists instead of list of tuples due to
2709 # JSON lacking a real tuple type, fix it:
2710 parameters = [tuple(v) for v in parameters]
2711 os_dict[name].append((os_path, status, diagnose,
2712 set(variants), set(parameters), set(api_ver)))
2714 nimg.oslist = os_dict
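# Illustrative sketch (not part of the original module): for a hypothetical OS
# named "dummy-os", nimg.oslist would end up containing roughly
#   {"dummy-os": [("/path/to/os", True, "", set(["default"]),
#                  set([("a_param", "a_value")]), set([20]))]}
# i.e. one (path, status, diagnose, variants, parameters, api_versions) tuple
# per copy of the OS found on the node.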
2716 def _VerifyNodeOS(self, ninfo, nimg, base):
2717 """Verifies the node OS list.
2719 @type ninfo: L{objects.Node}
2720 @param ninfo: the node to check
2721 @param nimg: the node image object
2722 @param base: the 'template' node we match against (e.g. from the master)
2726 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2728 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2730 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2731 for os_name, os_data in nimg.oslist.items():
2732 assert os_data, "Empty OS status for OS %s?!" % os_name
2733 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2734 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2735 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2736 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2737 "OS '%s' has multiple entries (first one shadows the rest): %s",
2738 os_name, utils.CommaJoin([v[0] for v in os_data]))
2739 # comparisons with the 'base' image
2740 test = os_name not in base.oslist
2741 _ErrorIf(test, constants.CV_ENODEOS, node,
2742 "Extra OS %s not present on reference node (%s)",
2746 assert base.oslist[os_name], "Base node has empty OS status?"
2747 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2749 # base OS is invalid, skipping
2751 for kind, a, b in [("API version", f_api, b_api),
2752 ("variants list", f_var, b_var),
2753 ("parameters", beautify_params(f_param),
2754 beautify_params(b_param))]:
2755 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2756 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2757 kind, os_name, base.name,
2758 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2760 # check any missing OSes
2761 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2762 _ErrorIf(missing, constants.CV_ENODEOS, node,
2763 "OSes present on reference node %s but missing on this node: %s",
2764 base.name, utils.CommaJoin(missing))
2766 def _VerifyOob(self, ninfo, nresult):
2767 """Verifies out of band functionality of a node.
2769 @type ninfo: L{objects.Node}
2770 @param ninfo: the node to check
2771 @param nresult: the remote results for the node
2775 # We just have to verify the paths on master and/or master candidates
2776 # as the oob helper is invoked on the master
2777 if ((ninfo.master_candidate or ninfo.master_capable) and
2778 constants.NV_OOB_PATHS in nresult):
2779 for path_result in nresult[constants.NV_OOB_PATHS]:
2780 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2782 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2783 """Verifies and updates the node volume data.
2785 This function will update a L{NodeImage}'s internal structures
2786 with data from the remote call.
2788 @type ninfo: L{objects.Node}
2789 @param ninfo: the node to check
2790 @param nresult: the remote results for the node
2791 @param nimg: the node image object
2792 @param vg_name: the configured VG name
2796 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2798 nimg.lvm_fail = True
2799 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2802 elif isinstance(lvdata, basestring):
2803 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2804 utils.SafeEncode(lvdata))
2805 elif not isinstance(lvdata, dict):
2806 _ErrorIf(True, constants.CV_ENODELVM, node,
2807 "rpc call to node failed (lvlist)")
2809 nimg.volumes = lvdata
2810 nimg.lvm_fail = False
2812 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2813 """Verifies and updates the node instance list.
2815 If the listing was successful, then updates this node's instance
2816 list. Otherwise, it marks the RPC call as failed for the instance list.
2819 @type ninfo: L{objects.Node}
2820 @param ninfo: the node to check
2821 @param nresult: the remote results for the node
2822 @param nimg: the node image object
2825 idata = nresult.get(constants.NV_INSTANCELIST, None)
2826 test = not isinstance(idata, list)
2827 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2828 "rpc call to node failed (instancelist): %s",
2829 utils.SafeEncode(str(idata)))
2831 nimg.hyp_fail = True
2833 nimg.instances = idata
2835 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2836 """Verifies and computes a node information map
2838 @type ninfo: L{objects.Node}
2839 @param ninfo: the node to check
2840 @param nresult: the remote results for the node
2841 @param nimg: the node image object
2842 @param vg_name: the configured VG name
2846 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2848 # try to read free memory (from the hypervisor)
2849 hv_info = nresult.get(constants.NV_HVINFO, None)
2850 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2851 _ErrorIf(test, constants.CV_ENODEHV, node,
2852 "rpc call to node failed (hvinfo)")
2855 nimg.mfree = int(hv_info["memory_free"])
2856 except (ValueError, TypeError):
2857 _ErrorIf(True, constants.CV_ENODERPC, node,
2858 "node returned invalid nodeinfo, check hypervisor")
2860 # FIXME: devise a free space model for file based instances as well
2861 if vg_name is not None:
2862 test = (constants.NV_VGLIST not in nresult or
2863 vg_name not in nresult[constants.NV_VGLIST])
2864 _ErrorIf(test, constants.CV_ENODELVM, node,
2865 "node didn't return data for the volume group '%s'"
2866 " - it is either missing or broken", vg_name)
2869 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2870 except (ValueError, TypeError):
2871 _ErrorIf(True, constants.CV_ENODERPC, node,
2872 "node returned invalid LVM info, check LVM status")
2874 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2875 """Gets per-disk status information for all instances.
2877 @type nodelist: list of strings
2878 @param nodelist: Node names
2879 @type node_image: dict of (name, L{objects.Node})
2880 @param node_image: Node objects
2881 @type instanceinfo: dict of (name, L{objects.Instance})
2882 @param instanceinfo: Instance objects
2883 @rtype: {instance: {node: [(success, payload)]}}
2884 @return: a dictionary of per-instance dictionaries with nodes as
2885 keys and disk information as values; the disk information is a
2886 list of tuples (success, payload)
2889 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2892 node_disks_devonly = {}
2893 diskless_instances = set()
2894 diskless = constants.DT_DISKLESS
2896 for nname in nodelist:
2897 node_instances = list(itertools.chain(node_image[nname].pinst,
2898 node_image[nname].sinst))
2899 diskless_instances.update(inst for inst in node_instances
2900 if instanceinfo[inst].disk_template == diskless)
2901 disks = [(inst, disk)
2902 for inst in node_instances
2903 for disk in instanceinfo[inst].disks]
2906 # No need to collect data
2909 node_disks[nname] = disks
2911 # _AnnotateDiskParams makes already copies of the disks
2913 for (inst, dev) in disks:
2914 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2915 self.cfg.SetDiskID(anno_disk, nname)
2916 devonly.append(anno_disk)
2918 node_disks_devonly[nname] = devonly
2920 assert len(node_disks) == len(node_disks_devonly)
2922 # Collect data from all nodes with disks
2923 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2926 assert len(result) == len(node_disks)
2930 for (nname, nres) in result.items():
2931 disks = node_disks[nname]
2934 # No data from this node
2935 data = len(disks) * [(False, "node offline")]
2938 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2939 "while getting disk information: %s", msg)
2941 # No data from this node
2942 data = len(disks) * [(False, msg)]
2945 for idx, i in enumerate(nres.payload):
2946 if isinstance(i, (tuple, list)) and len(i) == 2:
2949 logging.warning("Invalid result from node %s, entry %d: %s",
2951 data.append((False, "Invalid result from the remote node"))
2953 for ((inst, _), status) in zip(disks, data):
2954 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2956 # Add empty entries for diskless instances.
2957 for inst in diskless_instances:
2958 assert inst not in instdisk
2961 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2962 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2963 compat.all(isinstance(s, (tuple, list)) and
2964 len(s) == 2 for s in statuses)
2965 for inst, nnames in instdisk.items()
2966 for nname, statuses in nnames.items())
2967 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
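# Illustrative sketch (not part of the original module), with hypothetical names:
# for a mirrored instance "inst1" on "node1"/"node2" and a diskless "inst2",
# the collected instdisk structure looks roughly like
#   {"inst1": {"node1": [(True, status)], "node2": [(True, status)]},
#    "inst2": {}}
# where each list holds one (success, payload) pair per disk of the instance.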
2972 def _SshNodeSelector(group_uuid, all_nodes):
2973 """Create endless iterators for all potential SSH check hosts.
2976 nodes = [node for node in all_nodes
2977 if (node.group != group_uuid and
2979 keyfunc = operator.attrgetter("group")
2981 return map(itertools.cycle,
2982 [sorted(map(operator.attrgetter("name"), names))
2983 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2987 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2988 """Choose which nodes should talk to which other nodes.
2990 We will make nodes contact all nodes in their group, and one node from every other group.
2993 @warning: This algorithm has a known issue if one node group is much
2994 smaller than others (e.g. just one node). In such a case all other
2995 nodes will talk to the single node.
2998 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2999 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3001 return (online_nodes,
3002 dict((name, sorted([i.next() for i in sel]))
3003 for name in online_nodes))
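# Illustrative sketch (not part of the original module), with hypothetical nodes:
# when verifying a group holding node1/node2 while another group holds node3,
# the method would return roughly
#   (["node1", "node2"], {"node1": ["node3"], "node2": ["node3"]})
# i.e. the group's online nodes plus, for each of them, one contact node taken
# from every other group (cycled across callers to spread the load).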
3005 def BuildHooksEnv(self):
3008 Cluster-Verify hooks are run only in the post phase; if they fail, their
3009 output is logged in the verify output and the verification fails.
3013 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3016 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3017 for node in self.my_node_info.values())
3021 def BuildHooksNodes(self):
3022 """Build hooks nodes.
3025 return ([], self.my_node_names)
3027 def Exec(self, feedback_fn):
3028 """Verify integrity of the node group, performing various test on nodes.
3031 # This method has too many local variables. pylint: disable=R0914
3032 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3034 if not self.my_node_names:
3036 feedback_fn("* Empty node group, skipping verification")
3040 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3041 verbose = self.op.verbose
3042 self._feedback_fn = feedback_fn
3044 vg_name = self.cfg.GetVGName()
3045 drbd_helper = self.cfg.GetDRBDHelper()
3046 cluster = self.cfg.GetClusterInfo()
3047 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3048 hypervisors = cluster.enabled_hypervisors
3049 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3051 i_non_redundant = [] # Non redundant instances
3052 i_non_a_balanced = [] # Non auto-balanced instances
3053 i_offline = 0 # Count of offline instances
3054 n_offline = 0 # Count of offline nodes
3055 n_drained = 0 # Count of nodes being drained
3056 node_vol_should = {}
3058 # FIXME: verify OS list
3061 filemap = _ComputeAncillaryFiles(cluster, False)
3063 # do local checksums
3064 master_node = self.master_node = self.cfg.GetMasterNode()
3065 master_ip = self.cfg.GetMasterIP()
3067 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3070 if self.cfg.GetUseExternalMipScript():
3071 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3073 node_verify_param = {
3074 constants.NV_FILELIST:
3075 utils.UniqueSequence(filename
3076 for files in filemap
3077 for filename in files),
3078 constants.NV_NODELIST:
3079 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3080 self.all_node_info.values()),
3081 constants.NV_HYPERVISOR: hypervisors,
3082 constants.NV_HVPARAMS:
3083 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3084 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3085 for node in node_data_list
3086 if not node.offline],
3087 constants.NV_INSTANCELIST: hypervisors,
3088 constants.NV_VERSION: None,
3089 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3090 constants.NV_NODESETUP: None,
3091 constants.NV_TIME: None,
3092 constants.NV_MASTERIP: (master_node, master_ip),
3093 constants.NV_OSLIST: None,
3094 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3095 constants.NV_USERSCRIPTS: user_scripts,
3098 if vg_name is not None:
3099 node_verify_param[constants.NV_VGLIST] = None
3100 node_verify_param[constants.NV_LVLIST] = vg_name
3101 node_verify_param[constants.NV_PVLIST] = [vg_name]
3102 node_verify_param[constants.NV_DRBDLIST] = None
3105 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3108 # FIXME: this needs to be changed per node-group, not cluster-wide
3110 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3111 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3112 bridges.add(default_nicpp[constants.NIC_LINK])
3113 for instance in self.my_inst_info.values():
3114 for nic in instance.nics:
3115 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3116 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3117 bridges.add(full_nic[constants.NIC_LINK])
3120 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3122 # Build our expected cluster state
3123 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3125 vm_capable=node.vm_capable))
3126 for node in node_data_list)
3130 for node in self.all_node_info.values():
3131 path = _SupportsOob(self.cfg, node)
3132 if path and path not in oob_paths:
3133 oob_paths.append(path)
3136 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3138 for instance in self.my_inst_names:
3139 inst_config = self.my_inst_info[instance]
3140 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3143 for nname in inst_config.all_nodes:
3144 if nname not in node_image:
3145 gnode = self.NodeImage(name=nname)
3146 gnode.ghost = (nname not in self.all_node_info)
3147 node_image[nname] = gnode
3149 inst_config.MapLVsByNode(node_vol_should)
3151 pnode = inst_config.primary_node
3152 node_image[pnode].pinst.append(instance)
3154 for snode in inst_config.secondary_nodes:
3155 nimg = node_image[snode]
3156 nimg.sinst.append(instance)
3157 if pnode not in nimg.sbp:
3158 nimg.sbp[pnode] = []
3159 nimg.sbp[pnode].append(instance)
3161 # At this point, we have the in-memory data structures complete,
3162 # except for the runtime information, which we'll gather next
3164 # Due to the way our RPC system works, exact response times cannot be
3165 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3166 # time before and after executing the request, we can at least have a time window.
3168 nvinfo_starttime = time.time()
3169 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3171 self.cfg.GetClusterName())
3172 nvinfo_endtime = time.time()
3174 if self.extra_lv_nodes and vg_name is not None:
3176 self.rpc.call_node_verify(self.extra_lv_nodes,
3177 {constants.NV_LVLIST: vg_name},
3178 self.cfg.GetClusterName())
3180 extra_lv_nvinfo = {}
3182 all_drbd_map = self.cfg.ComputeDRBDMap()
3184 feedback_fn("* Gathering disk information (%s nodes)" %
3185 len(self.my_node_names))
3186 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3189 feedback_fn("* Verifying configuration file consistency")
3191 # If not all nodes are being checked, we need to make sure the master node
3192 # and a non-checked vm_capable node are in the list.
3193 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3195 vf_nvinfo = all_nvinfo.copy()
3196 vf_node_info = list(self.my_node_info.values())
3197 additional_nodes = []
3198 if master_node not in self.my_node_info:
3199 additional_nodes.append(master_node)
3200 vf_node_info.append(self.all_node_info[master_node])
3201 # Add the first vm_capable node we find which is not included,
3202 # excluding the master node (which we already have)
3203 for node in absent_nodes:
3204 nodeinfo = self.all_node_info[node]
3205 if (nodeinfo.vm_capable and not nodeinfo.offline and
3206 node != master_node):
3207 additional_nodes.append(node)
3208 vf_node_info.append(self.all_node_info[node])
3210 key = constants.NV_FILELIST
3211 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3212 {key: node_verify_param[key]},
3213 self.cfg.GetClusterName()))
3215 vf_nvinfo = all_nvinfo
3216 vf_node_info = self.my_node_info.values()
3218 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3220 feedback_fn("* Verifying node status")
3224 for node_i in node_data_list:
3226 nimg = node_image[node]
3230 feedback_fn("* Skipping offline node %s" % (node,))
3234 if node == master_node:
3236 elif node_i.master_candidate:
3237 ntype = "master candidate"
3238 elif node_i.drained:
3244 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3246 msg = all_nvinfo[node].fail_msg
3247 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3250 nimg.rpc_fail = True
3253 nresult = all_nvinfo[node].payload
3255 nimg.call_ok = self._VerifyNode(node_i, nresult)
3256 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3257 self._VerifyNodeNetwork(node_i, nresult)
3258 self._VerifyNodeUserScripts(node_i, nresult)
3259 self._VerifyOob(node_i, nresult)
3262 self._VerifyNodeLVM(node_i, nresult, vg_name)
3263 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3266 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3267 self._UpdateNodeInstances(node_i, nresult, nimg)
3268 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3269 self._UpdateNodeOS(node_i, nresult, nimg)
3271 if not nimg.os_fail:
3272 if refos_img is None:
3274 self._VerifyNodeOS(node_i, nimg, refos_img)
3275 self._VerifyNodeBridges(node_i, nresult, bridges)
3277 # Check whether all running instances are primary for the node. (This
3278 # can no longer be done from _VerifyInstance below, since some of the
3279 # wrong instances could be from other node groups.)
3280 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3282 for inst in non_primary_inst:
3283 test = inst in self.all_inst_info
3284 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3285 "instance should not run on node %s", node_i.name)
3286 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3287 "node is running unknown instance %s", inst)
3289 for node, result in extra_lv_nvinfo.items():
3290 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3291 node_image[node], vg_name)
3293 feedback_fn("* Verifying instance status")
3294 for instance in self.my_inst_names:
3296 feedback_fn("* Verifying instance %s" % instance)
3297 inst_config = self.my_inst_info[instance]
3298 self._VerifyInstance(instance, inst_config, node_image,
3300 inst_nodes_offline = []
3302 pnode = inst_config.primary_node
3303 pnode_img = node_image[pnode]
3304 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3305 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3306 " primary node failed", instance)
3308 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3310 constants.CV_EINSTANCEBADNODE, instance,
3311 "instance is marked as running and lives on offline node %s",
3312 inst_config.primary_node)
3314 # If the instance is non-redundant we cannot survive losing its primary
3315 # node, so we are not N+1 compliant. On the other hand we have no disk
3316 # templates with more than one secondary so that situation is not well supported either.
3318 # FIXME: does not support file-backed instances
3319 if not inst_config.secondary_nodes:
3320 i_non_redundant.append(instance)
3322 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3323 constants.CV_EINSTANCELAYOUT,
3324 instance, "instance has multiple secondary nodes: %s",
3325 utils.CommaJoin(inst_config.secondary_nodes),
3326 code=self.ETYPE_WARNING)
3328 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3329 pnode = inst_config.primary_node
3330 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3331 instance_groups = {}
3333 for node in instance_nodes:
3334 instance_groups.setdefault(self.all_node_info[node].group,
3338 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3339 # Sort so that we always list the primary node first.
3340 for group, nodes in sorted(instance_groups.items(),
3341 key=lambda (_, nodes): pnode in nodes,
3344 self._ErrorIf(len(instance_groups) > 1,
3345 constants.CV_EINSTANCESPLITGROUPS,
3346 instance, "instance has primary and secondary nodes in"
3347 " different groups: %s", utils.CommaJoin(pretty_list),
3348 code=self.ETYPE_WARNING)
3350 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3351 i_non_a_balanced.append(instance)
3353 for snode in inst_config.secondary_nodes:
3354 s_img = node_image[snode]
3355 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3356 snode, "instance %s, connection to secondary node failed",
3360 inst_nodes_offline.append(snode)
3362 # warn that the instance lives on offline nodes
3363 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3364 "instance has offline secondary node(s) %s",
3365 utils.CommaJoin(inst_nodes_offline))
3366 # ... or ghost/non-vm_capable nodes
3367 for node in inst_config.all_nodes:
3368 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3369 instance, "instance lives on ghost node %s", node)
3370 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3371 instance, "instance lives on non-vm_capable node %s", node)
3373 feedback_fn("* Verifying orphan volumes")
3374 reserved = utils.FieldSet(*cluster.reserved_lvs)
3376 # We will get spurious "unknown volume" warnings if any node of this group
3377 # is secondary for an instance whose primary is in another group. To avoid
3378 # them, we find these instances and add their volumes to node_vol_should.
3379 for inst in self.all_inst_info.values():
3380 for secondary in inst.secondary_nodes:
3381 if (secondary in self.my_node_info
3382 and inst.name not in self.my_inst_info):
3383 inst.MapLVsByNode(node_vol_should)
3386 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3388 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3389 feedback_fn("* Verifying N+1 Memory redundancy")
3390 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3392 feedback_fn("* Other Notes")
3394 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3395 % len(i_non_redundant))
3397 if i_non_a_balanced:
3398 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3399 % len(i_non_a_balanced))
3402 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3405 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3408 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3412 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3413 """Analyze the post-hooks' result
3415 This method analyzes the hook result, handles it, and sends some
3416 nicely-formatted feedback back to the user.
3418 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3419 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3420 @param hooks_results: the results of the multi-node hooks rpc call
3421 @param feedback_fn: function used to send feedback back to the caller
3422 @param lu_result: previous Exec result
3423 @return: the new Exec result, based on the previous result
3427 # We only really run POST phase hooks, only for non-empty groups,
3428 # and are only interested in their results
3429 if not self.my_node_names:
3432 elif phase == constants.HOOKS_PHASE_POST:
3433 # Used to change hooks' output to proper indentation
3434 feedback_fn("* Hooks Results")
3435 assert hooks_results, "invalid result from hooks"
3437 for node_name in hooks_results:
3438 res = hooks_results[node_name]
3440 test = msg and not res.offline
3441 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3442 "Communication failure in hooks execution: %s", msg)
3443 if res.offline or msg:
3444 # No need to investigate payload if node is offline or gave an error
3447 for script, hkr, output in res.payload:
3448 test = hkr == constants.HKR_FAIL
3449 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3450 "Script %s failed, output:", script)
3452 output = self._HOOKS_INDENT_RE.sub(" ", output)
3453 feedback_fn("%s" % output)
3459 class LUClusterVerifyDisks(NoHooksLU):
3460 """Verifies the cluster disks status.
3465 def ExpandNames(self):
3466 self.share_locks = _ShareAll()
3467 self.needed_locks = {
3468 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3471 def Exec(self, feedback_fn):
3472 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3474 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3475 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3476 for group in group_names])
3479 class LUGroupVerifyDisks(NoHooksLU):
3480 """Verifies the status of all disks in a node group.
3485 def ExpandNames(self):
3486 # Raises errors.OpPrereqError on its own if group can't be found
3487 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3489 self.share_locks = _ShareAll()
3490 self.needed_locks = {
3491 locking.LEVEL_INSTANCE: [],
3492 locking.LEVEL_NODEGROUP: [],
3493 locking.LEVEL_NODE: [],
3496 def DeclareLocks(self, level):
3497 if level == locking.LEVEL_INSTANCE:
3498 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3500 # Lock instances optimistically, needs verification once node and group
3501 # locks have been acquired
3502 self.needed_locks[locking.LEVEL_INSTANCE] = \
3503 self.cfg.GetNodeGroupInstances(self.group_uuid)
3505 elif level == locking.LEVEL_NODEGROUP:
3506 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3508 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3509 set([self.group_uuid] +
3510 # Lock all groups used by instances optimistically; this requires
3511 # going via the node before it's locked, requiring verification
3514 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3515 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3517 elif level == locking.LEVEL_NODE:
3518 # This will only lock the nodes in the group to be verified which contain actual instances
3520 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3521 self._LockInstancesNodes()
3523 # Lock all nodes in group to be verified
3524 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3525 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3526 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3528 def CheckPrereq(self):
3529 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3530 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3531 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3533 assert self.group_uuid in owned_groups
3535 # Check if locked instances are still correct
3536 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3538 # Get instance information
3539 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3541 # Check if node groups for locked instances are still correct
3542 _CheckInstancesNodeGroups(self.cfg, self.instances,
3543 owned_groups, owned_nodes, self.group_uuid)
3545 def Exec(self, feedback_fn):
3546 """Verify integrity of cluster disks.
3548 @rtype: tuple of three items
3549 @return: a tuple of (dict of node-to-node_error, list of instances
3550 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3555 res_instances = set()
3558 nv_dict = _MapInstanceDisksToNodes(
3559 [inst for inst in self.instances.values()
3560 if inst.admin_state == constants.ADMINST_UP])
3563 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3564 set(self.cfg.GetVmCapableNodeList()))
3566 node_lvs = self.rpc.call_lv_list(nodes, [])
3568 for (node, node_res) in node_lvs.items():
3569 if node_res.offline:
3572 msg = node_res.fail_msg
3574 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3575 res_nodes[node] = msg
3578 for lv_name, (_, _, lv_online) in node_res.payload.items():
3579 inst = nv_dict.pop((node, lv_name), None)
3580 if not (lv_online or inst is None):
3581 res_instances.add(inst)
3583 # any leftover items in nv_dict are missing LVs, let's arrange the data
3585 for key, inst in nv_dict.iteritems():
3586 res_missing.setdefault(inst, []).append(list(key))
3588 return (res_nodes, list(res_instances), res_missing)
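# Illustrative sketch (not part of the original module), with hypothetical names:
# the value returned above would look roughly like
#   ({"node3": "Error while listing LVs"},        # per-node enumeration errors
#    ["inst1"],                                   # instances needing activate-disks
#    {"inst2": [["node1", "xenvg/disk0_data"]]})  # missing (node, volume) pairs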
3591 class LUClusterRepairDiskSizes(NoHooksLU):
3592 """Verifies the cluster disks sizes.
3597 def ExpandNames(self):
3598 if self.op.instances:
3599 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3600 self.needed_locks = {
3601 locking.LEVEL_NODE_RES: [],
3602 locking.LEVEL_INSTANCE: self.wanted_names,
3604 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3606 self.wanted_names = None
3607 self.needed_locks = {
3608 locking.LEVEL_NODE_RES: locking.ALL_SET,
3609 locking.LEVEL_INSTANCE: locking.ALL_SET,
3611 self.share_locks = {
3612 locking.LEVEL_NODE_RES: 1,
3613 locking.LEVEL_INSTANCE: 0,
3616 def DeclareLocks(self, level):
3617 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3618 self._LockInstancesNodes(primary_only=True, level=level)
3620 def CheckPrereq(self):
3621 """Check prerequisites.
3623 This only checks the optional instance list against the existing names.
3626 if self.wanted_names is None:
3627 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3629 self.wanted_instances = \
3630 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3632 def _EnsureChildSizes(self, disk):
3633 """Ensure children of the disk have the needed disk size.
3635 This is valid mainly for DRBD8 and fixes an issue where the
3636 children have smaller disk size.
3638 @param disk: an L{ganeti.objects.Disk} object
3641 if disk.dev_type == constants.LD_DRBD8:
3642 assert disk.children, "Empty children for DRBD8?"
3643 fchild = disk.children[0]
3644 mismatch = fchild.size < disk.size
3646 self.LogInfo("Child disk has size %d, parent %d, fixing",
3647 fchild.size, disk.size)
3648 fchild.size = disk.size
3650 # and we recurse on this child only, not on the metadev
3651 return self._EnsureChildSizes(fchild) or mismatch
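# Illustrative sketch (not part of the original module): for a hypothetical DRBD8
# disk of 10240 MiB whose data child (children[0]) is recorded with 10236 MiB,
# the method grows the child's recorded size to 10240 MiB and returns True, so
# the caller in Exec() knows the instance configuration must be written back.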
3655 def Exec(self, feedback_fn):
3656 """Verify the size of cluster disks.
3659 # TODO: check child disks too
3660 # TODO: check differences in size between primary/secondary nodes
3662 for instance in self.wanted_instances:
3663 pnode = instance.primary_node
3664 if pnode not in per_node_disks:
3665 per_node_disks[pnode] = []
3666 for idx, disk in enumerate(instance.disks):
3667 per_node_disks[pnode].append((instance, idx, disk))
3669 assert not (frozenset(per_node_disks.keys()) -
3670 self.owned_locks(locking.LEVEL_NODE_RES)), \
3671 "Not owning correct locks"
3672 assert not self.owned_locks(locking.LEVEL_NODE)
3675 for node, dskl in per_node_disks.items():
3676 newl = [v[2].Copy() for v in dskl]
3678 self.cfg.SetDiskID(dsk, node)
3679 result = self.rpc.call_blockdev_getsize(node, newl)
3681 self.LogWarning("Failure in blockdev_getsize call to node"
3682 " %s, ignoring", node)
3684 if len(result.payload) != len(dskl):
3685 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3686 " result.payload=%s", node, len(dskl), result.payload)
3687 self.LogWarning("Invalid result from node %s, ignoring node results",
3690 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3692 self.LogWarning("Disk %d of instance %s did not return size"
3693 " information, ignoring", idx, instance.name)
3695 if not isinstance(size, (int, long)):
3696 self.LogWarning("Disk %d of instance %s did not return valid"
3697 " size information, ignoring", idx, instance.name)
3700 if size != disk.size:
3701 self.LogInfo("Disk %d of instance %s has mismatched size,"
3702 " correcting: recorded %d, actual %d", idx,
3703 instance.name, disk.size, size)
3705 self.cfg.Update(instance, feedback_fn)
3706 changed.append((instance.name, idx, size))
3707 if self._EnsureChildSizes(disk):
3708 self.cfg.Update(instance, feedback_fn)
3709 changed.append((instance.name, idx, disk.size))
3713 class LUClusterRename(LogicalUnit):
3714 """Rename the cluster.
3717 HPATH = "cluster-rename"
3718 HTYPE = constants.HTYPE_CLUSTER
3720 def BuildHooksEnv(self):
3725 "OP_TARGET": self.cfg.GetClusterName(),
3726 "NEW_NAME": self.op.name,
3729 def BuildHooksNodes(self):
3730 """Build hooks nodes.
3733 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3735 def CheckPrereq(self):
3736 """Verify that the passed name is a valid one.
3739 hostname = netutils.GetHostname(name=self.op.name,
3740 family=self.cfg.GetPrimaryIPFamily())
3742 new_name = hostname.name
3743 self.ip = new_ip = hostname.ip
3744 old_name = self.cfg.GetClusterName()
3745 old_ip = self.cfg.GetMasterIP()
3746 if new_name == old_name and new_ip == old_ip:
3747 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3748 " cluster has changed",
3750 if new_ip != old_ip:
3751 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3752 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3753 " reachable on the network" %
3754 new_ip, errors.ECODE_NOTUNIQUE)
3756 self.op.name = new_name
3758 def Exec(self, feedback_fn):
3759 """Rename the cluster.
3762 clustername = self.op.name
3765 # shutdown the master IP
3766 master_params = self.cfg.GetMasterNetworkParameters()
3767 ems = self.cfg.GetUseExternalMipScript()
3768 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3770 result.Raise("Could not disable the master role")
3773 cluster = self.cfg.GetClusterInfo()
3774 cluster.cluster_name = clustername
3775 cluster.master_ip = new_ip
3776 self.cfg.Update(cluster, feedback_fn)
3778 # update the known hosts file
3779 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3780 node_list = self.cfg.GetOnlineNodeList()
3782 node_list.remove(master_params.name)
3785 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3787 master_params.ip = new_ip
3788 result = self.rpc.call_node_activate_master_ip(master_params.name,
3790 msg = result.fail_msg
3792 self.LogWarning("Could not re-enable the master role on"
3793 " the master, please restart manually: %s", msg)
3798 def _ValidateNetmask(cfg, netmask):
3799 """Checks if a netmask is valid.
3801 @type cfg: L{config.ConfigWriter}
3802 @param cfg: The cluster configuration
3804 @param netmask: the netmask to be verified
3805 @raise errors.OpPrereqError: if the validation fails
3808 ip_family = cfg.GetPrimaryIPFamily()
3810 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3811 except errors.ProgrammerError:
3812 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3813 ip_family, errors.ECODE_INVAL)
3814 if not ipcls.ValidateNetmask(netmask):
3815 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3816 (netmask), errors.ECODE_INVAL)
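# A minimal usage sketch for _ValidateNetmask (illustrative only; the helper
# below is hypothetical and not referenced anywhere else in this module).
# The netmask argument is a CIDR prefix length, checked against the
# cluster's primary IP family and rejected with an OpPrereqError when it is
# out of range for that family.
def _ExampleNetmaskCheck(cfg, netmask):
  """Illustrative wrapper returning (valid, error) instead of raising.

  """
  try:
    _ValidateNetmask(cfg, netmask)
  except errors.OpPrereqError, err:
    # e.g. a prefix length of 33 on an IPv4 cluster ends up here
    return (False, str(err))
  return (True, None)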
3819 class LUClusterSetParams(LogicalUnit):
3820 """Change the parameters of the cluster.
3823 HPATH = "cluster-modify"
3824 HTYPE = constants.HTYPE_CLUSTER
3827 def CheckArguments(self):
3831 if self.op.uid_pool:
3832 uidpool.CheckUidPool(self.op.uid_pool)
3834 if self.op.add_uids:
3835 uidpool.CheckUidPool(self.op.add_uids)
3837 if self.op.remove_uids:
3838 uidpool.CheckUidPool(self.op.remove_uids)
3840 if self.op.master_netmask is not None:
3841 _ValidateNetmask(self.cfg, self.op.master_netmask)
3843 if self.op.diskparams:
3844 for dt_params in self.op.diskparams.values():
3845 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3847 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3848 except errors.OpPrereqError, err:
3849 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3852 def ExpandNames(self):
3853 # FIXME: in the future maybe other cluster params won't require checking on
3854 # all nodes to be modified.
3855 self.needed_locks = {
3856 locking.LEVEL_NODE: locking.ALL_SET,
3857 locking.LEVEL_INSTANCE: locking.ALL_SET,
3858 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3860 self.share_locks = {
3861 locking.LEVEL_NODE: 1,
3862 locking.LEVEL_INSTANCE: 1,
3863 locking.LEVEL_NODEGROUP: 1,
3866 def BuildHooksEnv(self):
3871 "OP_TARGET": self.cfg.GetClusterName(),
3872 "NEW_VG_NAME": self.op.vg_name,
3875 def BuildHooksNodes(self):
3876 """Build hooks nodes.
3879 mn = self.cfg.GetMasterNode()
3882 def CheckPrereq(self):
3883 """Check prerequisites.
3885 This checks whether the given params don't conflict and
3886 whether the given volume group is valid.
3889 if self.op.vg_name is not None and not self.op.vg_name:
3890 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3891 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3892 " instances exist", errors.ECODE_INVAL)
3894 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3895 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3896 raise errors.OpPrereqError("Cannot disable drbd helper while"
3897 " drbd-based instances exist",
3900 node_list = self.owned_locks(locking.LEVEL_NODE)
3902 # if vg_name not None, checks given volume group on all nodes
3904 vglist = self.rpc.call_vg_list(node_list)
3905 for node in node_list:
3906 msg = vglist[node].fail_msg
3908 # ignoring down node
3909 self.LogWarning("Error while gathering data on node %s"
3910 " (ignoring node): %s", node, msg)
3912 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3914 constants.MIN_VG_SIZE)
3916 raise errors.OpPrereqError("Error on node '%s': %s" %
3917 (node, vgstatus), errors.ECODE_ENVIRON)
3919 if self.op.drbd_helper:
3920 # checks given drbd helper on all nodes
3921 helpers = self.rpc.call_drbd_helper(node_list)
3922 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3924 self.LogInfo("Not checking drbd helper on offline node %s", node)
3926 msg = helpers[node].fail_msg
3928 raise errors.OpPrereqError("Error checking drbd helper on node"
3929 " '%s': %s" % (node, msg),
3930 errors.ECODE_ENVIRON)
3931 node_helper = helpers[node].payload
3932 if node_helper != self.op.drbd_helper:
3933 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3934 (node, node_helper), errors.ECODE_ENVIRON)
3936 self.cluster = cluster = self.cfg.GetClusterInfo()
3937 # validate params changes
3938 if self.op.beparams:
3939 objects.UpgradeBeParams(self.op.beparams)
3940 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3941 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3943 if self.op.ndparams:
3944 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3945 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3947 # TODO: we need a more general way to handle resetting
3948 # cluster-level parameters to default values
3949 if self.new_ndparams["oob_program"] == "":
3950 self.new_ndparams["oob_program"] = \
3951 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3953 if self.op.hv_state:
3954 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3955 self.cluster.hv_state_static)
3956 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3957 for hv, values in new_hv_state.items())
3959 if self.op.disk_state:
3960 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3961 self.cluster.disk_state_static)
3962 self.new_disk_state = \
3963 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3964 for name, values in svalues.items()))
3965 for storage, svalues in new_disk_state.items())
3968 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3971 all_instances = self.cfg.GetAllInstancesInfo().values()
3973 for group in self.cfg.GetAllNodeGroupsInfo().values():
3974 instances = frozenset([inst for inst in all_instances
3975 if compat.any(node in group.members
3976 for node in inst.all_nodes)])
3977 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3978 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
3979 new = _ComputeNewInstanceViolations(ipol,
3980 new_ipolicy, instances)
3982 violations.update(new)
3985 self.LogWarning("After the ipolicy change the following instances"
3986 " violate them: %s",
3987 utils.CommaJoin(utils.NiceSort(violations)))
3989 if self.op.nicparams:
3990 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3991 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3992 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3995 # check all instances for consistency
3996 for instance in self.cfg.GetAllInstancesInfo().values():
3997 for nic_idx, nic in enumerate(instance.nics):
3998 params_copy = copy.deepcopy(nic.nicparams)
3999 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4001 # check parameter syntax
4003 objects.NIC.CheckParameterSyntax(params_filled)
4004 except errors.ConfigurationError, err:
4005 nic_errors.append("Instance %s, nic/%d: %s" %
4006 (instance.name, nic_idx, err))
4008 # if we're moving instances to routed, check that they have an ip
4009 target_mode = params_filled[constants.NIC_MODE]
4010 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4011 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4012 " address" % (instance.name, nic_idx))
4014 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4015 "\n".join(nic_errors), errors.ECODE_INVAL)
4017 # hypervisor list/parameters
4018 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4019 if self.op.hvparams:
4020 for hv_name, hv_dict in self.op.hvparams.items():
4021 if hv_name not in self.new_hvparams:
4022 self.new_hvparams[hv_name] = hv_dict
4024 self.new_hvparams[hv_name].update(hv_dict)
4026 # disk template parameters
4027 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4028 if self.op.diskparams:
4029 for dt_name, dt_params in self.op.diskparams.items():
4030 if dt_name not in self.new_diskparams:
4031 self.new_diskparams[dt_name] = dt_params
4033 self.new_diskparams[dt_name].update(dt_params)
4035 # os hypervisor parameters
4036 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4038 for os_name, hvs in self.op.os_hvp.items():
4039 if os_name not in self.new_os_hvp:
4040 self.new_os_hvp[os_name] = hvs
4042 for hv_name, hv_dict in hvs.items():
4043 if hv_name not in self.new_os_hvp[os_name]:
4044 self.new_os_hvp[os_name][hv_name] = hv_dict
4046 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4049 self.new_osp = objects.FillDict(cluster.osparams, {})
4050 if self.op.osparams:
4051 for os_name, osp in self.op.osparams.items():
4052 if os_name not in self.new_osp:
4053 self.new_osp[os_name] = {}
4055 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4058 if not self.new_osp[os_name]:
4059 # we removed all parameters
4060 del self.new_osp[os_name]
4062 # check the parameter validity (remote check)
4063 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4064 os_name, self.new_osp[os_name])
4066 # changes to the hypervisor list
4067 if self.op.enabled_hypervisors is not None:
4068 self.hv_list = self.op.enabled_hypervisors
4069 for hv in self.hv_list:
4070 # if the hypervisor doesn't already exist in the cluster
4071 # hvparams, we initialize it to empty, and then (in both
4072 # cases) we make sure to fill the defaults, as we might not
4073 # have a complete defaults list if the hypervisor wasn't
4075 if hv not in new_hvp:
4077 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4078 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4080 self.hv_list = cluster.enabled_hypervisors
4082 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4083 # either the enabled list has changed, or the parameters have, validate
4084 for hv_name, hv_params in self.new_hvparams.items():
4085 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4086 (self.op.enabled_hypervisors and
4087 hv_name in self.op.enabled_hypervisors)):
4088 # either this is a new hypervisor, or its parameters have changed
4089 hv_class = hypervisor.GetHypervisor(hv_name)
4090 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4091 hv_class.CheckParameterSyntax(hv_params)
4092 _CheckHVParams(self, node_list, hv_name, hv_params)
4095 # no need to check any newly-enabled hypervisors, since the
4096 # defaults have already been checked in the above code-block
4097 for os_name, os_hvp in self.new_os_hvp.items():
4098 for hv_name, hv_params in os_hvp.items():
4099 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4100 # we need to fill in the new os_hvp on top of the actual hv_p
4101 cluster_defaults = self.new_hvparams.get(hv_name, {})
4102 new_osp = objects.FillDict(cluster_defaults, hv_params)
4103 hv_class = hypervisor.GetHypervisor(hv_name)
4104 hv_class.CheckParameterSyntax(new_osp)
4105 _CheckHVParams(self, node_list, hv_name, new_osp)
4107 if self.op.default_iallocator:
4108 alloc_script = utils.FindFile(self.op.default_iallocator,
4109 constants.IALLOCATOR_SEARCH_PATH,
4111 if alloc_script is None:
4112 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4113 " specified" % self.op.default_iallocator,
4116 def Exec(self, feedback_fn):
4117 """Change the parameters of the cluster.
4120 if self.op.vg_name is not None:
4121 new_volume = self.op.vg_name
4124 if new_volume != self.cfg.GetVGName():
4125 self.cfg.SetVGName(new_volume)
4127 feedback_fn("Cluster LVM configuration already in desired"
4128 " state, not changing")
4129 if self.op.drbd_helper is not None:
4130 new_helper = self.op.drbd_helper
4133 if new_helper != self.cfg.GetDRBDHelper():
4134 self.cfg.SetDRBDHelper(new_helper)
4136 feedback_fn("Cluster DRBD helper already in desired state,"
4138 if self.op.hvparams:
4139 self.cluster.hvparams = self.new_hvparams
4141 self.cluster.os_hvp = self.new_os_hvp
4142 if self.op.enabled_hypervisors is not None:
4143 self.cluster.hvparams = self.new_hvparams
4144 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4145 if self.op.beparams:
4146 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4147 if self.op.nicparams:
4148 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4150 self.cluster.ipolicy = self.new_ipolicy
4151 if self.op.osparams:
4152 self.cluster.osparams = self.new_osp
4153 if self.op.ndparams:
4154 self.cluster.ndparams = self.new_ndparams
4155 if self.op.diskparams:
4156 self.cluster.diskparams = self.new_diskparams
4157 if self.op.hv_state:
4158 self.cluster.hv_state_static = self.new_hv_state
4159 if self.op.disk_state:
4160 self.cluster.disk_state_static = self.new_disk_state
4162 if self.op.candidate_pool_size is not None:
4163 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4164 # we need to update the pool size here, otherwise the save will fail
4165 _AdjustCandidatePool(self, [])
4167 if self.op.maintain_node_health is not None:
4168 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4169 feedback_fn("Note: CONFD was disabled at build time, node health"
4170 " maintenance is not useful (still enabling it)")
4171 self.cluster.maintain_node_health = self.op.maintain_node_health
4173 if self.op.prealloc_wipe_disks is not None:
4174 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4176 if self.op.add_uids is not None:
4177 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4179 if self.op.remove_uids is not None:
4180 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4182 if self.op.uid_pool is not None:
4183 self.cluster.uid_pool = self.op.uid_pool
4185 if self.op.default_iallocator is not None:
4186 self.cluster.default_iallocator = self.op.default_iallocator
4188 if self.op.reserved_lvs is not None:
4189 self.cluster.reserved_lvs = self.op.reserved_lvs
4191 if self.op.use_external_mip_script is not None:
4192 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4194 def helper_os(aname, mods, desc):
4196 lst = getattr(self.cluster, aname)
4197 for key, val in mods:
4198 if key == constants.DDM_ADD:
4200 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4203 elif key == constants.DDM_REMOVE:
4207 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4209 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4211 if self.op.hidden_os:
4212 helper_os("hidden_os", self.op.hidden_os, "hidden")
4214 if self.op.blacklisted_os:
4215 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4217 if self.op.master_netdev:
4218 master_params = self.cfg.GetMasterNetworkParameters()
4219 ems = self.cfg.GetUseExternalMipScript()
4220 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4221 self.cluster.master_netdev)
4222 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4224 result.Raise("Could not disable the master ip")
4225 feedback_fn("Changing master_netdev from %s to %s" %
4226 (master_params.netdev, self.op.master_netdev))
4227 self.cluster.master_netdev = self.op.master_netdev
4229 if self.op.master_netmask:
4230 master_params = self.cfg.GetMasterNetworkParameters()
4231 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4232 result = self.rpc.call_node_change_master_netmask(master_params.name,
4233 master_params.netmask,
4234 self.op.master_netmask,
4236 master_params.netdev)
4238 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4241 self.cluster.master_netmask = self.op.master_netmask
4243 self.cfg.Update(self.cluster, feedback_fn)
4245 if self.op.master_netdev:
4246 master_params = self.cfg.GetMasterNetworkParameters()
4247 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4248 self.op.master_netdev)
4249 ems = self.cfg.GetUseExternalMipScript()
4250 result = self.rpc.call_node_activate_master_ip(master_params.name,
4253 self.LogWarning("Could not re-enable the master ip on"
4254 " the master, please restart manually: %s",
4258 def _UploadHelper(lu, nodes, fname):
4259 """Helper for uploading a file and showing warnings.
4262 if os.path.exists(fname):
4263 result = lu.rpc.call_upload_file(nodes, fname)
4264 for to_node, to_result in result.items():
4265 msg = to_result.fail_msg
4267 msg = ("Copy of file %s to node %s failed: %s" %
4268 (fname, to_node, msg))
4269 lu.proc.LogWarning(msg)
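# A small sketch of how _UploadHelper is typically driven (illustrative
# only; the function below is hypothetical and never called from this
# module).  It mirrors what LUClusterRename does after rewriting the
# known_hosts file: push the file to every online node except the master.
def _ExampleUploadKnownHosts(lu):
  """Illustrative only: redistribute known_hosts to all online nodes.

  """
  node_list = lu.cfg.GetOnlineNodeList()
  master_name = lu.cfg.GetMasterNode()
  if master_name in node_list:
    node_list.remove(master_name)
  _UploadHelper(lu, node_list, constants.SSH_KNOWN_HOSTS_FILE)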
4272 def _ComputeAncillaryFiles(cluster, redist):
4273 """Compute files external to Ganeti which need to be consistent.
4275 @type redist: boolean
4276 @param redist: Whether to include files which need to be redistributed
4279 # Compute files for all nodes
4281 constants.SSH_KNOWN_HOSTS_FILE,
4282 constants.CONFD_HMAC_KEY,
4283 constants.CLUSTER_DOMAIN_SECRET_FILE,
4284 constants.SPICE_CERT_FILE,
4285 constants.SPICE_CACERT_FILE,
4286 constants.RAPI_USERS_FILE,
4290 files_all.update(constants.ALL_CERT_FILES)
4291 files_all.update(ssconf.SimpleStore().GetFileList())
4293 # we need to ship at least the RAPI certificate
4294 files_all.add(constants.RAPI_CERT_FILE)
4296 if cluster.modify_etc_hosts:
4297 files_all.add(constants.ETC_HOSTS)
4299 if cluster.use_external_mip_script:
4300 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4302 # Files which are optional; these must:
4303 # - be present in one other category as well
4304 # - either exist or not exist on all nodes of that category (mc, vm all)
4306 constants.RAPI_USERS_FILE,
4309 # Files which should only be on master candidates
4313 files_mc.add(constants.CLUSTER_CONF_FILE)
4315 # Files which should only be on VM-capable nodes
4318 for hv_name in cluster.enabled_hypervisors
4319 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4323 for hv_name in cluster.enabled_hypervisors
4324 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4326 # Filenames in each category must be unique
4327 all_files_set = files_all | files_mc | files_vm
4328 assert (len(all_files_set) ==
4329 sum(map(len, [files_all, files_mc, files_vm]))), \
4330 "Found file listed in more than one file list"
4332 # Optional files must be present in one other category
4333 assert all_files_set.issuperset(files_opt), \
4334 "Optional file not in a different required list"
4336 return (files_all, files_opt, files_mc, files_vm)
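# Sketch of how the four categories returned above can be combined for a
# single node (illustrative only; the helper below is hypothetical).  Every
# node receives files_all, master candidates additionally receive files_mc,
# vm-capable nodes additionally receive files_vm; files_opt merely marks
# entries that may legitimately be absent.
def _ExampleNodeFileSet(cluster, master_candidate, vm_capable):
  """Illustrative only: files a single node is expected to hold.

  """
  (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)
  wanted = set(files_all)
  if master_candidate:
    wanted.update(files_mc)
  if vm_capable:
    wanted.update(files_vm)
  return wanted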
4339 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4340 """Distribute additional files which are part of the cluster configuration.
4342 ConfigWriter takes care of distributing the config and ssconf files, but
4343 there are more files which should be distributed to all nodes. This function
4344 makes sure those are copied.
4346 @param lu: calling logical unit
4347 @param additional_nodes: list of nodes not in the config to distribute to
4348 @type additional_vm: boolean
4349 @param additional_vm: whether the additional nodes are vm-capable or not
4352 # Gather target nodes
4353 cluster = lu.cfg.GetClusterInfo()
4354 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4356 online_nodes = lu.cfg.GetOnlineNodeList()
4357 online_set = frozenset(online_nodes)
4358 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4360 if additional_nodes is not None:
4361 online_nodes.extend(additional_nodes)
4363 vm_nodes.extend(additional_nodes)
4365 # Never distribute to master node
4366 for nodelist in [online_nodes, vm_nodes]:
4367 if master_info.name in nodelist:
4368 nodelist.remove(master_info.name)
4371 (files_all, _, files_mc, files_vm) = \
4372 _ComputeAncillaryFiles(cluster, True)
4374 # Never re-distribute configuration file from here
4375 assert not (constants.CLUSTER_CONF_FILE in files_all or
4376 constants.CLUSTER_CONF_FILE in files_vm)
4377 assert not files_mc, "Master candidates not handled in this function"
4380 (online_nodes, files_all),
4381 (vm_nodes, files_vm),
4385 for (node_list, files) in filemap:
4387 _UploadHelper(lu, node_list, fname)
4390 class LUClusterRedistConf(NoHooksLU):
4391 """Force the redistribution of cluster configuration.
4393 This is a very simple LU.
4398 def ExpandNames(self):
4399 self.needed_locks = {
4400 locking.LEVEL_NODE: locking.ALL_SET,
4402 self.share_locks[locking.LEVEL_NODE] = 1
4404 def Exec(self, feedback_fn):
4405 """Redistribute the configuration.
4408 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4409 _RedistributeAncillaryFiles(self)
4412 class LUClusterActivateMasterIp(NoHooksLU):
4413 """Activate the master IP on the master node.
4416 def Exec(self, feedback_fn):
4417 """Activate the master IP.
4420 master_params = self.cfg.GetMasterNetworkParameters()
4421 ems = self.cfg.GetUseExternalMipScript()
4422 result = self.rpc.call_node_activate_master_ip(master_params.name,
4424 result.Raise("Could not activate the master IP")
4427 class LUClusterDeactivateMasterIp(NoHooksLU):
4428 """Deactivate the master IP on the master node.
4431 def Exec(self, feedback_fn):
4432 """Deactivate the master IP.
4435 master_params = self.cfg.GetMasterNetworkParameters()
4436 ems = self.cfg.GetUseExternalMipScript()
4437 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4439 result.Raise("Could not deactivate the master IP")
4442 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4443 """Sleep and poll for an instance's disk to sync.
4446 if not instance.disks or (disks is not None and not disks):
4449 disks = _ExpandCheckDisks(instance, disks)
4452 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4454 node = instance.primary_node
4457 lu.cfg.SetDiskID(dev, node)
4459 # TODO: Convert to utils.Retry
4462 degr_retries = 10 # in seconds, as we sleep 1 second each time
4466 cumul_degraded = False
4467 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4468 msg = rstats.fail_msg
4470 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4473 raise errors.RemoteError("Can't contact node %s for mirror data,"
4474 " aborting." % node)
4477 rstats = rstats.payload
4479 for i, mstat in enumerate(rstats):
4481 lu.LogWarning("Can't compute data for node %s/%s",
4482 node, disks[i].iv_name)
4485 cumul_degraded = (cumul_degraded or
4486 (mstat.is_degraded and mstat.sync_percent is None))
4487 if mstat.sync_percent is not None:
4489 if mstat.estimated_time is not None:
4490 rem_time = ("%s remaining (estimated)" %
4491 utils.FormatSeconds(mstat.estimated_time))
4492 max_time = mstat.estimated_time
4494 rem_time = "no time estimate"
4495 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4496 (disks[i].iv_name, mstat.sync_percent, rem_time))
4498 # if we're done but degraded, let's do a few small retries, to
4499 # make sure we see a stable and not transient situation; therefore
4500 # we force restart of the loop
4501 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4502 logging.info("Degraded disks found, %d retries left", degr_retries)
4510 time.sleep(min(60, max_time))
4513 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4514 return not cumul_degraded
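# Usage sketch (illustrative): instance LUs typically call this right after
# creating or (re)activating disks, e.g.
#
#   disk_abort = not _WaitForSync(lu, instance)
#
# and treat a False return value (still degraded after the retries above)
# as a reason to warn or abort the operation.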
4517 def _BlockdevFind(lu, node, dev, instance):
4518 """Wrapper around call_blockdev_find to annotate diskparams.
4520 @param lu: A reference to the lu object
4521 @param node: The node to call out to
4522 @param dev: The device to find
4523 @param instance: The instance object the device belongs to
4524 @returns The result of the rpc call
4527 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4528 return lu.rpc.call_blockdev_find(node, disk)
4531 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4532 """Wrapper around L{_CheckDiskConsistencyInner}.
4535 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4536 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4540 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4542 """Check that mirrors are not degraded.
4544 @attention: The device has to be annotated already.
4546 The ldisk parameter, if True, will change the test from the
4547 is_degraded attribute (which represents overall non-ok status for
4548 the device(s)) to the ldisk (representing the local storage status).
4551 lu.cfg.SetDiskID(dev, node)
4555 if on_primary or dev.AssembleOnSecondary():
4556 rstats = lu.rpc.call_blockdev_find(node, dev)
4557 msg = rstats.fail_msg
4559 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4561 elif not rstats.payload:
4562 lu.LogWarning("Can't find disk on node %s", node)
4566 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4568 result = result and not rstats.payload.is_degraded
4571 for child in dev.children:
4572 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
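# Usage sketch (illustrative): callers that only care about the local
# storage status of a DRBD device, e.g. before failing over to the
# secondary node, pass ldisk=True so the check looks at ldisk_status
# instead of the overall is_degraded flag:
#
#   ok = _CheckDiskConsistency(lu, instance, dev, secondary_node,
#                              on_primary=False, ldisk=True)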
4578 class LUOobCommand(NoHooksLU):
4579 """Logical unit for OOB handling.
4583 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4585 def ExpandNames(self):
4586 """Gather locks we need.
4589 if self.op.node_names:
4590 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4591 lock_names = self.op.node_names
4593 lock_names = locking.ALL_SET
4595 self.needed_locks = {
4596 locking.LEVEL_NODE: lock_names,
4599 def CheckPrereq(self):
4600 """Check prerequisites.
4603 - the node exists in the configuration
4606 Any errors are signaled by raising errors.OpPrereqError.
4610 self.master_node = self.cfg.GetMasterNode()
4612 assert self.op.power_delay >= 0.0
4614 if self.op.node_names:
4615 if (self.op.command in self._SKIP_MASTER and
4616 self.master_node in self.op.node_names):
4617 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4618 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4620 if master_oob_handler:
4621 additional_text = ("run '%s %s %s' if you want to operate on the"
4622 " master regardless") % (master_oob_handler,
4626 additional_text = "it does not support out-of-band operations"
4628 raise errors.OpPrereqError(("Operating on the master node %s is not"
4629 " allowed for %s; %s") %
4630 (self.master_node, self.op.command,
4631 additional_text), errors.ECODE_INVAL)
4633 self.op.node_names = self.cfg.GetNodeList()
4634 if self.op.command in self._SKIP_MASTER:
4635 self.op.node_names.remove(self.master_node)
4637 if self.op.command in self._SKIP_MASTER:
4638 assert self.master_node not in self.op.node_names
4640 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4642 raise errors.OpPrereqError("Node %s not found" % node_name,
4645 self.nodes.append(node)
4647 if (not self.op.ignore_status and
4648 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4649 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4650 " not marked offline") % node_name,
4653 def Exec(self, feedback_fn):
4654 """Execute OOB and return result if we expect any.
4657 master_node = self.master_node
4660 for idx, node in enumerate(utils.NiceSort(self.nodes,
4661 key=lambda node: node.name)):
4662 node_entry = [(constants.RS_NORMAL, node.name)]
4663 ret.append(node_entry)
4665 oob_program = _SupportsOob(self.cfg, node)
4668 node_entry.append((constants.RS_UNAVAIL, None))
4671 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4672 self.op.command, oob_program, node.name)
4673 result = self.rpc.call_run_oob(master_node, oob_program,
4674 self.op.command, node.name,
4678 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4679 node.name, result.fail_msg)
4680 node_entry.append((constants.RS_NODATA, None))
4683 self._CheckPayload(result)
4684 except errors.OpExecError, err:
4685 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4687 node_entry.append((constants.RS_NODATA, None))
4689 if self.op.command == constants.OOB_HEALTH:
4690 # For health we should log important events
4691 for item, status in result.payload:
4692 if status in [constants.OOB_STATUS_WARNING,
4693 constants.OOB_STATUS_CRITICAL]:
4694 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4695 item, node.name, status)
4697 if self.op.command == constants.OOB_POWER_ON:
4699 elif self.op.command == constants.OOB_POWER_OFF:
4700 node.powered = False
4701 elif self.op.command == constants.OOB_POWER_STATUS:
4702 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4703 if powered != node.powered:
4704 logging.warning(("Recorded power state (%s) of node '%s' does not"
4705 " match actual power state (%s)"), node.powered,
4708 # For configuration changing commands we should update the node
4709 if self.op.command in (constants.OOB_POWER_ON,
4710 constants.OOB_POWER_OFF):
4711 self.cfg.Update(node, feedback_fn)
4713 node_entry.append((constants.RS_NORMAL, result.payload))
4715 if (self.op.command == constants.OOB_POWER_ON and
4716 idx < len(self.nodes) - 1):
4717 time.sleep(self.op.power_delay)
4721 def _CheckPayload(self, result):
4722 """Checks if the payload is valid.
4724 @param result: RPC result
4725 @raises errors.OpExecError: If payload is not valid
4729 if self.op.command == constants.OOB_HEALTH:
4730 if not isinstance(result.payload, list):
4731 errs.append("command 'health' is expected to return a list but got %s" %
4732 type(result.payload))
4734 for item, status in result.payload:
4735 if status not in constants.OOB_STATUSES:
4736 errs.append("health item '%s' has invalid status '%s'" %
4739 if self.op.command == constants.OOB_POWER_STATUS:
4740 if not isinstance(result.payload, dict):
4741 errs.append("power-status is expected to return a dict but got %s" %
4742 type(result.payload))
4744 if self.op.command in [
4745 constants.OOB_POWER_ON,
4746 constants.OOB_POWER_OFF,
4747 constants.OOB_POWER_CYCLE,
4749 if result.payload is not None:
4750 errs.append("%s is expected to not return payload but got '%s'" %
4751 (self.op.command, result.payload))
4754 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4755 utils.CommaJoin(errs))
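# Payload shapes accepted by _CheckPayload (values illustrative only; the
# exact status constants are assumed to follow the OOB_STATUS_* naming used
# elsewhere in constants):
#
#   constants.OOB_HEALTH:       [["PSU1", constants.OOB_STATUS_OK],
#                                ["fan3", constants.OOB_STATUS_WARNING]]
#   constants.OOB_POWER_STATUS: {constants.OOB_POWER_STATUS_POWERED: True}
#   power on/off/cycle:         no payload at all (None)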
4758 class _OsQuery(_QueryBase):
4759 FIELDS = query.OS_FIELDS
4761 def ExpandNames(self, lu):
4762 # Lock all nodes in shared mode
4763 # Temporary removal of locks, should be reverted later
4764 # TODO: reintroduce locks when they are lighter-weight
4765 lu.needed_locks = {}
4766 #self.share_locks[locking.LEVEL_NODE] = 1
4767 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4769 # The following variables interact with _QueryBase._GetNames
4771 self.wanted = self.names
4773 self.wanted = locking.ALL_SET
4775 self.do_locking = self.use_locking
4777 def DeclareLocks(self, lu, level):
4781 def _DiagnoseByOS(rlist):
4782 """Remaps a per-node return list into an a per-os per-node dictionary
4784 @param rlist: a map with node names as keys and OS objects as values
4787 @return: a dictionary with osnames as keys and as value another
4788 map, with nodes as keys and tuples of (path, status, diagnose,
4789 variants, parameters, api_versions) as values, eg::
4791 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4792 (/srv/..., False, "invalid api")],
4793 "node2": [(/srv/..., True, "", [], [])]}
4798 # we build here the list of nodes that didn't fail the RPC (at RPC
4799 # level), so that nodes with a non-responding node daemon don't
4800 # make all OSes invalid
4801 good_nodes = [node_name for node_name in rlist
4802 if not rlist[node_name].fail_msg]
4803 for node_name, nr in rlist.items():
4804 if nr.fail_msg or not nr.payload:
4806 for (name, path, status, diagnose, variants,
4807 params, api_versions) in nr.payload:
4808 if name not in all_os:
4809 # build a list of nodes for this os containing empty lists
4810 # for each node in node_list
4812 for nname in good_nodes:
4813 all_os[name][nname] = []
4814 # convert params from [name, help] to (name, help)
4815 params = [tuple(v) for v in params]
4816 all_os[name][node_name].append((path, status, diagnose,
4817 variants, params, api_versions))
4820 def _GetQueryData(self, lu):
4821 """Computes the list of nodes and their attributes.
4824 # Locking is not used
4825 assert not (compat.any(lu.glm.is_owned(level)
4826 for level in locking.LEVELS
4827 if level != locking.LEVEL_CLUSTER) or
4828 self.do_locking or self.use_locking)
4830 valid_nodes = [node.name
4831 for node in lu.cfg.GetAllNodesInfo().values()
4832 if not node.offline and node.vm_capable]
4833 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4834 cluster = lu.cfg.GetClusterInfo()
4838 for (os_name, os_data) in pol.items():
4839 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4840 hidden=(os_name in cluster.hidden_os),
4841 blacklisted=(os_name in cluster.blacklisted_os))
4845 api_versions = set()
4847 for idx, osl in enumerate(os_data.values()):
4848 info.valid = bool(info.valid and osl and osl[0][1])
4852 (node_variants, node_params, node_api) = osl[0][3:6]
4855 variants.update(node_variants)
4856 parameters.update(node_params)
4857 api_versions.update(node_api)
4859 # Filter out inconsistent values
4860 variants.intersection_update(node_variants)
4861 parameters.intersection_update(node_params)
4862 api_versions.intersection_update(node_api)
4864 info.variants = list(variants)
4865 info.parameters = list(parameters)
4866 info.api_versions = list(api_versions)
4868 data[os_name] = info
4870 # Prepare data in requested order
4871 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4875 class LUOsDiagnose(NoHooksLU):
4876 """Logical unit for OS diagnose/query.
4882 def _BuildFilter(fields, names):
4883 """Builds a filter for querying OSes.
4886 name_filter = qlang.MakeSimpleFilter("name", names)
4888 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4889 # respective field is not requested
4890 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4891 for fname in ["hidden", "blacklisted"]
4892 if fname not in fields]
4893 if "valid" not in fields:
4894 status_filter.append([qlang.OP_TRUE, "valid"])
4897 status_filter.insert(0, qlang.OP_AND)
4899 status_filter = None
4901 if name_filter and status_filter:
4902 return [qlang.OP_AND, name_filter, status_filter]
4906 return status_filter
4908 def CheckArguments(self):
4909 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4910 self.op.output_fields, False)
4912 def ExpandNames(self):
4913 self.oq.ExpandNames(self)
4915 def Exec(self, feedback_fn):
4916 return self.oq.OldStyleQuery(self)
4919 class LUNodeRemove(LogicalUnit):
4920 """Logical unit for removing a node.
4923 HPATH = "node-remove"
4924 HTYPE = constants.HTYPE_NODE
4926 def BuildHooksEnv(self):
4931 "OP_TARGET": self.op.node_name,
4932 "NODE_NAME": self.op.node_name,
4935 def BuildHooksNodes(self):
4936 """Build hooks nodes.
4938 This doesn't run on the target node in the pre phase as a failed
4939 node would then be impossible to remove.
4942 all_nodes = self.cfg.GetNodeList()
4944 all_nodes.remove(self.op.node_name)
4947 return (all_nodes, all_nodes)
4949 def CheckPrereq(self):
4950 """Check prerequisites.
4953 - the node exists in the configuration
4954 - it does not have primary or secondary instances
4955 - it's not the master
4957 Any errors are signaled by raising errors.OpPrereqError.
4960 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4961 node = self.cfg.GetNodeInfo(self.op.node_name)
4962 assert node is not None
4964 masternode = self.cfg.GetMasterNode()
4965 if node.name == masternode:
4966 raise errors.OpPrereqError("Node is the master node, failover to another"
4967 " node is required", errors.ECODE_INVAL)
4969 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4970 if node.name in instance.all_nodes:
4971 raise errors.OpPrereqError("Instance %s is still running on the node,"
4972 " please remove first" % instance_name,
4974 self.op.node_name = node.name
4977 def Exec(self, feedback_fn):
4978 """Removes the node from the cluster.
4982 logging.info("Stopping the node daemon and removing configs from node %s",
4985 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4987 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4990 # Promote nodes to master candidate as needed
4991 _AdjustCandidatePool(self, exceptions=[node.name])
4992 self.context.RemoveNode(node.name)
4994 # Run post hooks on the node before it's removed
4995 _RunPostHook(self, node.name)
4997 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4998 msg = result.fail_msg
5000 self.LogWarning("Errors encountered on the remote node while leaving"
5001 " the cluster: %s", msg)
5003 # Remove node from our /etc/hosts
5004 if self.cfg.GetClusterInfo().modify_etc_hosts:
5005 master_node = self.cfg.GetMasterNode()
5006 result = self.rpc.call_etc_hosts_modify(master_node,
5007 constants.ETC_HOSTS_REMOVE,
5009 result.Raise("Can't update hosts file with new host data")
5010 _RedistributeAncillaryFiles(self)
5013 class _NodeQuery(_QueryBase):
5014 FIELDS = query.NODE_FIELDS
5016 def ExpandNames(self, lu):
5017 lu.needed_locks = {}
5018 lu.share_locks = _ShareAll()
5021 self.wanted = _GetWantedNodes(lu, self.names)
5023 self.wanted = locking.ALL_SET
5025 self.do_locking = (self.use_locking and
5026 query.NQ_LIVE in self.requested_data)
5029 # If any non-static field is requested we need to lock the nodes
5030 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5032 def DeclareLocks(self, lu, level):
5035 def _GetQueryData(self, lu):
5036 """Computes the list of nodes and their attributes.
5039 all_info = lu.cfg.GetAllNodesInfo()
5041 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5043 # Gather data as requested
5044 if query.NQ_LIVE in self.requested_data:
5045 # filter out non-vm_capable nodes
5046 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5048 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5049 [lu.cfg.GetHypervisorType()])
5050 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5051 for (name, nresult) in node_data.items()
5052 if not nresult.fail_msg and nresult.payload)
5056 if query.NQ_INST in self.requested_data:
5057 node_to_primary = dict([(name, set()) for name in nodenames])
5058 node_to_secondary = dict([(name, set()) for name in nodenames])
5060 inst_data = lu.cfg.GetAllInstancesInfo()
5062 for inst in inst_data.values():
5063 if inst.primary_node in node_to_primary:
5064 node_to_primary[inst.primary_node].add(inst.name)
5065 for secnode in inst.secondary_nodes:
5066 if secnode in node_to_secondary:
5067 node_to_secondary[secnode].add(inst.name)
5069 node_to_primary = None
5070 node_to_secondary = None
5072 if query.NQ_OOB in self.requested_data:
5073 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5074 for name, node in all_info.iteritems())
5078 if query.NQ_GROUP in self.requested_data:
5079 groups = lu.cfg.GetAllNodeGroupsInfo()
5083 return query.NodeQueryData([all_info[name] for name in nodenames],
5084 live_data, lu.cfg.GetMasterNode(),
5085 node_to_primary, node_to_secondary, groups,
5086 oob_support, lu.cfg.GetClusterInfo())
5089 class LUNodeQuery(NoHooksLU):
5090 """Logical unit for querying nodes.
5093 # pylint: disable=W0142
5096 def CheckArguments(self):
5097 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5098 self.op.output_fields, self.op.use_locking)
5100 def ExpandNames(self):
5101 self.nq.ExpandNames(self)
5103 def DeclareLocks(self, level):
5104 self.nq.DeclareLocks(self, level)
5106 def Exec(self, feedback_fn):
5107 return self.nq.OldStyleQuery(self)
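# Usage sketch (illustrative): this LU backs the old-style node listing and
# is normally reached through an opcode along the lines of
#
#   opcodes.OpNodeQuery(output_fields=["name", "pip", "sip"], names=[],
#                       use_locking=False)
#
# An empty names list means "all nodes"; use_locking only has an effect
# when live data (query.NQ_LIVE fields) is requested.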
5110 class LUNodeQueryvols(NoHooksLU):
5111 """Logical unit for getting volumes on node(s).
5115 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5116 _FIELDS_STATIC = utils.FieldSet("node")
5118 def CheckArguments(self):
5119 _CheckOutputFields(static=self._FIELDS_STATIC,
5120 dynamic=self._FIELDS_DYNAMIC,
5121 selected=self.op.output_fields)
5123 def ExpandNames(self):
5124 self.share_locks = _ShareAll()
5125 self.needed_locks = {}
5127 if not self.op.nodes:
5128 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5130 self.needed_locks[locking.LEVEL_NODE] = \
5131 _GetWantedNodes(self, self.op.nodes)
5133 def Exec(self, feedback_fn):
5134 """Computes the list of nodes and their attributes.
5137 nodenames = self.owned_locks(locking.LEVEL_NODE)
5138 volumes = self.rpc.call_node_volumes(nodenames)
5140 ilist = self.cfg.GetAllInstancesInfo()
5141 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5144 for node in nodenames:
5145 nresult = volumes[node]
5148 msg = nresult.fail_msg
5150 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5153 node_vols = sorted(nresult.payload,
5154 key=operator.itemgetter("dev"))
5156 for vol in node_vols:
5158 for field in self.op.output_fields:
5161 elif field == "phys":
5165 elif field == "name":
5167 elif field == "size":
5168 val = int(float(vol["size"]))
5169 elif field == "instance":
5170 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5172 raise errors.ParameterError(field)
5173 node_output.append(str(val))
5175 output.append(node_output)
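# Output sketch (illustrative): with output_fields ["node", "name", "size",
# "instance"] every entry appended above is a list of strings, e.g.
#
#   ["node1.example.com", "disk0", "10240", "instance1.example.com"]
#
# with "-" in the instance column for volumes not belonging to any instance.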
5180 class LUNodeQueryStorage(NoHooksLU):
5181 """Logical unit for getting information on storage units on node(s).
5184 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5187 def CheckArguments(self):
5188 _CheckOutputFields(static=self._FIELDS_STATIC,
5189 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5190 selected=self.op.output_fields)
5192 def ExpandNames(self):
5193 self.share_locks = _ShareAll()
5194 self.needed_locks = {}
5197 self.needed_locks[locking.LEVEL_NODE] = \
5198 _GetWantedNodes(self, self.op.nodes)
5200 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5202 def Exec(self, feedback_fn):
5203 """Computes the list of nodes and their attributes.
5206 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5208 # Always get name to sort by
5209 if constants.SF_NAME in self.op.output_fields:
5210 fields = self.op.output_fields[:]
5212 fields = [constants.SF_NAME] + self.op.output_fields
5214 # Never ask for node or type as it's only known to the LU
5215 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5216 while extra in fields:
5217 fields.remove(extra)
5219 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5220 name_idx = field_idx[constants.SF_NAME]
5222 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5223 data = self.rpc.call_storage_list(self.nodes,
5224 self.op.storage_type, st_args,
5225 self.op.name, fields)
5229 for node in utils.NiceSort(self.nodes):
5230 nresult = data[node]
5234 msg = nresult.fail_msg
5236 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5239 rows = dict([(row[name_idx], row) for row in nresult.payload])
5241 for name in utils.NiceSort(rows.keys()):
5246 for field in self.op.output_fields:
5247 if field == constants.SF_NODE:
5249 elif field == constants.SF_TYPE:
5250 val = self.op.storage_type
5251 elif field in field_idx:
5252 val = row[field_idx[field]]
5254 raise errors.ParameterError(field)
5263 class _InstanceQuery(_QueryBase):
5264 FIELDS = query.INSTANCE_FIELDS
5266 def ExpandNames(self, lu):
5267 lu.needed_locks = {}
5268 lu.share_locks = _ShareAll()
5271 self.wanted = _GetWantedInstances(lu, self.names)
5273 self.wanted = locking.ALL_SET
5275 self.do_locking = (self.use_locking and
5276 query.IQ_LIVE in self.requested_data)
5278 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5279 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5280 lu.needed_locks[locking.LEVEL_NODE] = []
5281 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5283 self.do_grouplocks = (self.do_locking and
5284 query.IQ_NODES in self.requested_data)
5286 def DeclareLocks(self, lu, level):
5288 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5289 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5291 # Lock all groups used by instances optimistically; this requires going
5292 # via the node before it's locked, requiring verification later on
5293 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5295 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5296 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5297 elif level == locking.LEVEL_NODE:
5298 lu._LockInstancesNodes() # pylint: disable=W0212
5301 def _CheckGroupLocks(lu):
5302 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5303 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5305 # Check if node groups for locked instances are still correct
5306 for instance_name in owned_instances:
5307 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5309 def _GetQueryData(self, lu):
5310 """Computes the list of instances and their attributes.
5313 if self.do_grouplocks:
5314 self._CheckGroupLocks(lu)
5316 cluster = lu.cfg.GetClusterInfo()
5317 all_info = lu.cfg.GetAllInstancesInfo()
5319 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5321 instance_list = [all_info[name] for name in instance_names]
5322 nodes = frozenset(itertools.chain(*(inst.all_nodes
5323 for inst in instance_list)))
5324 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5327 wrongnode_inst = set()
5329 # Gather data as requested
5330 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5332 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5334 result = node_data[name]
5336 # offline nodes will be in both lists
5337 assert result.fail_msg
5338 offline_nodes.append(name)
5340 bad_nodes.append(name)
5341 elif result.payload:
5342 for inst in result.payload:
5343 if inst in all_info:
5344 if all_info[inst].primary_node == name:
5345 live_data.update(result.payload)
5347 wrongnode_inst.add(inst)
5349 # orphan instance; we don't list it here as we don't
5350 # handle this case yet in the output of instance listing
5351 logging.warning("Orphan instance '%s' found on node %s",
5353 # else no instance is alive
5357 if query.IQ_DISKUSAGE in self.requested_data:
5358 gmi = ganeti.masterd.instance
5359 disk_usage = dict((inst.name,
5360 gmi.ComputeDiskSize(inst.disk_template,
5361 [{constants.IDISK_SIZE: disk.size}
5362 for disk in inst.disks]))
5363 for inst in instance_list)
5367 if query.IQ_CONSOLE in self.requested_data:
5369 for inst in instance_list:
5370 if inst.name in live_data:
5371 # Instance is running
5372 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5374 consinfo[inst.name] = None
5375 assert set(consinfo.keys()) == set(instance_names)
5379 if query.IQ_NODES in self.requested_data:
5380 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5382 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5383 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5384 for uuid in set(map(operator.attrgetter("group"),
5390 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5391 disk_usage, offline_nodes, bad_nodes,
5392 live_data, wrongnode_inst, consinfo,
5396 class LUQuery(NoHooksLU):
5397 """Query for resources/items of a certain kind.
5400 # pylint: disable=W0142
5403 def CheckArguments(self):
5404 qcls = _GetQueryImplementation(self.op.what)
5406 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5408 def ExpandNames(self):
5409 self.impl.ExpandNames(self)
5411 def DeclareLocks(self, level):
5412 self.impl.DeclareLocks(self, level)
5414 def Exec(self, feedback_fn):
5415 return self.impl.NewStyleQuery(self)
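# Usage sketch (illustrative): new-style queries specify a resource kind, a
# field list and an optional qlang filter; assuming the usual opcode and
# constant names in this codebase, a filtered node query would look
# roughly like
#
#   opcodes.OpQuery(what=constants.QR_NODE,
#                   fields=["name", "master_candidate"],
#                   qfilter=qlang.MakeSimpleFilter("name", ["node1"]))
#
# _GetQueryImplementation then maps the "what" value to the matching
# _QueryBase subclass (here _NodeQuery).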
5418 class LUQueryFields(NoHooksLU):
5419 """Query for resources/items of a certain kind.
5422 # pylint: disable=W0142
5425 def CheckArguments(self):
5426 self.qcls = _GetQueryImplementation(self.op.what)
5428 def ExpandNames(self):
5429 self.needed_locks = {}
5431 def Exec(self, feedback_fn):
5432 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5435 class LUNodeModifyStorage(NoHooksLU):
5436 """Logical unit for modifying a storage volume on a node.
5441 def CheckArguments(self):
5442 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5444 storage_type = self.op.storage_type
5447 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5449 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5450 " modified" % storage_type,
5453 diff = set(self.op.changes.keys()) - modifiable
5455 raise errors.OpPrereqError("The following fields can not be modified for"
5456 " storage units of type '%s': %r" %
5457 (storage_type, list(diff)),
5460 def ExpandNames(self):
5461 self.needed_locks = {
5462 locking.LEVEL_NODE: self.op.node_name,
5465 def Exec(self, feedback_fn):
5466 """Computes the list of nodes and their attributes.
5469 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5470 result = self.rpc.call_storage_modify(self.op.node_name,
5471 self.op.storage_type, st_args,
5472 self.op.name, self.op.changes)
5473 result.Raise("Failed to modify storage unit '%s' on %s" %
5474 (self.op.name, self.op.node_name))
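# Usage sketch (illustrative): for storage types that allow modification,
# self.op.changes is a dict of field updates; marking an LVM physical
# volume as non-allocatable would use something like
#
#   {constants.SF_ALLOCATABLE: False}
#
# with the LVM PV storage type; any field outside
# constants.MODIFIABLE_STORAGE_FIELDS for that type is rejected in
# CheckArguments above.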
5477 class LUNodeAdd(LogicalUnit):
5478 """Logical unit for adding node to the cluster.
5482 HTYPE = constants.HTYPE_NODE
5483 _NFLAGS = ["master_capable", "vm_capable"]
5485 def CheckArguments(self):
5486 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5487 # validate/normalize the node name
5488 self.hostname = netutils.GetHostname(name=self.op.node_name,
5489 family=self.primary_ip_family)
5490 self.op.node_name = self.hostname.name
5492 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5493 raise errors.OpPrereqError("Cannot readd the master node",
5496 if self.op.readd and self.op.group:
5497 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5498 " being readded", errors.ECODE_INVAL)
5500 def BuildHooksEnv(self):
5503 This will run on all nodes before, and on all nodes + the new node after.
5507 "OP_TARGET": self.op.node_name,
5508 "NODE_NAME": self.op.node_name,
5509 "NODE_PIP": self.op.primary_ip,
5510 "NODE_SIP": self.op.secondary_ip,
5511 "MASTER_CAPABLE": str(self.op.master_capable),
5512 "VM_CAPABLE": str(self.op.vm_capable),
5515 def BuildHooksNodes(self):
5516 """Build hooks nodes.
5519 # Exclude added node
5520 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5521 post_nodes = pre_nodes + [self.op.node_name, ]
5523 return (pre_nodes, post_nodes)
5525 def CheckPrereq(self):
5526 """Check prerequisites.
5529 - the new node is not already in the config
5531 - its parameters (single/dual homed) match the cluster
5533 Any errors are signaled by raising errors.OpPrereqError.
5537 hostname = self.hostname
5538 node = hostname.name
5539 primary_ip = self.op.primary_ip = hostname.ip
5540 if self.op.secondary_ip is None:
5541 if self.primary_ip_family == netutils.IP6Address.family:
5542 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5543 " IPv4 address must be given as secondary",
5545 self.op.secondary_ip = primary_ip
5547 secondary_ip = self.op.secondary_ip
5548 if not netutils.IP4Address.IsValid(secondary_ip):
5549 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5550 " address" % secondary_ip, errors.ECODE_INVAL)
5552 node_list = cfg.GetNodeList()
5553 if not self.op.readd and node in node_list:
5554 raise errors.OpPrereqError("Node %s is already in the configuration" %
5555 node, errors.ECODE_EXISTS)
5556 elif self.op.readd and node not in node_list:
5557 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5560 self.changed_primary_ip = False
5562 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5563 if self.op.readd and node == existing_node_name:
5564 if existing_node.secondary_ip != secondary_ip:
5565 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5566 " address configuration as before",
5568 if existing_node.primary_ip != primary_ip:
5569 self.changed_primary_ip = True
5573 if (existing_node.primary_ip == primary_ip or
5574 existing_node.secondary_ip == primary_ip or
5575 existing_node.primary_ip == secondary_ip or
5576 existing_node.secondary_ip == secondary_ip):
5577 raise errors.OpPrereqError("New node ip address(es) conflict with"
5578 " existing node %s" % existing_node.name,
5579 errors.ECODE_NOTUNIQUE)
5581 # After this 'if' block, None is no longer a valid value for the
5582 # _capable op attributes
5584 old_node = self.cfg.GetNodeInfo(node)
5585 assert old_node is not None, "Can't retrieve locked node %s" % node
5586 for attr in self._NFLAGS:
5587 if getattr(self.op, attr) is None:
5588 setattr(self.op, attr, getattr(old_node, attr))
5590 for attr in self._NFLAGS:
5591 if getattr(self.op, attr) is None:
5592 setattr(self.op, attr, True)
5594 if self.op.readd and not self.op.vm_capable:
5595 pri, sec = cfg.GetNodeInstances(node)
5597 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5598 " flag set to false, but it already holds"
5599 " instances" % node,
5602 # check that the type of the node (single versus dual homed) is the
5603 # same as for the master
5604 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5605 master_singlehomed = myself.secondary_ip == myself.primary_ip
5606 newbie_singlehomed = secondary_ip == primary_ip
5607 if master_singlehomed != newbie_singlehomed:
5608 if master_singlehomed:
5609 raise errors.OpPrereqError("The master has no secondary ip but the"
5610 " new node has one",
5613 raise errors.OpPrereqError("The master has a secondary ip but the"
5614 " new node doesn't have one",
5617 # checks reachability
5618 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5619 raise errors.OpPrereqError("Node not reachable by ping",
5620 errors.ECODE_ENVIRON)
5622 if not newbie_singlehomed:
5623 # check reachability from my secondary ip to newbie's secondary ip
5624 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5625 source=myself.secondary_ip):
5626 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5627 " based ping to node daemon port",
5628 errors.ECODE_ENVIRON)
5635 if self.op.master_capable:
5636 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5638 self.master_candidate = False
5641 self.new_node = old_node
5643 node_group = cfg.LookupNodeGroup(self.op.group)
5644 self.new_node = objects.Node(name=node,
5645 primary_ip=primary_ip,
5646 secondary_ip=secondary_ip,
5647 master_candidate=self.master_candidate,
5648 offline=False, drained=False,
5651 if self.op.ndparams:
5652 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5654 if self.op.hv_state:
5655 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5657 if self.op.disk_state:
5658 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5660 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5661 # it a property on the base class.
5662 result = rpc.DnsOnlyRunner().call_version([node])[node]
5663 result.Raise("Can't get version information from node %s" % node)
5664 if constants.PROTOCOL_VERSION == result.payload:
5665 logging.info("Communication to node %s fine, sw version %s match",
5666 node, result.payload)
5667 else:
5668 raise errors.OpPrereqError("Version mismatch master version %s,"
5669 " node version %s" %
5670 (constants.PROTOCOL_VERSION, result.payload),
5671 errors.ECODE_ENVIRON)
5673 def Exec(self, feedback_fn):
5674 """Adds the new node to the cluster.
5677 new_node = self.new_node
5678 node = new_node.name
5680 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5683 # We are adding a new node, so we assume it's powered
5684 new_node.powered = True
5686 # for re-adds, reset the offline/drained/master-candidate flags;
5687 # we need to reset here, otherwise offline would prevent RPC calls
5688 # later in the procedure; this also means that if the re-add
5689 # fails, we are left with a non-offlined, broken node
5690 if self.op.readd:
5691 new_node.drained = new_node.offline = False # pylint: disable=W0201
5692 self.LogInfo("Readding a node, the offline/drained flags were reset")
5693 # if we demote the node, we do cleanup later in the procedure
5694 new_node.master_candidate = self.master_candidate
5695 if self.changed_primary_ip:
5696 new_node.primary_ip = self.op.primary_ip
5698 # copy the master/vm_capable flags
5699 for attr in self._NFLAGS:
5700 setattr(new_node, attr, getattr(self.op, attr))
5702 # notify the user about any possible mc promotion
5703 if new_node.master_candidate:
5704 self.LogInfo("Node will be a master candidate")
5706 if self.op.ndparams:
5707 new_node.ndparams = self.op.ndparams
5708 else:
5709 new_node.ndparams = {}
5711 if self.op.hv_state:
5712 new_node.hv_state_static = self.new_hv_state
5714 if self.op.disk_state:
5715 new_node.disk_state_static = self.new_disk_state
5717 # Add node to our /etc/hosts, and add key to known_hosts
5718 if self.cfg.GetClusterInfo().modify_etc_hosts:
5719 master_node = self.cfg.GetMasterNode()
5720 result = self.rpc.call_etc_hosts_modify(master_node,
5721 constants.ETC_HOSTS_ADD,
5722 self.hostname.name,
5723 self.hostname.ip)
5724 result.Raise("Can't update hosts file with new host data")
5726 if new_node.secondary_ip != new_node.primary_ip:
5727 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5730 node_verify_list = [self.cfg.GetMasterNode()]
5731 node_verify_param = {
5732 constants.NV_NODELIST: ([node], {}),
5733 # TODO: do a node-net-test as well?
5734 }
5736 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5737 self.cfg.GetClusterName())
5738 for verifier in node_verify_list:
5739 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5740 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5741 if nl_payload:
5742 for failed in nl_payload:
5743 feedback_fn("ssh/hostname verification failed"
5744 " (checking from %s): %s" %
5745 (verifier, nl_payload[failed]))
5746 raise errors.OpExecError("ssh/hostname verification failed")
5748 if self.op.readd:
5749 _RedistributeAncillaryFiles(self)
5750 self.context.ReaddNode(new_node)
5751 # make sure we redistribute the config
5752 self.cfg.Update(new_node, feedback_fn)
5753 # and make sure the new node will not have old files around
5754 if not new_node.master_candidate:
5755 result = self.rpc.call_node_demote_from_mc(new_node.name)
5756 msg = result.fail_msg
5757 if msg:
5758 self.LogWarning("Node failed to demote itself from master"
5759 " candidate status: %s" % msg)
5760 else:
5761 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5762 additional_vm=self.op.vm_capable)
5763 self.context.AddNode(new_node, self.proc.GetECId())
5766 class LUNodeSetParams(LogicalUnit):
5767 """Modifies the parameters of a node.
5769 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5770 to the node role (as _ROLE_*)
5771 @cvar _R2F: a dictionary from node role to tuples of flags
5772 @cvar _FLAGS: a list of attribute names corresponding to the flags
5775 HPATH = "node-modify"
5776 HTYPE = constants.HTYPE_NODE
5778 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5779 _F2R = {
5780 (True, False, False): _ROLE_CANDIDATE,
5781 (False, True, False): _ROLE_DRAINED,
5782 (False, False, True): _ROLE_OFFLINE,
5783 (False, False, False): _ROLE_REGULAR,
5784 }
5785 _R2F = dict((v, k) for k, v in _F2R.items())
5786 _FLAGS = ["master_candidate", "drained", "offline"]
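# The three tables above form a bijection between flag combinations and node
# roles: for example _F2R[(True, False, False)] is _ROLE_CANDIDATE and
# _R2F[_ROLE_OFFLINE] is (False, False, True).  CheckPrereq translates the
# current master_candidate/drained/offline attributes (the names in _FLAGS)
# into a role, and Exec translates the computed new role back into the flags.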
5788 def CheckArguments(self):
5789 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5790 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5791 self.op.master_capable, self.op.vm_capable,
5792 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5793 self.op.disk_state]
5794 if all_mods.count(None) == len(all_mods):
5795 raise errors.OpPrereqError("Please pass at least one modification",
5797 if all_mods.count(True) > 1:
5798 raise errors.OpPrereqError("Can't set the node into more than one"
5799 " state at the same time",
5802 # Boolean value that tells us whether we might be demoting from MC
5803 self.might_demote = (self.op.master_candidate is False or
5804 self.op.offline is True or
5805 self.op.drained is True or
5806 self.op.master_capable is False)
5808 if self.op.secondary_ip:
5809 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5810 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5811 " address" % self.op.secondary_ip,
5814 self.lock_all = self.op.auto_promote and self.might_demote
5815 self.lock_instances = self.op.secondary_ip is not None
5817 def _InstanceFilter(self, instance):
5818 """Filter for getting affected instances.
5821 return (instance.disk_template in constants.DTS_INT_MIRROR and
5822 self.op.node_name in instance.all_nodes)
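  # Note: this filter is evaluated twice on purpose: once in ExpandNames to
  # compute the set of instance locks to acquire, and again in CheckPrereq to
  # detect instances that appeared or disappeared in between, in which case
  # the operation asks the caller to retry (see the wanted/owned comparison
  # in CheckPrereq below).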
5824 def ExpandNames(self):
5825 if self.lock_all:
5826 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5827 else:
5828 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5830 # Since modifying a node can have severe effects on currently running
5831 # operations the resource lock is at least acquired in shared mode
5832 self.needed_locks[locking.LEVEL_NODE_RES] = \
5833 self.needed_locks[locking.LEVEL_NODE]
5835 # Get node resource and instance locks in shared mode; they are not used
5836 # for anything but read-only access
5837 self.share_locks[locking.LEVEL_NODE_RES] = 1
5838 self.share_locks[locking.LEVEL_INSTANCE] = 1
5840 if self.lock_instances:
5841 self.needed_locks[locking.LEVEL_INSTANCE] = \
5842 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5844 def BuildHooksEnv(self):
5847 This runs on the master node.
5849 """
5850 return {
5851 "OP_TARGET": self.op.node_name,
5852 "MASTER_CANDIDATE": str(self.op.master_candidate),
5853 "OFFLINE": str(self.op.offline),
5854 "DRAINED": str(self.op.drained),
5855 "MASTER_CAPABLE": str(self.op.master_capable),
5856 "VM_CAPABLE": str(self.op.vm_capable),
5857 }
5859 def BuildHooksNodes(self):
5860 """Build hooks nodes.
5862 """
5863 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5864 return (nl, nl)
5866 def CheckPrereq(self):
5867 """Check prerequisites.
5869 This only checks the instance list against the existing names.
5872 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5874 if self.lock_instances:
5875 affected_instances = \
5876 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5878 # Verify instance locks
5879 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5880 wanted_instances = frozenset(affected_instances.keys())
5881 if wanted_instances - owned_instances:
5882 raise errors.OpPrereqError("Instances affected by changing node %s's"
5883 " secondary IP address have changed since"
5884 " locks were acquired, wanted '%s', have"
5885 " '%s'; retry the operation" %
5886 (self.op.node_name,
5887 utils.CommaJoin(wanted_instances),
5888 utils.CommaJoin(owned_instances)),
5889 errors.ECODE_STATE)
5890 else:
5891 affected_instances = None
5893 if (self.op.master_candidate is not None or
5894 self.op.drained is not None or
5895 self.op.offline is not None):
5896 # we can't change the master's node flags
5897 if self.op.node_name == self.cfg.GetMasterNode():
5898 raise errors.OpPrereqError("The master role can be changed"
5899 " only via master-failover",
5902 if self.op.master_candidate and not node.master_capable:
5903 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5904 " it a master candidate" % node.name,
5907 if self.op.vm_capable is False:
5908 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5909 if ipri or isec:
5910 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5911 " the vm_capable flag" % node.name,
5912 errors.ECODE_STATE)
5914 if node.master_candidate and self.might_demote and not self.lock_all:
5915 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5916 # check if after removing the current node, we're missing master
5918 (mc_remaining, mc_should, _) = \
5919 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5920 if mc_remaining < mc_should:
5921 raise errors.OpPrereqError("Not enough master candidates, please"
5922 " pass auto promote option to allow"
5923 " promotion (--auto-promote or RAPI"
5924 " auto_promote=True)", errors.ECODE_STATE)
5926 self.old_flags = old_flags = (node.master_candidate,
5927 node.drained, node.offline)
5928 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5929 self.old_role = old_role = self._F2R[old_flags]
5931 # Check for ineffective changes
5932 for attr in self._FLAGS:
5933 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5934 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5935 setattr(self.op, attr, None)
5937 # Past this point, any flag change to False means a transition
5938 # away from the respective state, as only real changes are kept
5940 # TODO: We might query the real power state if it supports OOB
5941 if _SupportsOob(self.cfg, node):
5942 if self.op.offline is False and not (node.powered or
5943 self.op.powered is True):
5944 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5945 " offline status can be reset") %
5946 self.op.node_name, errors.ECODE_STATE)
5947 elif self.op.powered is not None:
5948 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5949 " as it does not support out-of-band"
5950 " handling") % self.op.node_name,
5953 # If we're being deofflined/drained, we'll MC ourself if needed
5954 if (self.op.drained is False or self.op.offline is False or
5955 (self.op.master_capable and not node.master_capable)):
5956 if _DecideSelfPromotion(self):
5957 self.op.master_candidate = True
5958 self.LogInfo("Auto-promoting node to master candidate")
5960 # If we're no longer master capable, we'll demote ourselves from MC
5961 if self.op.master_capable is False and node.master_candidate:
5962 self.LogInfo("Demoting from master candidate")
5963 self.op.master_candidate = False
5966 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5967 if self.op.master_candidate:
5968 new_role = self._ROLE_CANDIDATE
5969 elif self.op.drained:
5970 new_role = self._ROLE_DRAINED
5971 elif self.op.offline:
5972 new_role = self._ROLE_OFFLINE
5973 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5974 # False is still in new flags, which means we're un-setting (the
5976 new_role = self._ROLE_REGULAR
5977 else: # no new flags, nothing, keep old role
5978 new_role = old_role
5980 self.new_role = new_role
5982 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5983 # Trying to transition out of offline status
5984 result = self.rpc.call_version([node.name])[node.name]
5985 if result.fail_msg:
5986 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5987 " to report its version: %s" %
5988 (node.name, result.fail_msg),
5989 errors.ECODE_ENVIRON)
5990 else:
5991 self.LogWarning("Transitioning node from offline to online state"
5992 " without using re-add. Please make sure the node"
5993 " is healthy!")
5995 # When changing the secondary ip, verify if this is a single-homed to
5996 # multi-homed transition or vice versa, and apply the relevant
5998 if self.op.secondary_ip:
5999 # Ok even without locking, because this can't be changed by any LU
6000 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6001 master_singlehomed = master.secondary_ip == master.primary_ip
6002 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6003 if self.op.force and node.name == master.name:
6004 self.LogWarning("Transitioning from single-homed to multi-homed"
6005 " cluster. All nodes will require a secondary ip.")
6006 else:
6007 raise errors.OpPrereqError("Changing the secondary ip on a"
6008 " single-homed cluster requires the"
6009 " --force option to be passed, and the"
6010 " target node to be the master",
6012 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6013 if self.op.force and node.name == master.name:
6014 self.LogWarning("Transitioning from multi-homed to single-homed"
6015 " cluster. Secondary IPs will have to be removed.")
6016 else:
6017 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6018 " same as the primary IP on a multi-homed"
6019 " cluster, unless the --force option is"
6020 " passed, and the target node is the"
6021 " master", errors.ECODE_INVAL)
6023 assert not (frozenset(affected_instances) -
6024 self.owned_locks(locking.LEVEL_INSTANCE))
6026 if node.offline:
6027 if affected_instances:
6028 msg = ("Cannot change secondary IP address: offline node has"
6029 " instances (%s) configured to use it" %
6030 utils.CommaJoin(affected_instances.keys()))
6031 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6032 else:
6033 # On online nodes, check that no instances are running, and that
6034 # the node has the new ip and we can reach it.
6035 for instance in affected_instances.values():
6036 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6037 msg="cannot change secondary ip")
6039 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6040 if master.name != node.name:
6041 # check reachability from master secondary ip to new secondary ip
6042 if not netutils.TcpPing(self.op.secondary_ip,
6043 constants.DEFAULT_NODED_PORT,
6044 source=master.secondary_ip):
6045 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6046 " based ping to node daemon port",
6047 errors.ECODE_ENVIRON)
6049 if self.op.ndparams:
6050 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6051 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6052 self.new_ndparams = new_ndparams
6054 if self.op.hv_state:
6055 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6056 self.node.hv_state_static)
6058 if self.op.disk_state:
6059 self.new_disk_state = \
6060 _MergeAndVerifyDiskState(self.op.disk_state,
6061 self.node.disk_state_static)
6063 def Exec(self, feedback_fn):
6064 """Modifies a node.
6066 """
6067 node = self.node
6068 old_role = self.old_role
6069 new_role = self.new_role
6071 result = []
6073 if self.op.ndparams:
6074 node.ndparams = self.new_ndparams
6076 if self.op.powered is not None:
6077 node.powered = self.op.powered
6079 if self.op.hv_state:
6080 node.hv_state_static = self.new_hv_state
6082 if self.op.disk_state:
6083 node.disk_state_static = self.new_disk_state
6085 for attr in ["master_capable", "vm_capable"]:
6086 val = getattr(self.op, attr)
6087 if val is not None:
6088 setattr(node, attr, val)
6089 result.append((attr, str(val)))
6091 if new_role != old_role:
6092 # Tell the node to demote itself, if no longer MC and not offline
6093 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6094 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6095 if msg:
6096 self.LogWarning("Node failed to demote itself: %s", msg)
6098 new_flags = self._R2F[new_role]
6099 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6100 if of != nf:
6101 result.append((desc, str(nf)))
6102 (node.master_candidate, node.drained, node.offline) = new_flags
6104 # we locked all nodes, we adjust the CP before updating this node
6105 if self.lock_all:
6106 _AdjustCandidatePool(self, [node.name])
6108 if self.op.secondary_ip:
6109 node.secondary_ip = self.op.secondary_ip
6110 result.append(("secondary_ip", self.op.secondary_ip))
6112 # this will trigger configuration file update, if needed
6113 self.cfg.Update(node, feedback_fn)
6115 # this will trigger job queue propagation or cleanup if the mc
6117 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6118 self.context.ReaddNode(node)
6120 return result
6123 class LUNodePowercycle(NoHooksLU):
6124 """Powercycles a node.
6129 def CheckArguments(self):
6130 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6131 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6132 raise errors.OpPrereqError("The node is the master and the force"
6133 " parameter was not set",
6136 def ExpandNames(self):
6137 """Locking for PowercycleNode.
6139 This is a last-resort option and shouldn't block on other
6140 jobs. Therefore, we grab no locks.
6143 self.needed_locks = {}
6145 def Exec(self, feedback_fn):
6149 result = self.rpc.call_node_powercycle(self.op.node_name,
6150 self.cfg.GetHypervisorType())
6151 result.Raise("Failed to schedule the reboot")
6152 return result.payload
6155 class LUClusterQuery(NoHooksLU):
6156 """Query cluster configuration.
6161 def ExpandNames(self):
6162 self.needed_locks = {}
6164 def Exec(self, feedback_fn):
6165 """Return cluster config.
6168 cluster = self.cfg.GetClusterInfo()
6169 os_hvp = {}
6171 # Filter just for enabled hypervisors
6172 for os_name, hv_dict in cluster.os_hvp.items():
6173 os_hvp[os_name] = {}
6174 for hv_name, hv_params in hv_dict.items():
6175 if hv_name in cluster.enabled_hypervisors:
6176 os_hvp[os_name][hv_name] = hv_params
6178 # Convert ip_family to ip_version
6179 primary_ip_version = constants.IP4_VERSION
6180 if cluster.primary_ip_family == netutils.IP6Address.family:
6181 primary_ip_version = constants.IP6_VERSION
6183 result = {
6184 "software_version": constants.RELEASE_VERSION,
6185 "protocol_version": constants.PROTOCOL_VERSION,
6186 "config_version": constants.CONFIG_VERSION,
6187 "os_api_version": max(constants.OS_API_VERSIONS),
6188 "export_version": constants.EXPORT_VERSION,
6189 "architecture": runtime.GetArchInfo(),
6190 "name": cluster.cluster_name,
6191 "master": cluster.master_node,
6192 "default_hypervisor": cluster.primary_hypervisor,
6193 "enabled_hypervisors": cluster.enabled_hypervisors,
6194 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6195 for hypervisor_name in cluster.enabled_hypervisors]),
6197 "beparams": cluster.beparams,
6198 "osparams": cluster.osparams,
6199 "ipolicy": cluster.ipolicy,
6200 "nicparams": cluster.nicparams,
6201 "ndparams": cluster.ndparams,
6202 "diskparams": cluster.diskparams,
6203 "candidate_pool_size": cluster.candidate_pool_size,
6204 "master_netdev": cluster.master_netdev,
6205 "master_netmask": cluster.master_netmask,
6206 "use_external_mip_script": cluster.use_external_mip_script,
6207 "volume_group_name": cluster.volume_group_name,
6208 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6209 "file_storage_dir": cluster.file_storage_dir,
6210 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6211 "maintain_node_health": cluster.maintain_node_health,
6212 "ctime": cluster.ctime,
6213 "mtime": cluster.mtime,
6214 "uuid": cluster.uuid,
6215 "tags": list(cluster.GetTags()),
6216 "uid_pool": cluster.uid_pool,
6217 "default_iallocator": cluster.default_iallocator,
6218 "reserved_lvs": cluster.reserved_lvs,
6219 "primary_ip_version": primary_ip_version,
6220 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6221 "hidden_os": cluster.hidden_os,
6222 "blacklisted_os": cluster.blacklisted_os,
6223 }
6225 return result
6228 class LUClusterConfigQuery(NoHooksLU):
6229 """Return configuration values.
6234 def CheckArguments(self):
6235 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6237 def ExpandNames(self):
6238 self.cq.ExpandNames(self)
6240 def DeclareLocks(self, level):
6241 self.cq.DeclareLocks(self, level)
6243 def Exec(self, feedback_fn):
6244 result = self.cq.OldStyleQuery(self)
6246 assert len(result) == 1
6251 class _ClusterQuery(_QueryBase):
6252 FIELDS = query.CLUSTER_FIELDS
6254 #: Do not sort (there is only one item)
6257 def ExpandNames(self, lu):
6258 lu.needed_locks = {}
6260 # The following variables interact with _QueryBase._GetNames
6261 self.wanted = locking.ALL_SET
6262 self.do_locking = self.use_locking
6264 if self.do_locking:
6265 raise errors.OpPrereqError("Can not use locking for cluster queries",
6266 errors.ECODE_INVAL)
6268 def DeclareLocks(self, lu, level):
6271 def _GetQueryData(self, lu):
6272 """Computes the list of nodes and their attributes.
6275 # Locking is not used
6276 assert not (compat.any(lu.glm.is_owned(level)
6277 for level in locking.LEVELS
6278 if level != locking.LEVEL_CLUSTER) or
6279 self.do_locking or self.use_locking)
6281 if query.CQ_CONFIG in self.requested_data:
6282 cluster = lu.cfg.GetClusterInfo()
6283 else:
6284 cluster = NotImplemented
6286 if query.CQ_QUEUE_DRAINED in self.requested_data:
6287 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6288 else:
6289 drain_flag = NotImplemented
6291 if query.CQ_WATCHER_PAUSE in self.requested_data:
6292 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6293 else:
6294 watcher_pause = NotImplemented
6296 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
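    # Data that was not requested is passed to ClusterQueryData as
    # NotImplemented rather than being fetched, so only the pieces named in
    # self.requested_data are actually read from the configuration, the job
    # queue drain file or the watcher pause file.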
6299 class LUInstanceActivateDisks(NoHooksLU):
6300 """Bring up an instance's disks.
6305 def ExpandNames(self):
6306 self._ExpandAndLockInstance()
6307 self.needed_locks[locking.LEVEL_NODE] = []
6308 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6310 def DeclareLocks(self, level):
6311 if level == locking.LEVEL_NODE:
6312 self._LockInstancesNodes()
6314 def CheckPrereq(self):
6315 """Check prerequisites.
6317 This checks that the instance is in the cluster.
6320 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6321 assert self.instance is not None, \
6322 "Cannot retrieve locked instance %s" % self.op.instance_name
6323 _CheckNodeOnline(self, self.instance.primary_node)
6325 def Exec(self, feedback_fn):
6326 """Activate the disks.
6329 disks_ok, disks_info = \
6330 _AssembleInstanceDisks(self, self.instance,
6331 ignore_size=self.op.ignore_size)
6332 if not disks_ok:
6333 raise errors.OpExecError("Cannot activate block devices")
6335 if self.op.wait_for_sync:
6336 if not _WaitForSync(self, self.instance):
6337 raise errors.OpExecError("Some disks of the instance are degraded!")
6339 return disks_info
6342 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6343 ignore_size=False):
6344 """Prepare the block devices for an instance.
6346 This sets up the block devices on all nodes.
6348 @type lu: L{LogicalUnit}
6349 @param lu: the logical unit on whose behalf we execute
6350 @type instance: L{objects.Instance}
6351 @param instance: the instance for whose disks we assemble
6352 @type disks: list of L{objects.Disk} or None
6353 @param disks: which disks to assemble (or all, if None)
6354 @type ignore_secondaries: boolean
6355 @param ignore_secondaries: if true, errors on secondary nodes
6356 won't result in an error return from the function
6357 @type ignore_size: boolean
6358 @param ignore_size: if true, the current known size of the disk
6359 will not be used during the disk activation, useful for cases
6360 when the size is wrong
6361 @return: False if the operation failed, otherwise a list of
6362 (host, instance_visible_name, node_visible_name)
6363 with the mapping from node devices to instance devices
6365 """
6366 device_info = []
6367 disks_ok = True
6368 iname = instance.name
6369 disks = _ExpandCheckDisks(instance, disks)
6371 # With the two-pass mechanism we try to reduce the window of
6372 # opportunity for the race condition of switching DRBD to primary
6373 # before handshaking occurred, but we do not eliminate it
6375 # The proper fix would be to wait (with some limits) until the
6376 # connection has been made and drbd transitions from WFConnection
6377 # into any other network-connected state (Connected, SyncTarget,
6380 # 1st pass, assemble on all nodes in secondary mode
6381 for idx, inst_disk in enumerate(disks):
6382 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6383 if ignore_size:
6384 node_disk = node_disk.Copy()
6385 node_disk.UnsetSize()
6386 lu.cfg.SetDiskID(node_disk, node)
6387 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6388 False, idx)
6389 msg = result.fail_msg
6390 if msg:
6391 is_offline_secondary = (node in instance.secondary_nodes and
6392 result.offline)
6393 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6394 " (is_primary=False, pass=1): %s",
6395 inst_disk.iv_name, node, msg)
6396 if not (ignore_secondaries or is_offline_secondary):
6397 disks_ok = False
6399 # FIXME: race condition on drbd migration to primary
6401 # 2nd pass, do only the primary node
6402 for idx, inst_disk in enumerate(disks):
6403 dev_path = None
6405 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6406 if node != instance.primary_node:
6407 continue
6408 if ignore_size:
6409 node_disk = node_disk.Copy()
6410 node_disk.UnsetSize()
6411 lu.cfg.SetDiskID(node_disk, node)
6412 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6413 True, idx)
6414 msg = result.fail_msg
6415 if msg:
6416 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6417 " (is_primary=True, pass=2): %s",
6418 inst_disk.iv_name, node, msg)
6419 disks_ok = False
6420 else:
6421 dev_path = result.payload
6423 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6425 # leave the disks configured for the primary node
6426 # this is a workaround that would be fixed better by
6427 # improving the logical/physical id handling
6428 for disk in disks:
6429 lu.cfg.SetDiskID(disk, instance.primary_node)
6431 return disks_ok, device_info
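# Summary of the contract above: the first pass assembles every disk on every
# node in secondary mode, the second pass re-assembles only on the primary
# node, and device_info only describes the primary-node devices.  Callers
# such as _StartInstanceDisks below only look at the disks_ok flag.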
6434 def _StartInstanceDisks(lu, instance, force):
6435 """Start the disks of an instance.
6438 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6439 ignore_secondaries=force)
6440 if not disks_ok:
6441 _ShutdownInstanceDisks(lu, instance)
6442 if force is not None and not force:
6443 lu.proc.LogWarning("", hint="If the message above refers to a"
6445 " you can retry the operation using '--force'.")
6446 raise errors.OpExecError("Disk consistency error")
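# The force argument doubles as ignore_secondaries here; passing None (as
# LUInstanceReinstall and LUInstanceRename do further down) means "not
# forceable", so the '--force' hint above is only printed when an explicit
# boolean False was given.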
6449 class LUInstanceDeactivateDisks(NoHooksLU):
6450 """Shutdown an instance's disks.
6455 def ExpandNames(self):
6456 self._ExpandAndLockInstance()
6457 self.needed_locks[locking.LEVEL_NODE] = []
6458 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6460 def DeclareLocks(self, level):
6461 if level == locking.LEVEL_NODE:
6462 self._LockInstancesNodes()
6464 def CheckPrereq(self):
6465 """Check prerequisites.
6467 This checks that the instance is in the cluster.
6470 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6471 assert self.instance is not None, \
6472 "Cannot retrieve locked instance %s" % self.op.instance_name
6474 def Exec(self, feedback_fn):
6475 """Deactivate the disks
6478 instance = self.instance
6479 if self.op.force:
6480 _ShutdownInstanceDisks(self, instance)
6481 else:
6482 _SafeShutdownInstanceDisks(self, instance)
6485 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6486 """Shutdown block devices of an instance.
6488 This function checks if an instance is running, before calling
6489 _ShutdownInstanceDisks.
6492 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6493 _ShutdownInstanceDisks(lu, instance, disks=disks)
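# _SafeShutdownInstanceDisks is the variant used when the caller has not
# already verified the instance state: deactivating the disks under a running
# instance would be destructive, hence the _CheckInstanceState guard above.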
6496 def _ExpandCheckDisks(instance, disks):
6497 """Return the instance disks selected by the disks list
6499 @type disks: list of L{objects.Disk} or None
6500 @param disks: selected disks
6501 @rtype: list of L{objects.Disk}
6502 @return: selected instance disks to act on
6504 """
6505 if disks is None:
6506 return instance.disks
6507 else:
6508 if not set(disks).issubset(instance.disks):
6509 raise errors.ProgrammerError("Can only act on disks belonging to the"
6510 " target instance")
6511 return disks
6514 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6515 """Shutdown block devices of an instance.
6517 This does the shutdown on all nodes of the instance.
6519 If the ignore_primary is false, errors on the primary node are
6520 ignored.
6522 """
6523 all_result = True
6524 disks = _ExpandCheckDisks(instance, disks)
6526 for disk in disks:
6527 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6528 lu.cfg.SetDiskID(top_disk, node)
6529 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6530 msg = result.fail_msg
6531 if msg:
6532 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6533 disk.iv_name, node, msg)
6534 if ((node == instance.primary_node and not ignore_primary) or
6535 (node != instance.primary_node and not result.offline)):
6536 all_result = False
6538 return all_result
6540 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6541 """Checks if a node has enough free memory.
6543 This function checks if a given node has the needed amount of free
6544 memory. In case the node has less memory or we cannot get the
6545 information from the node, this function raises an OpPrereqError
6546 exception.
6548 @type lu: C{LogicalUnit}
6549 @param lu: a logical unit from which we get configuration data
6551 @param node: the node to check
6552 @type reason: C{str}
6553 @param reason: string to use in the error message
6554 @type requested: C{int}
6555 @param requested: the amount of memory in MiB to check for
6556 @type hypervisor_name: C{str}
6557 @param hypervisor_name: the hypervisor to ask for memory stats
6559 @return: node current free memory
6560 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6561 we cannot check the node
6564 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6565 nodeinfo[node].Raise("Can't get data from node %s" % node,
6566 prereq=True, ecode=errors.ECODE_ENVIRON)
6567 (_, _, (hv_info, )) = nodeinfo[node].payload
6569 free_mem = hv_info.get("memory_free", None)
6570 if not isinstance(free_mem, int):
6571 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6572 " was '%s'" % (node, free_mem),
6573 errors.ECODE_ENVIRON)
6574 if requested > free_mem:
6575 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6576 " needed %s MiB, available %s MiB" %
6577 (node, reason, requested, free_mem),
6578 errors.ECODE_NORES)
6580 return free_mem
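# Typical call, as used by LUInstanceStartup.CheckPrereq further down in this
# module:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)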
6582 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6583 """Checks if nodes have enough free disk space in the all VGs.
6585 This function checks if all given nodes have the needed amount of
6586 free disk. In case any node has less disk or we cannot get the
6587 information from the node, this function raises an OpPrereqError
6588 exception.
6590 @type lu: C{LogicalUnit}
6591 @param lu: a logical unit from which we get configuration data
6592 @type nodenames: C{list}
6593 @param nodenames: the list of node names to check
6594 @type req_sizes: C{dict}
6595 @param req_sizes: the hash of vg and corresponding amount of disk in
6597 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6598 or we cannot check the node
6601 for vg, req_size in req_sizes.items():
6602 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
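# req_sizes maps a volume group name to the space required in MiB, e.g.
# {"xenvg": 10240} to require 10 GiB in a (hypothetical) group called xenvg;
# each entry is checked independently via _CheckNodesFreeDiskOnVG above.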
6605 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6606 """Checks if nodes have enough free disk space in the specified VG.
6608 This function checks if all given nodes have the needed amount of
6609 free disk. In case any node has less disk or we cannot get the
6610 information from the node, this function raises an OpPrereqError
6611 exception.
6613 @type lu: C{LogicalUnit}
6614 @param lu: a logical unit from which we get configuration data
6615 @type nodenames: C{list}
6616 @param nodenames: the list of node names to check
6618 @param vg: the volume group to check
6619 @type requested: C{int}
6620 @param requested: the amount of disk in MiB to check for
6621 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6622 or we cannot check the node
6625 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6626 for node in nodenames:
6627 info = nodeinfo[node]
6628 info.Raise("Cannot get current information from node %s" % node,
6629 prereq=True, ecode=errors.ECODE_ENVIRON)
6630 (_, (vg_info, ), _) = info.payload
6631 vg_free = vg_info.get("vg_free", None)
6632 if not isinstance(vg_free, int):
6633 raise errors.OpPrereqError("Can't compute free disk space on node"
6634 " %s for vg %s, result was '%s'" %
6635 (node, vg, vg_free), errors.ECODE_ENVIRON)
6636 if requested > vg_free:
6637 raise errors.OpPrereqError("Not enough disk space on target node %s"
6638 " vg %s: required %d MiB, available %d MiB" %
6639 (node, vg, requested, vg_free),
6643 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6644 """Checks if nodes have enough physical CPUs
6646 This function checks if all given nodes have the needed number of
6647 physical CPUs. In case any node has less CPUs or we cannot get the
6648 information from the node, this function raises an OpPrereqError
6651 @type lu: C{LogicalUnit}
6652 @param lu: a logical unit from which we get configuration data
6653 @type nodenames: C{list}
6654 @param nodenames: the list of node names to check
6655 @type requested: C{int}
6656 @param requested: the minimum acceptable number of physical CPUs
6657 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6658 or we cannot check the node
6661 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6662 for node in nodenames:
6663 info = nodeinfo[node]
6664 info.Raise("Cannot get current information from node %s" % node,
6665 prereq=True, ecode=errors.ECODE_ENVIRON)
6666 (_, _, (hv_info, )) = info.payload
6667 num_cpus = hv_info.get("cpu_total", None)
6668 if not isinstance(num_cpus, int):
6669 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6670 " on node %s, result was '%s'" %
6671 (node, num_cpus), errors.ECODE_ENVIRON)
6672 if requested > num_cpus:
6673 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6674 "required" % (node, num_cpus, requested),
6678 class LUInstanceStartup(LogicalUnit):
6679 """Starts an instance.
6682 HPATH = "instance-start"
6683 HTYPE = constants.HTYPE_INSTANCE
6686 def CheckArguments(self):
6688 if self.op.beparams:
6689 # fill the beparams dict
6690 objects.UpgradeBeParams(self.op.beparams)
6691 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6693 def ExpandNames(self):
6694 self._ExpandAndLockInstance()
6695 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6697 def DeclareLocks(self, level):
6698 if level == locking.LEVEL_NODE_RES:
6699 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6701 def BuildHooksEnv(self):
6704 This runs on master, primary and secondary nodes of the instance.
6707 env = {
6708 "FORCE": self.op.force,
6709 }
6711 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6713 return env
6715 def BuildHooksNodes(self):
6716 """Build hooks nodes.
6719 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6720 return (nl, nl)
6722 def CheckPrereq(self):
6723 """Check prerequisites.
6725 This checks that the instance is in the cluster.
6728 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6729 assert self.instance is not None, \
6730 "Cannot retrieve locked instance %s" % self.op.instance_name
6733 if self.op.hvparams:
6734 # check hypervisor parameter syntax (locally)
6735 cluster = self.cfg.GetClusterInfo()
6736 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6737 filled_hvp = cluster.FillHV(instance)
6738 filled_hvp.update(self.op.hvparams)
6739 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6740 hv_type.CheckParameterSyntax(filled_hvp)
6741 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6743 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6745 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6747 if self.primary_offline and self.op.ignore_offline_nodes:
6748 self.proc.LogWarning("Ignoring offline primary node")
6750 if self.op.hvparams or self.op.beparams:
6751 self.proc.LogWarning("Overridden parameters are ignored")
6752 else:
6753 _CheckNodeOnline(self, instance.primary_node)
6755 bep = self.cfg.GetClusterInfo().FillBE(instance)
6756 bep.update(self.op.beparams)
6758 # check bridges existence
6759 _CheckInstanceBridgesExist(self, instance)
6761 remote_info = self.rpc.call_instance_info(instance.primary_node,
6762 instance.name,
6763 instance.hypervisor)
6764 remote_info.Raise("Error checking node %s" % instance.primary_node,
6765 prereq=True, ecode=errors.ECODE_ENVIRON)
6766 if not remote_info.payload: # not running already
6767 _CheckNodeFreeMemory(self, instance.primary_node,
6768 "starting instance %s" % instance.name,
6769 bep[constants.BE_MINMEM], instance.hypervisor)
6771 def Exec(self, feedback_fn):
6772 """Start the instance.
6775 instance = self.instance
6776 force = self.op.force
6778 if not self.op.no_remember:
6779 self.cfg.MarkInstanceUp(instance.name)
6781 if self.primary_offline:
6782 assert self.op.ignore_offline_nodes
6783 self.proc.LogInfo("Primary node offline, marked instance as started")
6784 else:
6785 node_current = instance.primary_node
6787 _StartInstanceDisks(self, instance, force)
6789 result = \
6790 self.rpc.call_instance_start(node_current,
6791 (instance, self.op.hvparams,
6792 self.op.beparams),
6793 self.op.startup_paused)
6794 msg = result.fail_msg
6795 if msg:
6796 _ShutdownInstanceDisks(self, instance)
6797 raise errors.OpExecError("Could not start instance: %s" % msg)
6800 class LUInstanceReboot(LogicalUnit):
6801 """Reboot an instance.
6804 HPATH = "instance-reboot"
6805 HTYPE = constants.HTYPE_INSTANCE
6808 def ExpandNames(self):
6809 self._ExpandAndLockInstance()
6811 def BuildHooksEnv(self):
6814 This runs on master, primary and secondary nodes of the instance.
6817 env = {
6818 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6819 "REBOOT_TYPE": self.op.reboot_type,
6820 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6821 }
6823 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6825 return env
6827 def BuildHooksNodes(self):
6828 """Build hooks nodes.
6831 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6832 return (nl, nl)
6834 def CheckPrereq(self):
6835 """Check prerequisites.
6837 This checks that the instance is in the cluster.
6840 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6841 assert self.instance is not None, \
6842 "Cannot retrieve locked instance %s" % self.op.instance_name
6843 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6844 _CheckNodeOnline(self, instance.primary_node)
6846 # check bridges existence
6847 _CheckInstanceBridgesExist(self, instance)
6849 def Exec(self, feedback_fn):
6850 """Reboot the instance.
6853 instance = self.instance
6854 ignore_secondaries = self.op.ignore_secondaries
6855 reboot_type = self.op.reboot_type
6857 remote_info = self.rpc.call_instance_info(instance.primary_node,
6858 instance.name,
6859 instance.hypervisor)
6860 remote_info.Raise("Error checking node %s" % instance.primary_node)
6861 instance_running = bool(remote_info.payload)
6863 node_current = instance.primary_node
6865 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6866 constants.INSTANCE_REBOOT_HARD]:
6867 for disk in instance.disks:
6868 self.cfg.SetDiskID(disk, node_current)
6869 result = self.rpc.call_instance_reboot(node_current, instance,
6870 reboot_type,
6871 self.op.shutdown_timeout)
6872 result.Raise("Could not reboot instance")
6873 else:
6874 if instance_running:
6875 result = self.rpc.call_instance_shutdown(node_current, instance,
6876 self.op.shutdown_timeout)
6877 result.Raise("Could not shutdown instance for full reboot")
6878 _ShutdownInstanceDisks(self, instance)
6879 else:
6880 self.LogInfo("Instance %s was already stopped, starting now",
6881 instance.name)
6882 _StartInstanceDisks(self, instance, ignore_secondaries)
6883 result = self.rpc.call_instance_start(node_current,
6884 (instance, None, None), False)
6885 msg = result.fail_msg
6886 if msg:
6887 _ShutdownInstanceDisks(self, instance)
6888 raise errors.OpExecError("Could not start instance for"
6889 " full reboot: %s" % msg)
6891 self.cfg.MarkInstanceUp(instance.name)
6894 class LUInstanceShutdown(LogicalUnit):
6895 """Shutdown an instance.
6898 HPATH = "instance-stop"
6899 HTYPE = constants.HTYPE_INSTANCE
6902 def ExpandNames(self):
6903 self._ExpandAndLockInstance()
6905 def BuildHooksEnv(self):
6908 This runs on master, primary and secondary nodes of the instance.
6911 env = _BuildInstanceHookEnvByObject(self, self.instance)
6912 env["TIMEOUT"] = self.op.timeout
6913 return env
6915 def BuildHooksNodes(self):
6916 """Build hooks nodes.
6919 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6920 return (nl, nl)
6922 def CheckPrereq(self):
6923 """Check prerequisites.
6925 This checks that the instance is in the cluster.
6928 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6929 assert self.instance is not None, \
6930 "Cannot retrieve locked instance %s" % self.op.instance_name
6932 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6934 self.primary_offline = \
6935 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6937 if self.primary_offline and self.op.ignore_offline_nodes:
6938 self.proc.LogWarning("Ignoring offline primary node")
6940 _CheckNodeOnline(self, self.instance.primary_node)
6942 def Exec(self, feedback_fn):
6943 """Shutdown the instance.
6946 instance = self.instance
6947 node_current = instance.primary_node
6948 timeout = self.op.timeout
6950 if not self.op.no_remember:
6951 self.cfg.MarkInstanceDown(instance.name)
6953 if self.primary_offline:
6954 assert self.op.ignore_offline_nodes
6955 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6956 else:
6957 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6958 msg = result.fail_msg
6959 if msg:
6960 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6962 _ShutdownInstanceDisks(self, instance)
6965 class LUInstanceReinstall(LogicalUnit):
6966 """Reinstall an instance.
6969 HPATH = "instance-reinstall"
6970 HTYPE = constants.HTYPE_INSTANCE
6973 def ExpandNames(self):
6974 self._ExpandAndLockInstance()
6976 def BuildHooksEnv(self):
6979 This runs on master, primary and secondary nodes of the instance.
6982 return _BuildInstanceHookEnvByObject(self, self.instance)
6984 def BuildHooksNodes(self):
6985 """Build hooks nodes.
6988 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6989 return (nl, nl)
6991 def CheckPrereq(self):
6992 """Check prerequisites.
6994 This checks that the instance is in the cluster and is not running.
6997 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6998 assert instance is not None, \
6999 "Cannot retrieve locked instance %s" % self.op.instance_name
7000 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7001 " offline, cannot reinstall")
7003 if instance.disk_template == constants.DT_DISKLESS:
7004 raise errors.OpPrereqError("Instance '%s' has no disks" %
7005 self.op.instance_name,
7007 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7009 if self.op.os_type is not None:
7011 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7012 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7013 instance_os = self.op.os_type
7014 else:
7015 instance_os = instance.os
7017 nodelist = list(instance.all_nodes)
7019 if self.op.osparams:
7020 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7021 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7022 self.os_inst = i_osdict # the new dict (without defaults)
7023 else:
7024 self.os_inst = {}
7026 self.instance = instance
7028 def Exec(self, feedback_fn):
7029 """Reinstall the instance.
7032 inst = self.instance
7034 if self.op.os_type is not None:
7035 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7036 inst.os = self.op.os_type
7037 # Write to configuration
7038 self.cfg.Update(inst, feedback_fn)
7040 _StartInstanceDisks(self, inst, None)
7042 feedback_fn("Running the instance OS create scripts...")
7043 # FIXME: pass debug option from opcode to backend
7044 result = self.rpc.call_instance_os_add(inst.primary_node,
7045 (inst, self.os_inst), True,
7046 self.op.debug_level)
7047 result.Raise("Could not install OS for instance %s on node %s" %
7048 (inst.name, inst.primary_node))
7050 _ShutdownInstanceDisks(self, inst)
7053 class LUInstanceRecreateDisks(LogicalUnit):
7054 """Recreate an instance's missing disks.
7057 HPATH = "instance-recreate-disks"
7058 HTYPE = constants.HTYPE_INSTANCE
7061 _MODIFYABLE = frozenset([
7062 constants.IDISK_SIZE,
7063 constants.IDISK_MODE,
7066 # New or changed disk parameters may have different semantics
7067 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7068 constants.IDISK_ADOPT,
7070 # TODO: Implement support changing VG while recreating
7072 constants.IDISK_METAVG,
7075 def _RunAllocator(self):
7076 """Run the allocator based on input opcode.
7079 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7082 # The allocator should actually run in "relocate" mode, but current
7083 # allocators don't support relocating all the nodes of an instance at
7084 # the same time. As a workaround we use "allocate" mode, but this is
7085 # suboptimal for two reasons:
7086 # - The instance name passed to the allocator is present in the list of
7087 # existing instances, so there could be a conflict within the
7088 # internal structures of the allocator. This doesn't happen with the
7089 # current allocators, but it's a liability.
7090 # - The allocator counts the resources used by the instance twice: once
7091 # because the instance exists already, and once because it tries to
7092 # allocate a new instance.
7093 # The allocator could choose some of the nodes on which the instance is
7094 # running, but that's not a problem. If the instance nodes are broken,
7095 # they should already be marked as drained or offline, and hence
7096 # skipped by the allocator. If instance disks have been lost for other
7097 # reasons, then recreating the disks on the same nodes should be fine.
7098 disk_template = self.instance.disk_template
7099 spindle_use = be_full[constants.BE_SPINDLE_USE]
7100 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7101 disk_template=disk_template,
7102 tags=list(self.instance.GetTags()),
7103 os=self.instance.os,
7105 vcpus=be_full[constants.BE_VCPUS],
7106 memory=be_full[constants.BE_MAXMEM],
7107 spindle_use=spindle_use,
7108 disks=[{constants.IDISK_SIZE: d.size,
7109 constants.IDISK_MODE: d.mode}
7110 for d in self.instance.disks],
7111 hypervisor=self.instance.hypervisor)
7112 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7114 ial.Run(self.op.iallocator)
7116 assert req.RequiredNodes() == len(self.instance.all_nodes)
7118 if not ial.success:
7119 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7120 " %s" % (self.op.iallocator, ial.info),
7121 errors.ECODE_NORES)
7123 self.op.nodes = ial.result
7124 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7125 self.op.instance_name, self.op.iallocator,
7126 utils.CommaJoin(ial.result))
7128 def CheckArguments(self):
7129 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7130 # Normalize and convert deprecated list of disk indices
7131 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7133 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7134 if duplicates:
7135 raise errors.OpPrereqError("Some disks have been specified more than"
7136 " once: %s" % utils.CommaJoin(duplicates),
7137 errors.ECODE_INVAL)
7139 if self.op.iallocator and self.op.nodes:
7140 raise errors.OpPrereqError("Give either the iallocator or the new"
7141 " nodes, not both", errors.ECODE_INVAL)
7143 for (idx, params) in self.op.disks:
7144 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7145 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7146 if unsupported:
7147 raise errors.OpPrereqError("Parameters for disk %s try to change"
7148 " unmodifiable parameter(s): %s" %
7149 (idx, utils.CommaJoin(unsupported)),
7150 errors.ECODE_INVAL)
7152 def ExpandNames(self):
7153 self._ExpandAndLockInstance()
7154 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7155 if self.op.nodes:
7156 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7157 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7158 else:
7159 self.needed_locks[locking.LEVEL_NODE] = []
7160 if self.op.iallocator:
7161 # iallocator will select a new node in the same group
7162 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7163 self.needed_locks[locking.LEVEL_NODE_RES] = []
7165 def DeclareLocks(self, level):
7166 if level == locking.LEVEL_NODEGROUP:
7167 assert self.op.iallocator is not None
7168 assert not self.op.nodes
7169 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7170 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7171 # Lock the primary group used by the instance optimistically; this
7172 # requires going via the node before it's locked, requiring
7173 # verification later on
7174 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7175 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7177 elif level == locking.LEVEL_NODE:
7178 # If an allocator is used, then we lock all the nodes in the current
7179 # instance group, as we don't know yet which ones will be selected;
7180 # if we replace the nodes without using an allocator, locks are
7181 # already declared in ExpandNames; otherwise, we need to lock all the
7182 # instance nodes for disk re-creation
7183 if self.op.iallocator:
7184 assert not self.op.nodes
7185 assert not self.needed_locks[locking.LEVEL_NODE]
7186 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7188 # Lock member nodes of the group of the primary node
7189 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7190 self.needed_locks[locking.LEVEL_NODE].extend(
7191 self.cfg.GetNodeGroup(group_uuid).members)
7192 elif not self.op.nodes:
7193 self._LockInstancesNodes(primary_only=False)
7194 elif level == locking.LEVEL_NODE_RES:
7196 self.needed_locks[locking.LEVEL_NODE_RES] = \
7197 self.needed_locks[locking.LEVEL_NODE][:]
7199 def BuildHooksEnv(self):
7202 This runs on master, primary and secondary nodes of the instance.
7205 return _BuildInstanceHookEnvByObject(self, self.instance)
7207 def BuildHooksNodes(self):
7208 """Build hooks nodes.
7211 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7212 return (nl, nl)
7214 def CheckPrereq(self):
7215 """Check prerequisites.
7217 This checks that the instance is in the cluster and is not running.
7220 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7221 assert instance is not None, \
7222 "Cannot retrieve locked instance %s" % self.op.instance_name
7223 if self.op.nodes:
7224 if len(self.op.nodes) != len(instance.all_nodes):
7225 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7226 " %d replacement nodes were specified" %
7227 (instance.name, len(instance.all_nodes),
7228 len(self.op.nodes)),
7229 errors.ECODE_INVAL)
7230 assert instance.disk_template != constants.DT_DRBD8 or \
7231 len(self.op.nodes) == 2
7232 assert instance.disk_template != constants.DT_PLAIN or \
7233 len(self.op.nodes) == 1
7234 primary_node = self.op.nodes[0]
7235 else:
7236 primary_node = instance.primary_node
7237 if not self.op.iallocator:
7238 _CheckNodeOnline(self, primary_node)
7240 if instance.disk_template == constants.DT_DISKLESS:
7241 raise errors.OpPrereqError("Instance '%s' has no disks" %
7242 self.op.instance_name, errors.ECODE_INVAL)
7244 # Verify if node group locks are still correct
7245 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7247 # Node group locks are acquired only for the primary node (and only
7248 # when the allocator is used)
7249 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7252 # if we replace nodes *and* the old primary is offline, we don't
7253 # check the instance state
7254 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7255 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7256 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7257 msg="cannot recreate disks")
7259 if self.op.disks:
7260 self.disks = dict(self.op.disks)
7261 else:
7262 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7264 maxidx = max(self.disks.keys())
7265 if maxidx >= len(instance.disks):
7266 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7269 if ((self.op.nodes or self.op.iallocator) and
7270 sorted(self.disks.keys()) != range(len(instance.disks))):
7271 raise errors.OpPrereqError("Can't recreate disks partially and"
7272 " change the nodes at the same time",
7275 self.instance = instance
7277 if self.op.iallocator:
7278 self._RunAllocator()
7280 # Release unneeded node and node resource locks
7281 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7282 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7284 def Exec(self, feedback_fn):
7285 """Recreate the disks.
7288 instance = self.instance
7290 assert (self.owned_locks(locking.LEVEL_NODE) ==
7291 self.owned_locks(locking.LEVEL_NODE_RES))
7293 to_skip = []
7294 mods = [] # keeps track of needed changes
7296 for idx, disk in enumerate(instance.disks):
7297 try:
7298 changes = self.disks[idx]
7299 except KeyError:
7300 # Disk should not be recreated
7301 to_skip.append(idx)
7302 continue
7304 # update secondaries for disks, if needed
7305 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7306 # need to update the nodes and minors
7307 assert len(self.op.nodes) == 2
7308 assert len(disk.logical_id) == 6 # otherwise disk internals
7310 (_, _, old_port, _, _, old_secret) = disk.logical_id
7311 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7312 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7313 new_minors[0], new_minors[1], old_secret)
7314 assert len(disk.logical_id) == len(new_id)
7315 else:
7316 new_id = None
7318 mods.append((idx, new_id, changes))
7320 # now that we have passed all asserts above, we can apply the mods
7321 # in a single run (to avoid partial changes)
7322 for idx, new_id, changes in mods:
7323 disk = instance.disks[idx]
7324 if new_id is not None:
7325 assert disk.dev_type == constants.LD_DRBD8
7326 disk.logical_id = new_id
7327 if changes:
7328 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7329 mode=changes.get(constants.IDISK_MODE, None))
7331 # change primary node, if needed
7332 if self.op.nodes:
7333 instance.primary_node = self.op.nodes[0]
7334 self.LogWarning("Changing the instance's nodes, you will have to"
7335 " remove any disks left on the older nodes manually")
7338 self.cfg.Update(instance, feedback_fn)
7340 _CreateDisks(self, instance, to_skip=to_skip)
7343 class LUInstanceRename(LogicalUnit):
7344 """Rename an instance.
7347 HPATH = "instance-rename"
7348 HTYPE = constants.HTYPE_INSTANCE
7350 def CheckArguments(self):
7354 if self.op.ip_check and not self.op.name_check:
7355 # TODO: make the ip check more flexible and not depend on the name check
7356 raise errors.OpPrereqError("IP address check requires a name check",
7359 def BuildHooksEnv(self):
7362 This runs on master, primary and secondary nodes of the instance.
7365 env = _BuildInstanceHookEnvByObject(self, self.instance)
7366 env["INSTANCE_NEW_NAME"] = self.op.new_name
7367 return env
7369 def BuildHooksNodes(self):
7370 """Build hooks nodes.
7373 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7374 return (nl, nl)
7376 def CheckPrereq(self):
7377 """Check prerequisites.
7379 This checks that the instance is in the cluster and is not running.
7382 self.op.instance_name = _ExpandInstanceName(self.cfg,
7383 self.op.instance_name)
7384 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7385 assert instance is not None
7386 _CheckNodeOnline(self, instance.primary_node)
7387 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7388 msg="cannot rename")
7389 self.instance = instance
7391 new_name = self.op.new_name
7392 if self.op.name_check:
7393 hostname = netutils.GetHostname(name=new_name)
7394 if hostname.name != new_name:
7395 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7397 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7398 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7399 " same as given hostname '%s'") %
7400 (hostname.name, self.op.new_name),
7402 new_name = self.op.new_name = hostname.name
7403 if (self.op.ip_check and
7404 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7405 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7406 (hostname.ip, new_name),
7407 errors.ECODE_NOTUNIQUE)
7409 instance_list = self.cfg.GetInstanceList()
7410 if new_name in instance_list and new_name != instance.name:
7411 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7412 new_name, errors.ECODE_EXISTS)
7414 def Exec(self, feedback_fn):
7415 """Rename the instance.
7418 inst = self.instance
7419 old_name = inst.name
7421 rename_file_storage = False
7422 if (inst.disk_template in constants.DTS_FILEBASED and
7423 self.op.new_name != inst.name):
7424 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7425 rename_file_storage = True
7427 self.cfg.RenameInstance(inst.name, self.op.new_name)
7428 # Change the instance lock. This is definitely safe while we hold the BGL.
7429 # Otherwise the new lock would have to be added in acquired mode.
7431 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7432 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7434 # re-read the instance from the configuration after rename
7435 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7437 if rename_file_storage:
7438 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7439 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7440 old_file_storage_dir,
7441 new_file_storage_dir)
7442 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7443 " (but the instance has been renamed in Ganeti)" %
7444 (inst.primary_node, old_file_storage_dir,
7445 new_file_storage_dir))
7447 _StartInstanceDisks(self, inst, None)
7449 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7450 old_name, self.op.debug_level)
7451 msg = result.fail_msg
7453 msg = ("Could not run OS rename script for instance %s on node %s"
7454 " (but the instance has been renamed in Ganeti): %s" %
7455 (inst.name, inst.primary_node, msg))
7456 self.proc.LogWarning(msg)
7458 _ShutdownInstanceDisks(self, inst)
7463 class LUInstanceRemove(LogicalUnit):
7464 """Remove an instance.
7467 HPATH = "instance-remove"
7468 HTYPE = constants.HTYPE_INSTANCE
7471 def ExpandNames(self):
7472 self._ExpandAndLockInstance()
7473 self.needed_locks[locking.LEVEL_NODE] = []
7474 self.needed_locks[locking.LEVEL_NODE_RES] = []
7475 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7477 def DeclareLocks(self, level):
7478 if level == locking.LEVEL_NODE:
7479 self._LockInstancesNodes()
7480 elif level == locking.LEVEL_NODE_RES:
7482 self.needed_locks[locking.LEVEL_NODE_RES] = \
7483 self.needed_locks[locking.LEVEL_NODE][:]
7485 def BuildHooksEnv(self):
7488 This runs on master, primary and secondary nodes of the instance.
7491 env = _BuildInstanceHookEnvByObject(self, self.instance)
7492 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7495 def BuildHooksNodes(self):
7496 """Build hooks nodes.
7499 nl = [self.cfg.GetMasterNode()]
7500 nl_post = list(self.instance.all_nodes) + nl
7501 return (nl, nl_post)
7503 def CheckPrereq(self):
7504 """Check prerequisites.
7506 This checks that the instance is in the cluster.
7509 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7510 assert self.instance is not None, \
7511 "Cannot retrieve locked instance %s" % self.op.instance_name
7513 def Exec(self, feedback_fn):
7514 """Remove the instance.
7517 instance = self.instance
7518 logging.info("Shutting down instance %s on node %s",
7519 instance.name, instance.primary_node)
7521 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7522 self.op.shutdown_timeout)
7523 msg = result.fail_msg
7525 if self.op.ignore_failures:
7526 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7528 raise errors.OpExecError("Could not shutdown instance %s on"
7530 (instance.name, instance.primary_node, msg))
7532 assert (self.owned_locks(locking.LEVEL_NODE) ==
7533 self.owned_locks(locking.LEVEL_NODE_RES))
7534 assert not (set(instance.all_nodes) -
7535 self.owned_locks(locking.LEVEL_NODE)), \
7536 "Not owning correct locks"
7538 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7541 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7542 """Utility function to remove an instance.
7545 logging.info("Removing block devices for instance %s", instance.name)
7547 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7548 if not ignore_failures:
7549 raise errors.OpExecError("Can't remove instance's disks")
7550 feedback_fn("Warning: can't remove instance's disks")
7552 logging.info("Removing instance %s out of cluster config", instance.name)
7554 lu.cfg.RemoveInstance(instance.name)
7556 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7557 "Instance lock removal conflict"
7559 # Remove lock for the instance
7560 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7563 class LUInstanceQuery(NoHooksLU):
7564 """Logical unit for querying instances.
7567 # pylint: disable=W0142
7570 def CheckArguments(self):
7571 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7572 self.op.output_fields, self.op.use_locking)
7574 def ExpandNames(self):
7575 self.iq.ExpandNames(self)
7577 def DeclareLocks(self, level):
7578 self.iq.DeclareLocks(self, level)
7580 def Exec(self, feedback_fn):
7581 return self.iq.OldStyleQuery(self)
7584 class LUInstanceFailover(LogicalUnit):
7585 """Failover an instance.
7588 HPATH = "instance-failover"
7589 HTYPE = constants.HTYPE_INSTANCE
7592 def CheckArguments(self):
7593 """Check the arguments.
7596 self.iallocator = getattr(self.op, "iallocator", None)
7597 self.target_node = getattr(self.op, "target_node", None)
7599 def ExpandNames(self):
7600 self._ExpandAndLockInstance()
7602 if self.op.target_node is not None:
7603 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7605 self.needed_locks[locking.LEVEL_NODE] = []
7606 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7608 self.needed_locks[locking.LEVEL_NODE_RES] = []
7609 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7611 ignore_consistency = self.op.ignore_consistency
7612 shutdown_timeout = self.op.shutdown_timeout
7613 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7616 ignore_consistency=ignore_consistency,
7617 shutdown_timeout=shutdown_timeout,
7618 ignore_ipolicy=self.op.ignore_ipolicy)
7619 self.tasklets = [self._migrater]
7621 def DeclareLocks(self, level):
7622 if level == locking.LEVEL_NODE:
7623 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7624 if instance.disk_template in constants.DTS_EXT_MIRROR:
7625 if self.op.target_node is None:
7626 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7628 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7629 self.op.target_node]
7630 del self.recalculate_locks[locking.LEVEL_NODE]
7632 self._LockInstancesNodes()
7633 elif level == locking.LEVEL_NODE_RES:
7635 self.needed_locks[locking.LEVEL_NODE_RES] = \
7636 self.needed_locks[locking.LEVEL_NODE][:]
7638 def BuildHooksEnv(self):
7641 This runs on master, primary and secondary nodes of the instance.
7644 instance = self._migrater.instance
7645 source_node = instance.primary_node
7646 target_node = self.op.target_node
7648 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7649 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7650 "OLD_PRIMARY": source_node,
7651 "NEW_PRIMARY": target_node,
7654 if instance.disk_template in constants.DTS_INT_MIRROR:
7655 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7656 env["NEW_SECONDARY"] = source_node
7658 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7660 env.update(_BuildInstanceHookEnvByObject(self, instance))
7664 def BuildHooksNodes(self):
7665 """Build hooks nodes.
7668 instance = self._migrater.instance
7669 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7670 return (nl, nl + [instance.primary_node])
7673 class LUInstanceMigrate(LogicalUnit):
7674 """Migrate an instance.
7676 This is migration without shutting down, compared to the failover,
7677 which is done with shutdown.
7680 HPATH = "instance-migrate"
7681 HTYPE = constants.HTYPE_INSTANCE
7684 def ExpandNames(self):
7685 self._ExpandAndLockInstance()
7687 if self.op.target_node is not None:
7688 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7690 self.needed_locks[locking.LEVEL_NODE] = []
7691 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7693 self.needed_locks[locking.LEVEL_NODE] = []
7694 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7697 TLMigrateInstance(self, self.op.instance_name,
7698 cleanup=self.op.cleanup,
7700 fallback=self.op.allow_failover,
7701 allow_runtime_changes=self.op.allow_runtime_changes,
7702 ignore_ipolicy=self.op.ignore_ipolicy)
7703 self.tasklets = [self._migrater]
7705 def DeclareLocks(self, level):
7706 if level == locking.LEVEL_NODE:
7707 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7708 if instance.disk_template in constants.DTS_EXT_MIRROR:
7709 if self.op.target_node is None:
7710 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7712 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7713 self.op.target_node]
7714 del self.recalculate_locks[locking.LEVEL_NODE]
7716 self._LockInstancesNodes()
7717 elif level == locking.LEVEL_NODE_RES:
7719 self.needed_locks[locking.LEVEL_NODE_RES] = \
7720 self.needed_locks[locking.LEVEL_NODE][:]
7722 def BuildHooksEnv(self):
7725 This runs on master, primary and secondary nodes of the instance.
7728 instance = self._migrater.instance
7729 source_node = instance.primary_node
7730 target_node = self.op.target_node
7731 env = _BuildInstanceHookEnvByObject(self, instance)
7733 "MIGRATE_LIVE": self._migrater.live,
7734 "MIGRATE_CLEANUP": self.op.cleanup,
7735 "OLD_PRIMARY": source_node,
7736 "NEW_PRIMARY": target_node,
7737 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7740 if instance.disk_template in constants.DTS_INT_MIRROR:
7741 env["OLD_SECONDARY"] = target_node
7742 env["NEW_SECONDARY"] = source_node
7744 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7748 def BuildHooksNodes(self):
7749 """Build hooks nodes.
7752 instance = self._migrater.instance
7753 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7754 return (nl, nl + [instance.primary_node])
7757 class LUInstanceMove(LogicalUnit):
7758 """Move an instance by data-copying.
7761 HPATH = "instance-move"
7762 HTYPE = constants.HTYPE_INSTANCE
7765 def ExpandNames(self):
7766 self._ExpandAndLockInstance()
7767 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7768 self.op.target_node = target_node
7769 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7770 self.needed_locks[locking.LEVEL_NODE_RES] = []
7771 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7773 def DeclareLocks(self, level):
7774 if level == locking.LEVEL_NODE:
7775 self._LockInstancesNodes(primary_only=True)
7776 elif level == locking.LEVEL_NODE_RES:
7778 self.needed_locks[locking.LEVEL_NODE_RES] = \
7779 self.needed_locks[locking.LEVEL_NODE][:]
7781 def BuildHooksEnv(self):
7784 This runs on master, primary and secondary nodes of the instance.
7788 "TARGET_NODE": self.op.target_node,
7789 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7791 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7794 def BuildHooksNodes(self):
7795 """Build hooks nodes.
7799 self.cfg.GetMasterNode(),
7800 self.instance.primary_node,
7801 self.op.target_node,
7805 def CheckPrereq(self):
7806 """Check prerequisites.
7808 This checks that the instance is in the cluster.
7811 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7812 assert self.instance is not None, \
7813 "Cannot retrieve locked instance %s" % self.op.instance_name
7815 node = self.cfg.GetNodeInfo(self.op.target_node)
7816 assert node is not None, \
7817 "Cannot retrieve locked node %s" % self.op.target_node
7819 self.target_node = target_node = node.name
7821 if target_node == instance.primary_node:
7822 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7823 (instance.name, target_node),
7826 bep = self.cfg.GetClusterInfo().FillBE(instance)
7828 for idx, dsk in enumerate(instance.disks):
7829 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7830 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7831 " cannot copy" % idx, errors.ECODE_STATE)
7833 _CheckNodeOnline(self, target_node)
7834 _CheckNodeNotDrained(self, target_node)
7835 _CheckNodeVmCapable(self, target_node)
7836 cluster = self.cfg.GetClusterInfo()
7837 group_info = self.cfg.GetNodeGroup(node.group)
7838 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7839 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7840 ignore=self.op.ignore_ipolicy)
7842 if instance.admin_state == constants.ADMINST_UP:
7843 # check memory requirements on the target node
7844 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7845 instance.name, bep[constants.BE_MAXMEM],
7846 instance.hypervisor)
7848 self.LogInfo("Not checking memory on the target node as"
7849 " instance will not be started")
7851 # check bridge existence
7852 _CheckInstanceBridgesExist(self, instance, node=target_node)
7854 def Exec(self, feedback_fn):
7855 """Move an instance.
7857 The move is done by shutting it down on its present node, copying
7858 the data over (slow) and starting it on the new node.
7861 instance = self.instance
7863 source_node = instance.primary_node
7864 target_node = self.target_node
7866 self.LogInfo("Shutting down instance %s on source node %s",
7867 instance.name, source_node)
7869 assert (self.owned_locks(locking.LEVEL_NODE) ==
7870 self.owned_locks(locking.LEVEL_NODE_RES))
7872 result = self.rpc.call_instance_shutdown(source_node, instance,
7873 self.op.shutdown_timeout)
7874 msg = result.fail_msg
7876 if self.op.ignore_consistency:
7877 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7878 " Proceeding anyway. Please make sure node"
7879 " %s is down. Error details: %s",
7880 instance.name, source_node, source_node, msg)
7882 raise errors.OpExecError("Could not shutdown instance %s on"
7884 (instance.name, source_node, msg))
7886 # create the target disks
7888 _CreateDisks(self, instance, target_node=target_node)
7889 except errors.OpExecError:
7890 self.LogWarning("Device creation failed, reverting...")
7892 _RemoveDisks(self, instance, target_node=target_node)
7894 self.cfg.ReleaseDRBDMinors(instance.name)
7897 cluster_name = self.cfg.GetClusterInfo().cluster_name
7900 # activate, get path, copy the data over
7901 for idx, disk in enumerate(instance.disks):
7902 self.LogInfo("Copying data for disk %d", idx)
7903 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7904 instance.name, True, idx)
7906 self.LogWarning("Can't assemble newly created disk %d: %s",
7907 idx, result.fail_msg)
7908 errs.append(result.fail_msg)
7910 dev_path = result.payload
7911 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7912 target_node, dev_path,
7915 self.LogWarning("Can't copy data over for disk %d: %s",
7916 idx, result.fail_msg)
7917 errs.append(result.fail_msg)
7921 self.LogWarning("Some disks failed to copy, aborting")
7923 _RemoveDisks(self, instance, target_node=target_node)
7925 self.cfg.ReleaseDRBDMinors(instance.name)
7926 raise errors.OpExecError("Errors during disk copy: %s" %
7929 instance.primary_node = target_node
7930 self.cfg.Update(instance, feedback_fn)
7932 self.LogInfo("Removing the disks on the original node")
7933 _RemoveDisks(self, instance, target_node=source_node)
7935 # Only start the instance if it's marked as up
7936 if instance.admin_state == constants.ADMINST_UP:
7937 self.LogInfo("Starting instance %s on node %s",
7938 instance.name, target_node)
7940 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7941 ignore_secondaries=True)
7943 _ShutdownInstanceDisks(self, instance)
7944 raise errors.OpExecError("Can't activate the instance's disks")
7946 result = self.rpc.call_instance_start(target_node,
7947 (instance, None, None), False)
7948 msg = result.fail_msg
7950 _ShutdownInstanceDisks(self, instance)
7951 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7952 (instance.name, target_node, msg))
7955 class LUNodeMigrate(LogicalUnit):
7956 """Migrate all instances from a node.
7959 HPATH = "node-migrate"
7960 HTYPE = constants.HTYPE_NODE
7963 def CheckArguments(self):
7966 def ExpandNames(self):
7967 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7969 self.share_locks = _ShareAll()
7970 self.needed_locks = {
7971 locking.LEVEL_NODE: [self.op.node_name],
7974 def BuildHooksEnv(self):
7977 This runs on the master, the primary and all the secondaries.
7981 "NODE_NAME": self.op.node_name,
7982 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7985 def BuildHooksNodes(self):
7986 """Build hooks nodes.
7989 nl = [self.cfg.GetMasterNode()]
7992 def CheckPrereq(self):
7995 def Exec(self, feedback_fn):
7996 # Prepare jobs for migration instances
7997 allow_runtime_changes = self.op.allow_runtime_changes
7999 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8002 iallocator=self.op.iallocator,
8003 target_node=self.op.target_node,
8004 allow_runtime_changes=allow_runtime_changes,
8005 ignore_ipolicy=self.op.ignore_ipolicy)]
8006 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8009 # TODO: Run iallocator in this opcode and pass correct placement options to
8010 # OpInstanceMigrate. Since other jobs can modify the cluster between
8011 # running the iallocator and the actual migration, a good consistency model
8012 # will have to be found.
8014 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8015 frozenset([self.op.node_name]))
8017 return ResultWithJobs(jobs)
8020 class TLMigrateInstance(Tasklet):
8021 """Tasklet class for instance migration.
8024 @ivar live: whether the migration will be done live or non-live;
8025 this variable is initialized only after CheckPrereq has run
8026 @type cleanup: boolean
8027 @ivar cleanup: Whether we are cleaning up after a failed migration
8028 @type iallocator: string
8029 @ivar iallocator: The iallocator used to determine target_node
8030 @type target_node: string
8031 @ivar target_node: If given, the target_node to reallocate the instance to
8032 @type failover: boolean
8033 @ivar failover: Whether operation results in failover or migration
8034 @type fallback: boolean
8035 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
8037 @type ignore_consistency: boolean
8038 @ivar ignore_consistency: Whether we should ignore consistency between source
8040 @type shutdown_timeout: int
8041 @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
8042 @type ignore_ipolicy: bool
8043 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8048 _MIGRATION_POLL_INTERVAL = 1 # seconds
8049 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8051 def __init__(self, lu, instance_name, cleanup=False,
8052 failover=False, fallback=False,
8053 ignore_consistency=False,
8054 allow_runtime_changes=True,
8055 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8056 ignore_ipolicy=False):
8057 """Initializes this class.
8060 Tasklet.__init__(self, lu)
8063 self.instance_name = instance_name
8064 self.cleanup = cleanup
8065 self.live = False # will be overridden later
8066 self.failover = failover
8067 self.fallback = fallback
8068 self.ignore_consistency = ignore_consistency
8069 self.shutdown_timeout = shutdown_timeout
8070 self.ignore_ipolicy = ignore_ipolicy
8071 self.allow_runtime_changes = allow_runtime_changes
8073 def CheckPrereq(self):
8074 """Check prerequisites.
8076 This checks that the instance is in the cluster.
8079 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8080 instance = self.cfg.GetInstanceInfo(instance_name)
8081 assert instance is not None
8082 self.instance = instance
8083 cluster = self.cfg.GetClusterInfo()
8085 if (not self.cleanup and
8086 not instance.admin_state == constants.ADMINST_UP and
8087 not self.failover and self.fallback):
8088 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8089 " switching to failover")
8090 self.failover = True
8092 if instance.disk_template not in constants.DTS_MIRRORED:
8097 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8098 " %s" % (instance.disk_template, text),
8101 if instance.disk_template in constants.DTS_EXT_MIRROR:
8102 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8104 if self.lu.op.iallocator:
8105 self._RunAllocator()
8107 # We set self.target_node as it is required by
8109 self.target_node = self.lu.op.target_node
8111 # Check that the target node is correct in terms of instance policy
8112 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8113 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8114 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8116 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8117 ignore=self.ignore_ipolicy)
8119 # self.target_node is already populated, either directly or by the
8121 target_node = self.target_node
8122 if self.target_node == instance.primary_node:
8123 raise errors.OpPrereqError("Cannot migrate instance %s"
8124 " to its primary (%s)" %
8125 (instance.name, instance.primary_node),
8128 if len(self.lu.tasklets) == 1:
8129 # It is safe to release locks only when we're the only tasklet
8131 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8132 keep=[instance.primary_node, self.target_node])
8135 secondary_nodes = instance.secondary_nodes
8136 if not secondary_nodes:
8137 raise errors.ConfigurationError("No secondary node but using"
8138 " %s disk template" %
8139 instance.disk_template)
8140 target_node = secondary_nodes[0]
8141 if self.lu.op.iallocator or (self.lu.op.target_node and
8142 self.lu.op.target_node != target_node):
8144 text = "failed over"
8147 raise errors.OpPrereqError("Instances with disk template %s cannot"
8148 " be %s to arbitrary nodes"
8149 " (neither an iallocator nor a target"
8150 " node can be passed)" %
8151 (instance.disk_template, text),
8153 nodeinfo = self.cfg.GetNodeInfo(target_node)
8154 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8155 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8157 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8158 ignore=self.ignore_ipolicy)
8160 i_be = cluster.FillBE(instance)
8162 # check memory requirements on the secondary node
8163 if (not self.cleanup and
8164 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8165 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8166 "migrating instance %s" %
8168 i_be[constants.BE_MINMEM],
8169 instance.hypervisor)
8171 self.lu.LogInfo("Not checking memory on the secondary node as"
8172 " instance will not be started")
8174 # check if failover must be forced instead of migration
8175 if (not self.cleanup and not self.failover and
8176 i_be[constants.BE_ALWAYS_FAILOVER]):
8178 self.lu.LogInfo("Instance configured to always failover; fallback"
8180 self.failover = True
8182 raise errors.OpPrereqError("This instance has been configured to"
8183 " always failover, please allow failover",
8186 # check bridge existence
8187 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8189 if not self.cleanup:
8190 _CheckNodeNotDrained(self.lu, target_node)
8191 if not self.failover:
8192 result = self.rpc.call_instance_migratable(instance.primary_node,
8194 if result.fail_msg and self.fallback:
8195 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8197 self.failover = True
8199 result.Raise("Can't migrate, please use failover",
8200 prereq=True, ecode=errors.ECODE_STATE)
8202 assert not (self.failover and self.cleanup)
8204 if not self.failover:
8205 if self.lu.op.live is not None and self.lu.op.mode is not None:
8206 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8207 " parameters are accepted",
8209 if self.lu.op.live is not None:
8211 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8213 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8214 # reset the 'live' parameter to None so that repeated
8215 # invocations of CheckPrereq do not raise an exception
8216 self.lu.op.live = None
8217 elif self.lu.op.mode is None:
8218 # read the default value from the hypervisor
8219 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8220 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8222 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8224 # Failover is never live
8227 if not (self.failover or self.cleanup):
8228 remote_info = self.rpc.call_instance_info(instance.primary_node,
8230 instance.hypervisor)
8231 remote_info.Raise("Error checking instance on node %s" %
8232 instance.primary_node)
8233 instance_running = bool(remote_info.payload)
8234 if instance_running:
8235 self.current_mem = int(remote_info.payload["memory"])
8237 def _RunAllocator(self):
8238 """Run the allocator based on input opcode.
8241 # FIXME: add a self.ignore_ipolicy option
8242 req = iallocator.IAReqRelocate(name=self.instance_name,
8243 relocate_from=[self.instance.primary_node])
8244 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8246 ial.Run(self.lu.op.iallocator)
8249 raise errors.OpPrereqError("Can't compute nodes using"
8250 " iallocator '%s': %s" %
8251 (self.lu.op.iallocator, ial.info),
8253 self.target_node = ial.result[0]
8254 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8255 self.instance_name, self.lu.op.iallocator,
8256 utils.CommaJoin(ial.result))
8258 def _WaitUntilSync(self):
8259 """Poll with custom rpc for disk sync.
8261 This uses our own step-based rpc call.
8264 self.feedback_fn("* wait until resync is done")
8268 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8270 (self.instance.disks,
8273 for node, nres in result.items():
8274 nres.Raise("Cannot resync disks on node %s" % node)
8275 node_done, node_percent = nres.payload
8276 all_done = all_done and node_done
8277 if node_percent is not None:
8278 min_percent = min(min_percent, node_percent)
8280 if min_percent < 100:
8281 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8284 def _EnsureSecondary(self, node):
8285 """Demote a node to secondary.
8288 self.feedback_fn("* switching node %s to secondary mode" % node)
8290 for dev in self.instance.disks:
8291 self.cfg.SetDiskID(dev, node)
8293 result = self.rpc.call_blockdev_close(node, self.instance.name,
8294 self.instance.disks)
8295 result.Raise("Cannot change disk to secondary on node %s" % node)
8297 def _GoStandalone(self):
8298 """Disconnect from the network.
8301 self.feedback_fn("* changing into standalone mode")
8302 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8303 self.instance.disks)
8304 for node, nres in result.items():
8305 nres.Raise("Cannot disconnect disks node %s" % node)
8307 def _GoReconnect(self, multimaster):
8308 """Reconnect to the network.
8314 msg = "single-master"
8315 self.feedback_fn("* changing disks into %s mode" % msg)
8316 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8317 (self.instance.disks, self.instance),
8318 self.instance.name, multimaster)
8319 for node, nres in result.items():
8320 nres.Raise("Cannot change disks config on node %s" % node)
8322 def _ExecCleanup(self):
8323 """Try to cleanup after a failed migration.
8325 The cleanup is done by:
8326 - check that the instance is running only on one node
8327 (and update the config if needed)
8328 - change disks on its secondary node to secondary
8329 - wait until disks are fully synchronized
8330 - disconnect from the network
8331 - change disks into single-master mode
8332 - wait again until disks are fully synchronized
8335 instance = self.instance
8336 target_node = self.target_node
8337 source_node = self.source_node
8339 # check running on only one node
8340 self.feedback_fn("* checking where the instance actually runs"
8341 " (if this hangs, the hypervisor might be in"
8343 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8344 for node, result in ins_l.items():
8345 result.Raise("Can't contact node %s" % node)
8347 runningon_source = instance.name in ins_l[source_node].payload
8348 runningon_target = instance.name in ins_l[target_node].payload
8350 if runningon_source and runningon_target:
8351 raise errors.OpExecError("Instance seems to be running on two nodes,"
8352 " or the hypervisor is confused; you will have"
8353 " to ensure manually that it runs only on one"
8354 " and restart this operation")
8356 if not (runningon_source or runningon_target):
8357 raise errors.OpExecError("Instance does not seem to be running at all;"
8358 " in this case it's safer to repair by"
8359 " running 'gnt-instance stop' to ensure disk"
8360 " shutdown, and then restarting it")
8362 if runningon_target:
8363 # the migration has actually succeeded, we need to update the config
8364 self.feedback_fn("* instance running on secondary node (%s),"
8365 " updating config" % target_node)
8366 instance.primary_node = target_node
8367 self.cfg.Update(instance, self.feedback_fn)
8368 demoted_node = source_node
8370 self.feedback_fn("* instance confirmed to be running on its"
8371 " primary node (%s)" % source_node)
8372 demoted_node = target_node
8374 if instance.disk_template in constants.DTS_INT_MIRROR:
8375 self._EnsureSecondary(demoted_node)
8377 self._WaitUntilSync()
8378 except errors.OpExecError:
8379 # we ignore errors here, since if the device is standalone, it
8380 # won't be able to sync
8382 self._GoStandalone()
8383 self._GoReconnect(False)
8384 self._WaitUntilSync()
8386 self.feedback_fn("* done")
8388 def _RevertDiskStatus(self):
8389 """Try to revert the disk status after a failed migration.
8392 target_node = self.target_node
8393 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8397 self._EnsureSecondary(target_node)
8398 self._GoStandalone()
8399 self._GoReconnect(False)
8400 self._WaitUntilSync()
8401 except errors.OpExecError, err:
8402 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8403 " please try to recover the instance manually;"
8404 " error '%s'" % str(err))
8406 def _AbortMigration(self):
8407 """Call the hypervisor code to abort a started migration.
8410 instance = self.instance
8411 target_node = self.target_node
8412 source_node = self.source_node
8413 migration_info = self.migration_info
8415 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8419 abort_msg = abort_result.fail_msg
8421 logging.error("Aborting migration failed on target node %s: %s",
8422 target_node, abort_msg)
8423 # Don't raise an exception here, as we still have to try to revert the
8424 # disk status, even if this step failed.
8426 abort_result = self.rpc.call_instance_finalize_migration_src(
8427 source_node, instance, False, self.live)
8428 abort_msg = abort_result.fail_msg
8430 logging.error("Aborting migration failed on source node %s: %s",
8431 source_node, abort_msg)
8433 def _ExecMigration(self):
8434 """Migrate an instance.
8436 The migrate is done by:
8437 - change the disks into dual-master mode
8438 - wait until disks are fully synchronized again
8439 - migrate the instance
8440 - change disks on the new secondary node (the old primary) to secondary
8441 - wait until disks are fully synchronized
8442 - change disks into single-master mode
8445 instance = self.instance
8446 target_node = self.target_node
8447 source_node = self.source_node
8449 # Check for hypervisor version mismatch and warn the user.
8450 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8451 None, [self.instance.hypervisor])
8452 for ninfo in nodeinfo.values():
8453 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8455 (_, _, (src_info, )) = nodeinfo[source_node].payload
8456 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8458 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8459 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8460 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8461 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8462 if src_version != dst_version:
8463 self.feedback_fn("* warning: hypervisor version mismatch between"
8464 " source (%s) and target (%s) node" %
8465 (src_version, dst_version))
8467 self.feedback_fn("* checking disk consistency between source and target")
8468 for (idx, dev) in enumerate(instance.disks):
8469 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8470 raise errors.OpExecError("Disk %s is degraded or not fully"
8471 " synchronized on target node,"
8472 " aborting migration" % idx)
8474 if self.current_mem > self.tgt_free_mem:
8475 if not self.allow_runtime_changes:
8476 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8477 " free memory to fit instance %s on target"
8478 " node %s (have %dMB, need %dMB)" %
8479 (instance.name, target_node,
8480 self.tgt_free_mem, self.current_mem))
8481 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8482 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8485 rpcres.Raise("Cannot modify instance runtime memory")
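# Illustrative scenario for the ballooning step above (hypothetical numbers,
# not part of the original module): if the instance currently uses 2048 MiB
# on the source node but only 1536 MiB are free on the target, and runtime
# changes are allowed, the instance memory is reduced to 1536 MiB here so
# that the live migration can proceed; otherwise the OpExecError above is
# raised.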
8487 # First get the migration information from the remote node
8488 result = self.rpc.call_migration_info(source_node, instance)
8489 msg = result.fail_msg
8491 log_err = ("Failed fetching source migration information from %s: %s" %
8493 logging.error(log_err)
8494 raise errors.OpExecError(log_err)
8496 self.migration_info = migration_info = result.payload
8498 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8499 # Then switch the disks to master/master mode
8500 self._EnsureSecondary(target_node)
8501 self._GoStandalone()
8502 self._GoReconnect(True)
8503 self._WaitUntilSync()
8505 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8506 result = self.rpc.call_accept_instance(target_node,
8509 self.nodes_ip[target_node])
8511 msg = result.fail_msg
8513 logging.error("Instance pre-migration failed, trying to revert"
8514 " disk status: %s", msg)
8515 self.feedback_fn("Pre-migration failed, aborting")
8516 self._AbortMigration()
8517 self._RevertDiskStatus()
8518 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8519 (instance.name, msg))
8521 self.feedback_fn("* migrating instance to %s" % target_node)
8522 result = self.rpc.call_instance_migrate(source_node, instance,
8523 self.nodes_ip[target_node],
8525 msg = result.fail_msg
8527 logging.error("Instance migration failed, trying to revert"
8528 " disk status: %s", msg)
8529 self.feedback_fn("Migration failed, aborting")
8530 self._AbortMigration()
8531 self._RevertDiskStatus()
8532 raise errors.OpExecError("Could not migrate instance %s: %s" %
8533 (instance.name, msg))
8535 self.feedback_fn("* starting memory transfer")
8536 last_feedback = time.time()
8538 result = self.rpc.call_instance_get_migration_status(source_node,
8540 msg = result.fail_msg
8541 ms = result.payload # MigrationStatus instance
8542 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8543 logging.error("Instance migration failed, trying to revert"
8544 " disk status: %s", msg)
8545 self.feedback_fn("Migration failed, aborting")
8546 self._AbortMigration()
8547 self._RevertDiskStatus()
8548 raise errors.OpExecError("Could not migrate instance %s: %s" %
8549 (instance.name, msg))
8551 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8552 self.feedback_fn("* memory transfer complete")
8555 if (utils.TimeoutExpired(last_feedback,
8556 self._MIGRATION_FEEDBACK_INTERVAL) and
8557 ms.transferred_ram is not None):
8558 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8559 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8560 last_feedback = time.time()
8562 time.sleep(self._MIGRATION_POLL_INTERVAL)
8564 result = self.rpc.call_instance_finalize_migration_src(source_node,
8568 msg = result.fail_msg
8570 logging.error("Instance migration succeeded, but finalization failed"
8571 " on the source node: %s", msg)
8572 raise errors.OpExecError("Could not finalize instance migration: %s" %
8575 instance.primary_node = target_node
8577 # distribute new instance config to the other nodes
8578 self.cfg.Update(instance, self.feedback_fn)
8580 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8584 msg = result.fail_msg
8586 logging.error("Instance migration succeeded, but finalization failed"
8587 " on the target node: %s", msg)
8588 raise errors.OpExecError("Could not finalize instance migration: %s" %
8591 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8592 self._EnsureSecondary(source_node)
8593 self._WaitUntilSync()
8594 self._GoStandalone()
8595 self._GoReconnect(False)
8596 self._WaitUntilSync()
8598 # If the instance's disk template is `rbd' and there was a successful
8599 # migration, unmap the device from the source node.
8600 if self.instance.disk_template == constants.DT_RBD:
8601 disks = _ExpandCheckDisks(instance, instance.disks)
8602 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8604 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8605 msg = result.fail_msg
8607 logging.error("Migration was successful, but couldn't unmap the"
8608 " block device %s on source node %s: %s",
8609 disk.iv_name, source_node, msg)
8610 logging.error("You need to unmap the device %s manually on %s",
8611 disk.iv_name, source_node)
8613 self.feedback_fn("* done")
8615 def _ExecFailover(self):
8616 """Failover an instance.
8618 The failover is done by shutting it down on its present node and
8619 starting it on the secondary.
8622 instance = self.instance
8623 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8625 source_node = instance.primary_node
8626 target_node = self.target_node
8628 if instance.admin_state == constants.ADMINST_UP:
8629 self.feedback_fn("* checking disk consistency between source and target")
8630 for (idx, dev) in enumerate(instance.disks):
8631 # for drbd, these are drbd over lvm
8632 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8634 if primary_node.offline:
8635 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8637 (primary_node.name, idx, target_node))
8638 elif not self.ignore_consistency:
8639 raise errors.OpExecError("Disk %s is degraded on target node,"
8640 " aborting failover" % idx)
8642 self.feedback_fn("* not checking disk consistency as instance is not"
8645 self.feedback_fn("* shutting down instance on source node")
8646 logging.info("Shutting down instance %s on node %s",
8647 instance.name, source_node)
8649 result = self.rpc.call_instance_shutdown(source_node, instance,
8650 self.shutdown_timeout)
8651 msg = result.fail_msg
8653 if self.ignore_consistency or primary_node.offline:
8654 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8655 " proceeding anyway; please make sure node"
8656 " %s is down; error details: %s",
8657 instance.name, source_node, source_node, msg)
8659 raise errors.OpExecError("Could not shutdown instance %s on"
8661 (instance.name, source_node, msg))
8663 self.feedback_fn("* deactivating the instance's disks on source node")
8664 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8665 raise errors.OpExecError("Can't shut down the instance's disks")
8667 instance.primary_node = target_node
8668 # distribute new instance config to the other nodes
8669 self.cfg.Update(instance, self.feedback_fn)
8671 # Only start the instance if it's marked as up
8672 if instance.admin_state == constants.ADMINST_UP:
8673 self.feedback_fn("* activating the instance's disks on target node %s" %
8675 logging.info("Starting instance %s on node %s",
8676 instance.name, target_node)
8678 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8679 ignore_secondaries=True)
8681 _ShutdownInstanceDisks(self.lu, instance)
8682 raise errors.OpExecError("Can't activate the instance's disks")
8684 self.feedback_fn("* starting the instance on the target node %s" %
8686 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8688 msg = result.fail_msg
8690 _ShutdownInstanceDisks(self.lu, instance)
8691 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8692 (instance.name, target_node, msg))
8694 def Exec(self, feedback_fn):
8695 """Perform the migration.
8698 self.feedback_fn = feedback_fn
8699 self.source_node = self.instance.primary_node
8701 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8702 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8703 self.target_node = self.instance.secondary_nodes[0]
8704 # Otherwise self.target_node has been populated either
8705 # directly, or through an iallocator.
8707 self.all_nodes = [self.source_node, self.target_node]
8708 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8709 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8712 feedback_fn("Failover instance %s" % self.instance.name)
8713 self._ExecFailover()
8715 feedback_fn("Migrating instance %s" % self.instance.name)
8718 return self._ExecCleanup()
8720 return self._ExecMigration()
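# For example, a plain "gnt-instance migrate inst1.example.com" (hypothetical
# instance name) reaches this point with cleanup=False and failover=False, so
# _ExecMigration drives the live-migration path; "gnt-instance migrate
# --cleanup" instead ends up in _ExecCleanup to repair a previously failed
# migration.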
8723 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8725 """Wrapper around L{_CreateBlockDevInner}.
8727 This method annotates the root device first.
8730 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8731 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8735 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8737 """Create a tree of block devices on a given node.
8739 If this device type has to be created on secondaries, create it and all its children.
8742 If not, just recurse to children keeping the same 'force' value.
8744 @attention: The device has to be annotated already.
8746 @param lu: the lu on whose behalf we execute
8747 @param node: the node on which to create the device
8748 @type instance: L{objects.Instance}
8749 @param instance: the instance which owns the device
8750 @type device: L{objects.Disk}
8751 @param device: the device to create
8752 @type force_create: boolean
8753 @param force_create: whether to force creation of this device; this
8754 will be changed to True whenever we find a device which has
8755 CreateOnSecondary() attribute
8756 @param info: the extra 'metadata' we should attach to the device
8757 (this will be represented as a LVM tag)
8758 @type force_open: boolean
8759 @param force_open: this parameter will be passed to the
8760 L{backend.BlockdevCreate} function where it specifies
8761 whether we run on primary or not, and it affects both
8762 the child assembly and the device's own Open() execution
8765 if device.CreateOnSecondary():
8769 for child in device.children:
8770 _CreateBlockDevInner(lu, node, instance, child, force_create,
8773 if not force_create:
8776 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8779 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8780 """Create a single block device on a given node.
8782 This will not recurse over children of the device, so they must be created in advance.
8785 @param lu: the lu on whose behalf we execute
8786 @param node: the node on which to create the device
8787 @type instance: L{objects.Instance}
8788 @param instance: the instance which owns the device
8789 @type device: L{objects.Disk}
8790 @param device: the device to create
8791 @param info: the extra 'metadata' we should attach to the device
8792 (this will be represented as a LVM tag)
8793 @type force_open: boolean
8794 @param force_open: this parameter will be passed to the
8795 L{backend.BlockdevCreate} function where it specifies
8796 whether we run on primary or not, and it affects both
8797 the child assembly and the device's own Open() execution
8800 lu.cfg.SetDiskID(device, node)
8801 result = lu.rpc.call_blockdev_create(node, device, device.size,
8802 instance.name, force_open, info)
8803 result.Raise("Can't create block device %s on"
8804 " node %s for instance %s" % (device, node, instance.name))
8805 if device.physical_id is None:
8806 device.physical_id = result.payload
8809 def _GenerateUniqueNames(lu, exts):
8810 """Generate a suitable LV name.
8812 This will generate a logical volume name for the given instance.
8817 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8818 results.append("%s%s" % (new_id, val))
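# Illustrative result (UUID made up): _GenerateUniqueNames(lu, [".disk0"])
# would return something like
# ["d0a5f7a2-3c42-4f21-9bd6-1f7e3c9a1b2d.disk0"], i.e. a cluster-unique ID
# with each requested suffix appended.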
8822 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8823 iv_name, p_minor, s_minor):
8824 """Generate a drbd8 device complete with its children.
8827 assert len(vgnames) == len(names) == 2
8828 port = lu.cfg.AllocatePort()
8829 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8831 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8832 logical_id=(vgnames[0], names[0]),
8834 dev_meta = objects.Disk(dev_type=constants.LD_LV,
8835 size=constants.DRBD_META_SIZE,
8836 logical_id=(vgnames[1], names[1]),
8838 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8839 logical_id=(primary, secondary, port,
8842 children=[dev_data, dev_meta],
8843 iv_name=iv_name, params={})
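# The result is a small device tree: an LD_DRBD8 disk whose children are the
# data LV and a DRBD_META_SIZE metadata LV; its (partially elided) logical_id
# carries the primary/secondary node pair, the allocated port, both minors
# and the shared secret needed to bring the device up on either node.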
8847 _DISK_TEMPLATE_NAME_PREFIX = {
8848 constants.DT_PLAIN: "",
8849 constants.DT_RBD: ".rbd",
8853 _DISK_TEMPLATE_DEVICE_TYPE = {
8854 constants.DT_PLAIN: constants.LD_LV,
8855 constants.DT_FILE: constants.LD_FILE,
8856 constants.DT_SHARED_FILE: constants.LD_FILE,
8857 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8858 constants.DT_RBD: constants.LD_RBD,
8862 def _GenerateDiskTemplate(
8863 lu, template_name, instance_name, primary_node, secondary_nodes,
8864 disk_info, file_storage_dir, file_driver, base_index,
8865 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8866 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8867 """Generate the entire disk layout for a given template type.
8870 # TODO: compute space requirements
8872 vgname = lu.cfg.GetVGName()
8873 disk_count = len(disk_info)
8876 if template_name == constants.DT_DISKLESS:
8878 elif template_name == constants.DT_DRBD8:
8879 if len(secondary_nodes) != 1:
8880 raise errors.ProgrammerError("Wrong template configuration")
8881 remote_node = secondary_nodes[0]
8882 minors = lu.cfg.AllocateDRBDMinor(
8883 [primary_node, remote_node] * len(disk_info), instance_name)
8885 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8887 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8890 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8891 for i in range(disk_count)]):
8892 names.append(lv_prefix + "_data")
8893 names.append(lv_prefix + "_meta")
8894 for idx, disk in enumerate(disk_info):
8895 disk_index = idx + base_index
8896 data_vg = disk.get(constants.IDISK_VG, vgname)
8897 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8898 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8899 disk[constants.IDISK_SIZE],
8901 names[idx * 2:idx * 2 + 2],
8902 "disk/%d" % disk_index,
8903 minors[idx * 2], minors[idx * 2 + 1])
8904 disk_dev.mode = disk[constants.IDISK_MODE]
8905 disks.append(disk_dev)
8908 raise errors.ProgrammerError("Wrong template configuration")
8910 if template_name == constants.DT_FILE:
8912 elif template_name == constants.DT_SHARED_FILE:
8913 _req_shr_file_storage()
8915 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8916 if name_prefix is None:
8919 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8920 (name_prefix, base_index + i)
8921 for i in range(disk_count)])
8923 if template_name == constants.DT_PLAIN:
8924 def logical_id_fn(idx, _, disk):
8925 vg = disk.get(constants.IDISK_VG, vgname)
8926 return (vg, names[idx])
8927 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8929 lambda _, disk_index, disk: (file_driver,
8930 "%s/disk%d" % (file_storage_dir,
8932 elif template_name == constants.DT_BLOCK:
8934 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8935 disk[constants.IDISK_ADOPT])
8936 elif template_name == constants.DT_RBD:
8937 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8939 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8941 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8943 for idx, disk in enumerate(disk_info):
8944 disk_index = idx + base_index
8945 size = disk[constants.IDISK_SIZE]
8946 feedback_fn("* disk %s, size %s" %
8947 (disk_index, utils.FormatUnit(size, "h")))
8948 disks.append(objects.Disk(dev_type=dev_type, size=size,
8949 logical_id=logical_id_fn(idx, disk_index, disk),
8950 iv_name="disk/%d" % disk_index,
8951 mode=disk[constants.IDISK_MODE],
8957 def _GetInstanceInfoText(instance):
8958 """Compute the text that should be added to the disk's metadata.
8961 return "originstname+%s" % instance.name
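# For example, an instance named "web1.example.com" (hypothetical name) gets
# its volumes tagged with "originstname+web1.example.com".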
8964 def _CalcEta(time_taken, written, total_size):
8965 """Calculates the ETA based on size written and total size.
8967 @param time_taken: The time taken so far
8968 @param written: amount written so far
8969 @param total_size: The total size of data to be written
8970 @return: The remaining time in seconds
8973 avg_time = time_taken / float(written)
8974 return (total_size - written) * avg_time
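# Worked example of the formula above (hypothetical numbers): with 512 MiB
# written after 60 seconds out of a 2048 MiB total, the average time per
# unit is 60 / 512.0, so the remaining time is
# (2048 - 512) * (60 / 512.0) == 180.0 seconds.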
8977 def _WipeDisks(lu, instance):
8978 """Wipes instance disks.
8980 @type lu: L{LogicalUnit}
8981 @param lu: the logical unit on whose behalf we execute
8982 @type instance: L{objects.Instance}
8983 @param instance: the instance whose disks we should create
8984 @return: the success of the wipe
8987 node = instance.primary_node
8989 for device in instance.disks:
8990 lu.cfg.SetDiskID(device, node)
8992 logging.info("Pause sync of instance %s disks", instance.name)
8993 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8994 (instance.disks, instance),
8996 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8998 for idx, success in enumerate(result.payload):
9000 logging.warn("pause-sync of instance %s for disk %d failed",
9004 for idx, device in enumerate(instance.disks):
9005 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
9006 # but at most MAX_WIPE_CHUNK
9007 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
9008 constants.MIN_WIPE_CHUNK_PERCENT)
9009 # we _must_ make this an int, otherwise rounding errors will
9011 wipe_chunk_size = int(wipe_chunk_size)
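# Worked example for the chunk size above (hypothetical disk sizes, assuming
# the usual defaults of MIN_WIPE_CHUNK_PERCENT == 10 and MAX_WIPE_CHUNK ==
# 1024 MiB): a 2048 MiB disk is wiped in chunks of
# int(min(1024, 2048 / 100.0 * 10)) == 204 MiB, while any disk larger than
# 10240 MiB is capped at 1024 MiB per chunk.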
9013 lu.LogInfo("* Wiping disk %d", idx)
9014 logging.info("Wiping disk %d for instance %s, node %s using"
9015 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9020 start_time = time.time()
9022 while offset < size:
9023 wipe_size = min(wipe_chunk_size, size - offset)
9024 logging.debug("Wiping disk %d, offset %s, chunk %s",
9025 idx, offset, wipe_size)
9026 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9028 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9029 (idx, offset, wipe_size))
9032 if now - last_output >= 60:
9033 eta = _CalcEta(now - start_time, offset, size)
9034 lu.LogInfo(" - done: %.1f%% ETA: %s" %
9035 (offset / float(size) * 100, utils.FormatSeconds(eta)))
9038 logging.info("Resume sync of instance %s disks", instance.name)
9040 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9041 (instance.disks, instance),
9045 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
9046 " please have a look at the status and troubleshoot"
9047 " the issue: %s", node, result.fail_msg)
9049 for idx, success in enumerate(result.payload):
9051 lu.LogWarning("Resume sync of disk %d failed, please have a"
9052 " look at the status and troubleshoot the issue", idx)
9053 logging.warn("resume-sync of instance %s for disk %d failed",
9057 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9058 """Create all disks for an instance.
9060 This abstracts away some work from AddInstance.
9062 @type lu: L{LogicalUnit}
9063 @param lu: the logical unit on whose behalf we execute
9064 @type instance: L{objects.Instance}
9065 @param instance: the instance whose disks we should create
9067 @param to_skip: list of indices to skip
9068 @type target_node: string
9069 @param target_node: if passed, overrides the target node for creation
9071 @return: the success of the creation
9074 info = _GetInstanceInfoText(instance)
9075 if target_node is None:
9076 pnode = instance.primary_node
9077 all_nodes = instance.all_nodes
9082 if instance.disk_template in constants.DTS_FILEBASED:
9083 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9084 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9086 result.Raise("Failed to create directory '%s' on"
9087 " node %s" % (file_storage_dir, pnode))
9089 # Note: this needs to be kept in sync with adding of disks in
9090 # LUInstanceSetParams
9091 for idx, device in enumerate(instance.disks):
9092 if to_skip and idx in to_skip:
9094 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9096 for node in all_nodes:
9097 f_create = node == pnode
9098 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9101 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9102 """Remove all disks for an instance.
9104 This abstracts away some work from `AddInstance()` and
9105 `RemoveInstance()`. Note that in case some of the devices couldn't
9106 be removed, the removal will continue with the other ones (compare
9107 with `_CreateDisks()`).
9109 @type lu: L{LogicalUnit}
9110 @param lu: the logical unit on whose behalf we execute
9111 @type instance: L{objects.Instance}
9112 @param instance: the instance whose disks we should remove
9113 @type target_node: string
9114 @param target_node: used to override the node on which to remove the disks
9116 @return: the success of the removal
9119 logging.info("Removing block devices for instance %s", instance.name)
9122 ports_to_release = set()
9123 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9124 for (idx, device) in enumerate(anno_disks):
9126 edata = [(target_node, device)]
9128 edata = device.ComputeNodeTree(instance.primary_node)
9129 for node, disk in edata:
9130 lu.cfg.SetDiskID(disk, node)
9131 result = lu.rpc.call_blockdev_remove(node, disk)
9133 lu.LogWarning("Could not remove disk %s on node %s,"
9134 " continuing anyway: %s", idx, node, result.fail_msg)
9135 if not (result.offline and node != instance.primary_node):
9138 # if this is a DRBD disk, return its port to the pool
9139 if device.dev_type in constants.LDS_DRBD:
9140 ports_to_release.add(device.logical_id[2])
9142 if all_result or ignore_failures:
9143 for port in ports_to_release:
9144 lu.cfg.AddTcpUdpPort(port)
9146 if instance.disk_template == constants.DT_FILE:
9147 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9151 tgt = instance.primary_node
9152 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9154 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9155 file_storage_dir, instance.primary_node, result.fail_msg)
9161 def _ComputeDiskSizePerVG(disk_template, disks):
9162 """Compute disk size requirements in the volume group
9165 def _compute(disks, payload):
9166 """Universal algorithm.
9171 vgs[disk[constants.IDISK_VG]] = \
9172 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9176 # Required free disk space as a function of disk and swap space
9178 constants.DT_DISKLESS: {},
9179 constants.DT_PLAIN: _compute(disks, 0),
9180 # 128 MB are added for drbd metadata for each disk
9181 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9182 constants.DT_FILE: {},
9183 constants.DT_SHARED_FILE: {},
9186 if disk_template not in req_size_dict:
9187 raise errors.ProgrammerError("Disk template '%s' size requirement"
9188 " is unknown" % disk_template)
9190 return req_size_dict[disk_template]
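# A small illustrative call (hypothetical values): for two DRBD8 disks of
# 1024 MiB and 2048 MiB, both in volume group "xenvg",
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{constants.IDISK_VG: "xenvg",
#                           constants.IDISK_SIZE: 1024},
#                          {constants.IDISK_VG: "xenvg",
#                           constants.IDISK_SIZE: 2048}])
# yields {"xenvg": 1024 + 2048 + 2 * constants.DRBD_META_SIZE}, i.e. the
# data sizes plus the DRBD metadata overhead per disk.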
9193 def _FilterVmNodes(lu, nodenames):
9194 """Filters out non-vm_capable nodes from a list.
9196 @type lu: L{LogicalUnit}
9197 @param lu: the logical unit for which we check
9198 @type nodenames: list
9199 @param nodenames: the list of nodes on which we should check
9201 @return: the list of vm-capable nodes
9204 nonvm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9205 return [name for name in nodenames if name not in nonvm_nodes]
9208 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9209 """Hypervisor parameter validation.
9211 This function abstract the hypervisor parameter validation to be
9212 used in both instance create and instance modify.
9214 @type lu: L{LogicalUnit}
9215 @param lu: the logical unit for which we check
9216 @type nodenames: list
9217 @param nodenames: the list of nodes on which we should check
9218 @type hvname: string
9219 @param hvname: the name of the hypervisor we should use
9220 @type hvparams: dict
9221 @param hvparams: the parameters which we need to check
9222 @raise errors.OpPrereqError: if the parameters are not valid
9225 nodenames = _FilterVmNodes(lu, nodenames)
9227 cluster = lu.cfg.GetClusterInfo()
9228 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9230 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9231 for node in nodenames:
9235 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9238 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9239 """OS parameters validation.
9241 @type lu: L{LogicalUnit}
9242 @param lu: the logical unit for which we check
9243 @type required: boolean
9244 @param required: whether the validation should fail if the OS is not found
9246 @type nodenames: list
9247 @param nodenames: the list of nodes on which we should check
9248 @type osname: string
9249 @param osname: the name of the OS we should use
9250 @type osparams: dict
9251 @param osparams: the parameters which we need to check
9252 @raise errors.OpPrereqError: if the parameters are not valid
9255 nodenames = _FilterVmNodes(lu, nodenames)
9256 result = lu.rpc.call_os_validate(nodenames, required, osname,
9257 [constants.OS_VALIDATE_PARAMETERS],
9259 for node, nres in result.items():
9260 # we don't check for offline cases since this should be run only
9261 # against the master node and/or an instance's nodes
9262 nres.Raise("OS Parameters validation failed on node %s" % node)
9263 if not nres.payload:
9264 lu.LogInfo("OS %s not found on node %s, validation skipped",
9268 class LUInstanceCreate(LogicalUnit):
9269 """Create an instance.
9272 HPATH = "instance-add"
9273 HTYPE = constants.HTYPE_INSTANCE
9276 def CheckArguments(self):
9280 # do not require name_check to ease forward/backward compatibility
9282 if self.op.no_install and self.op.start:
9283 self.LogInfo("No-installation mode selected, disabling startup")
9284 self.op.start = False
9285 # validate/normalize the instance name
9286 self.op.instance_name = \
9287 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9289 if self.op.ip_check and not self.op.name_check:
9290 # TODO: make the ip check more flexible and not depend on the name check
9291 raise errors.OpPrereqError("Cannot do IP address check without a name"
9292 " check", errors.ECODE_INVAL)
9294 # check nics' parameter names
9295 for nic in self.op.nics:
9296 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9298 # check disks: parameter names and a consistent adopt/no-adopt strategy
9299 has_adopt = has_no_adopt = False
9300 for disk in self.op.disks:
9301 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9302 if constants.IDISK_ADOPT in disk:
9306 if has_adopt and has_no_adopt:
9307 raise errors.OpPrereqError("Either all disks are adopted or none is",
9310 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9311 raise errors.OpPrereqError("Disk adoption is not supported for the"
9312 " '%s' disk template" %
9313 self.op.disk_template,
9315 if self.op.iallocator is not None:
9316 raise errors.OpPrereqError("Disk adoption not allowed with an"
9317 " iallocator script", errors.ECODE_INVAL)
9318 if self.op.mode == constants.INSTANCE_IMPORT:
9319 raise errors.OpPrereqError("Disk adoption not allowed for"
9320 " instance import", errors.ECODE_INVAL)
9322 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9323 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9324 " but no 'adopt' parameter given" %
9325 self.op.disk_template,
9328 self.adopt_disks = has_adopt
9330 # instance name verification
9331 if self.op.name_check:
9332 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9333 self.op.instance_name = self.hostname1.name
9334 # used in CheckPrereq for ip ping check
9335 self.check_ip = self.hostname1.ip
9337 self.check_ip = None
9339 # file storage checks
9340 if (self.op.file_driver and
9341 not self.op.file_driver in constants.FILE_DRIVER):
9342 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9343 self.op.file_driver, errors.ECODE_INVAL)
9345 if self.op.disk_template == constants.DT_FILE:
9346 opcodes.RequireFileStorage()
9347 elif self.op.disk_template == constants.DT_SHARED_FILE:
9348 opcodes.RequireSharedFileStorage()
9350 ### Node/iallocator related checks
9351 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9353 if self.op.pnode is not None:
9354 if self.op.disk_template in constants.DTS_INT_MIRROR:
9355 if self.op.snode is None:
9356 raise errors.OpPrereqError("The networked disk templates need"
9357 " a mirror node", errors.ECODE_INVAL)
9359 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9361 self.op.snode = None
9363 self._cds = _GetClusterDomainSecret()
9365 if self.op.mode == constants.INSTANCE_IMPORT:
9366 # On import force_variant must be True, because if we forced it at
9367 # initial install, our only chance when importing it back is that it
9369 self.op.force_variant = True
9371 if self.op.no_install:
9372 self.LogInfo("No-installation mode has no effect during import")
9374 elif self.op.mode == constants.INSTANCE_CREATE:
9375 if self.op.os_type is None:
9376 raise errors.OpPrereqError("No guest OS specified",
9378 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9379 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9380 " installation" % self.op.os_type,
9382 if self.op.disk_template is None:
9383 raise errors.OpPrereqError("No disk template specified",
9386 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9387 # Check handshake to ensure both clusters have the same domain secret
9388 src_handshake = self.op.source_handshake
9389 if not src_handshake:
9390 raise errors.OpPrereqError("Missing source handshake",
9393 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9396 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9399 # Load and check source CA
9400 self.source_x509_ca_pem = self.op.source_x509_ca
9401 if not self.source_x509_ca_pem:
9402 raise errors.OpPrereqError("Missing source X509 CA",
9406 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9408 except OpenSSL.crypto.Error, err:
9409 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9410 (err, ), errors.ECODE_INVAL)
9412 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9413 if errcode is not None:
9414 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9417 self.source_x509_ca = cert
9419 src_instance_name = self.op.source_instance_name
9420 if not src_instance_name:
9421 raise errors.OpPrereqError("Missing source instance name",
9424 self.source_instance_name = \
9425 netutils.GetHostname(name=src_instance_name).name
9428 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9429 self.op.mode, errors.ECODE_INVAL)
9431 def ExpandNames(self):
9432 """ExpandNames for CreateInstance.
9434 Figure out the right locks for instance creation.
9437 self.needed_locks = {}
9439 instance_name = self.op.instance_name
9440 # this is just a preventive check, but someone might still add this
9441 # instance in the meantime, and creation will fail at lock-add time
9442 if instance_name in self.cfg.GetInstanceList():
9443 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9444 instance_name, errors.ECODE_EXISTS)
9446 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9448 if self.op.iallocator:
9449 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9450 # specifying a group on instance creation and then selecting nodes from
9452 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9453 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9455 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9456 nodelist = [self.op.pnode]
9457 if self.op.snode is not None:
9458 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9459 nodelist.append(self.op.snode)
9460 self.needed_locks[locking.LEVEL_NODE] = nodelist
9461 # Lock resources of instance's primary and secondary nodes (copy to
9462 # prevent accidental modification)
9463 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9465 # in case of import lock the source node too
9466 if self.op.mode == constants.INSTANCE_IMPORT:
9467 src_node = self.op.src_node
9468 src_path = self.op.src_path
9470 if src_path is None:
9471 self.op.src_path = src_path = self.op.instance_name
9473 if src_node is None:
9474 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9475 self.op.src_node = None
9476 if os.path.isabs(src_path):
9477 raise errors.OpPrereqError("Importing an instance from a path"
9478 " requires a source node option",
9481 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9482 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9483 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9484 if not os.path.isabs(src_path):
9485 self.op.src_path = src_path = \
9486 utils.PathJoin(constants.EXPORT_DIR, src_path)
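# Illustrative sketch (names are made up): for a DRBD8 creation with
# explicit nodes, ExpandNames leaves the lock declarations roughly as
#
#   self.add_locks == {locking.LEVEL_INSTANCE: "inst1.example.com"}
#   self.needed_locks == {locking.LEVEL_NODE: ["node1", "node2"],
#                         locking.LEVEL_NODE_RES: ["node1", "node2"]}
#
# whereas with an iallocator both node levels are locking.ALL_SET.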
9488 def _RunAllocator(self):
9489 """Run the allocator based on input opcode.
9492 nics = [n.ToDict() for n in self.nics]
9493 memory = self.be_full[constants.BE_MAXMEM]
9494 spindle_use = self.be_full[constants.BE_SPINDLE_USE]
9495 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
9496 disk_template=self.op.disk_template,
9499 vcpus=self.be_full[constants.BE_VCPUS],
9501 spindle_use=spindle_use,
9504 hypervisor=self.op.hypervisor)
9505 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9507 ial.Run(self.op.iallocator)
9510 raise errors.OpPrereqError("Can't compute nodes using"
9511 " iallocator '%s': %s" %
9512 (self.op.iallocator, ial.info),
9514 self.op.pnode = ial.result[0]
9515 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9516 self.op.instance_name, self.op.iallocator,
9517 utils.CommaJoin(ial.result))
9519 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9521 if req.RequiredNodes() == 2:
9522 self.op.snode = ial.result[1]
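# Illustrative sketch (node names are made up): for a two-node DRBD8
# allocation the iallocator result consumed above is an ordered list of
# node names, e.g.
#
#   ial.result --> ["node3.example.com", "node7.example.com"]
#
# where result[0] becomes the primary and result[1] the secondary node.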
9524 def BuildHooksEnv(self):
9527 This runs on master, primary and secondary nodes of the instance.
9531 "ADD_MODE": self.op.mode,
9533 if self.op.mode == constants.INSTANCE_IMPORT:
9534 env["SRC_NODE"] = self.op.src_node
9535 env["SRC_PATH"] = self.op.src_path
9536 env["SRC_IMAGES"] = self.src_images
9538 env.update(_BuildInstanceHookEnv(
9539 name=self.op.instance_name,
9540 primary_node=self.op.pnode,
9541 secondary_nodes=self.secondaries,
9542 status=self.op.start,
9543 os_type=self.op.os_type,
9544 minmem=self.be_full[constants.BE_MINMEM],
9545 maxmem=self.be_full[constants.BE_MAXMEM],
9546 vcpus=self.be_full[constants.BE_VCPUS],
9547 nics=_NICListToTuple(self, self.nics),
9548 disk_template=self.op.disk_template,
9549 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9550 for d in self.disks],
9553 hypervisor_name=self.op.hypervisor,
9559 def BuildHooksNodes(self):
9560 """Build hooks nodes.
9563 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9566 def _ReadExportInfo(self):
9567 """Reads the export information from disk.
9569 It will override the opcode source node and path with the actual
9570 information, if these two were not specified before.
9572 @return: the export information
9575 assert self.op.mode == constants.INSTANCE_IMPORT
9577 src_node = self.op.src_node
9578 src_path = self.op.src_path
9580 if src_node is None:
9581 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9582 exp_list = self.rpc.call_export_list(locked_nodes)
9584 for node in exp_list:
9585 if exp_list[node].fail_msg:
9587 if src_path in exp_list[node].payload:
9589 self.op.src_node = src_node = node
9590 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9594 raise errors.OpPrereqError("No export found for relative path %s" %
9595 src_path, errors.ECODE_INVAL)
9597 _CheckNodeOnline(self, src_node)
9598 result = self.rpc.call_export_info(src_node, src_path)
9599 result.Raise("No export or invalid export found in dir %s" % src_path)
9601 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9602 if not export_info.has_section(constants.INISECT_EXP):
9603 raise errors.ProgrammerError("Corrupted export config",
9604 errors.ECODE_ENVIRON)
9606 ei_version = export_info.get(constants.INISECT_EXP, "version")
9607 if (int(ei_version) != constants.EXPORT_VERSION):
9608 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9609 (ei_version, constants.EXPORT_VERSION),
9610 errors.ECODE_ENVIRON)
9613 def _ReadExportParams(self, einfo):
9614 """Use export parameters as defaults.
9616 In case the opcode doesn't specify (i.e. override) some instance
9617 parameters, try to use them from the export information, if the export declares them.
9621 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9623 if self.op.disk_template is None:
9624 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9625 self.op.disk_template = einfo.get(constants.INISECT_INS,
9627 if self.op.disk_template not in constants.DISK_TEMPLATES:
9628 raise errors.OpPrereqError("Disk template specified in configuration"
9629 " file is not one of the allowed values:"
9631 " ".join(constants.DISK_TEMPLATES),
9634 raise errors.OpPrereqError("No disk template specified and the export"
9635 " is missing the disk_template information",
9638 if not self.op.disks:
9640 # TODO: import the disk iv_name too
9641 for idx in range(constants.MAX_DISKS):
9642 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9643 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9644 disks.append({constants.IDISK_SIZE: disk_sz})
9645 self.op.disks = disks
9646 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9647 raise errors.OpPrereqError("No disk info specified and the export"
9648 " is missing the disk information",
9651 if not self.op.nics:
9653 for idx in range(constants.MAX_NICS):
9654 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9656 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9657 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9664 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9665 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9667 if (self.op.hypervisor is None and
9668 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9669 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9671 if einfo.has_section(constants.INISECT_HYP):
9672 # use the export parameters but do not override the ones
9673 # specified by the user
9674 for name, value in einfo.items(constants.INISECT_HYP):
9675 if name not in self.op.hvparams:
9676 self.op.hvparams[name] = value
9678 if einfo.has_section(constants.INISECT_BEP):
9679 # use the parameters, without overriding
9680 for name, value in einfo.items(constants.INISECT_BEP):
9681 if name not in self.op.beparams:
9682 self.op.beparams[name] = value
9683 # Compatibility for the old "memory" be param
9684 if name == constants.BE_MEMORY:
9685 if constants.BE_MAXMEM not in self.op.beparams:
9686 self.op.beparams[constants.BE_MAXMEM] = value
9687 if constants.BE_MINMEM not in self.op.beparams:
9688 self.op.beparams[constants.BE_MINMEM] = value
9690 # try to read the parameters old style, from the main section
9691 for name in constants.BES_PARAMETERS:
9692 if (name not in self.op.beparams and
9693 einfo.has_option(constants.INISECT_INS, name)):
9694 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
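# Illustrative sketch (value is made up): an old export that only
# carries the legacy "memory" backend parameter, say memory = 512,
# ends up via the compatibility branch above as roughly
#
#   self.op.beparams == {constants.BE_MEMORY: "512",
#                        constants.BE_MAXMEM: "512",
#                        constants.BE_MINMEM: "512"}
#
# unless the opcode already named maxmem/minmem explicitly;
# objects.UpgradeBeParams later normalizes the dict in CheckPrereq.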
9696 if einfo.has_section(constants.INISECT_OSP):
9697 # use the parameters, without overriding
9698 for name, value in einfo.items(constants.INISECT_OSP):
9699 if name not in self.op.osparams:
9700 self.op.osparams[name] = value
9702 def _RevertToDefaults(self, cluster):
9703 """Revert the instance parameters to the default values.
9707 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9708 for name in self.op.hvparams.keys():
9709 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9710 del self.op.hvparams[name]
9712 be_defs = cluster.SimpleFillBE({})
9713 for name in self.op.beparams.keys():
9714 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9715 del self.op.beparams[name]
9717 nic_defs = cluster.SimpleFillNIC({})
9718 for nic in self.op.nics:
9719 for name in constants.NICS_PARAMETERS:
9720 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9723 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9724 for name in self.op.osparams.keys():
9725 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9726 del self.op.osparams[name]
9728 def _CalculateFileStorageDir(self):
9729 """Calculate final instance file storage dir.
9732 # file storage dir calculation/check
9733 self.instance_file_storage_dir = None
9734 if self.op.disk_template in constants.DTS_FILEBASED:
9735 # build the full file storage dir path
9738 if self.op.disk_template == constants.DT_SHARED_FILE:
9739 get_fsd_fn = self.cfg.GetSharedFileStorageDir
else:
9741 get_fsd_fn = self.cfg.GetFileStorageDir
9743 cfg_storagedir = get_fsd_fn()
9744 if not cfg_storagedir:
9745 raise errors.OpPrereqError("Cluster file storage dir not defined",
9747 joinargs.append(cfg_storagedir)
9749 if self.op.file_storage_dir is not None:
9750 joinargs.append(self.op.file_storage_dir)
9752 joinargs.append(self.op.instance_name)
9754 # pylint: disable=W0142
9755 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
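# Illustrative sketch (paths are made up): with a cluster file storage
# dir of "/srv/ganeti/file-storage", an opcode file_storage_dir of
# "mysubdir" and instance "inst1.example.com", the join above produces
#
#   self.instance_file_storage_dir ==
#       "/srv/ganeti/file-storage/mysubdir/inst1.example.com"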
9757 def CheckPrereq(self): # pylint: disable=R0914
9758 """Check prerequisites.
9761 self._CalculateFileStorageDir()
9763 if self.op.mode == constants.INSTANCE_IMPORT:
9764 export_info = self._ReadExportInfo()
9765 self._ReadExportParams(export_info)
9766 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9768 self._old_instance_name = None
9770 if (not self.cfg.GetVGName() and
9771 self.op.disk_template not in constants.DTS_NOT_LVM):
9772 raise errors.OpPrereqError("Cluster does not support lvm-based"
9773 " instances", errors.ECODE_STATE)
9775 if (self.op.hypervisor is None or
9776 self.op.hypervisor == constants.VALUE_AUTO):
9777 self.op.hypervisor = self.cfg.GetHypervisorType()
9779 cluster = self.cfg.GetClusterInfo()
9780 enabled_hvs = cluster.enabled_hypervisors
9781 if self.op.hypervisor not in enabled_hvs:
9782 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9784 (self.op.hypervisor, ",".join(enabled_hvs)),
9787 # Check tag validity
9788 for tag in self.op.tags:
9789 objects.TaggableObject.ValidateTag(tag)
9791 # check hypervisor parameter syntax (locally)
9792 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9793 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9795 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9796 hv_type.CheckParameterSyntax(filled_hvp)
9797 self.hv_full = filled_hvp
9798 # check that we don't specify global parameters on an instance
9799 _CheckGlobalHvParams(self.op.hvparams)
9801 # fill and remember the beparams dict
9802 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9803 for param, value in self.op.beparams.iteritems():
9804 if value == constants.VALUE_AUTO:
9805 self.op.beparams[param] = default_beparams[param]
9806 objects.UpgradeBeParams(self.op.beparams)
9807 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9808 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9810 # build os parameters
9811 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9813 # now that hvp/bep are in final format, let's reset to defaults, if requested
9815 if self.op.identify_defaults:
9816 self._RevertToDefaults(cluster)
9820 for idx, nic in enumerate(self.op.nics):
9821 nic_mode_req = nic.get(constants.INIC_MODE, None)
9822 nic_mode = nic_mode_req
9823 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9824 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9826 # in routed mode, for the first nic, the default ip is 'auto'
9827 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9828 default_ip_mode = constants.VALUE_AUTO
9830 default_ip_mode = constants.VALUE_NONE
9832 # ip validity checks
9833 ip = nic.get(constants.INIC_IP, default_ip_mode)
9834 if ip is None or ip.lower() == constants.VALUE_NONE:
9836 elif ip.lower() == constants.VALUE_AUTO:
9837 if not self.op.name_check:
9838 raise errors.OpPrereqError("IP address set to auto but name checks"
9839 " have been skipped",
9841 nic_ip = self.hostname1.ip
9843 if not netutils.IPAddress.IsValid(ip):
9844 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9848 # TODO: check the ip address for uniqueness
9849 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9850 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9853 # MAC address verification
9854 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9855 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9856 mac = utils.NormalizeAndValidateMac(mac)
9859 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9860 except errors.ReservationError:
9861 raise errors.OpPrereqError("MAC address %s already in use"
9862 " in cluster" % mac,
9863 errors.ECODE_NOTUNIQUE)
9865 # Build nic parameters
9866 link = nic.get(constants.INIC_LINK, None)
9867 if link == constants.VALUE_AUTO:
9868 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9871 nicparams[constants.NIC_MODE] = nic_mode
9873 nicparams[constants.NIC_LINK] = link
9875 check_params = cluster.SimpleFillNIC(nicparams)
9876 objects.NIC.CheckParameterSyntax(check_params)
9877 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9879 # disk checks/pre-build
9880 default_vg = self.cfg.GetVGName()
9882 for disk in self.op.disks:
9883 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9884 if mode not in constants.DISK_ACCESS_SET:
9885 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9886 mode, errors.ECODE_INVAL)
9887 size = disk.get(constants.IDISK_SIZE, None)
if size is None:
9889 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
try:
size = int(size)
9892 except (TypeError, ValueError):
9893 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9896 data_vg = disk.get(constants.IDISK_VG, default_vg)
9898 constants.IDISK_SIZE: size,
9899 constants.IDISK_MODE: mode,
9900 constants.IDISK_VG: data_vg,
9902 if constants.IDISK_METAVG in disk:
9903 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9904 if constants.IDISK_ADOPT in disk:
9905 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9906 self.disks.append(new_disk)
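# Illustrative sketch (size is made up): a minimal user-supplied disk
# spec such as {constants.IDISK_SIZE: 10240} is normalized by the loop
# above into roughly
#
#   {constants.IDISK_SIZE: 10240,
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: default_vg}
#
# with IDISK_METAVG and IDISK_ADOPT copied through only when given.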
9908 if self.op.mode == constants.INSTANCE_IMPORT:
9910 for idx in range(len(self.disks)):
9911 option = "disk%d_dump" % idx
9912 if export_info.has_option(constants.INISECT_INS, option):
9913 # FIXME: are the old OSes, disk sizes, etc. useful?
9914 export_name = export_info.get(constants.INISECT_INS, option)
9915 image = utils.PathJoin(self.op.src_path, export_name)
9916 disk_images.append(image)
9918 disk_images.append(False)
9920 self.src_images = disk_images
9922 if self.op.instance_name == self._old_instance_name:
9923 for idx, nic in enumerate(self.nics):
9924 if nic.mac == constants.VALUE_AUTO:
9925 nic_mac_ini = "nic%d_mac" % idx
9926 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9928 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9930 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9931 if self.op.ip_check:
9932 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9933 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9934 (self.check_ip, self.op.instance_name),
9935 errors.ECODE_NOTUNIQUE)
9937 #### mac address generation
9938 # By generating here the mac address both the allocator and the hooks get
9939 # the real final mac address rather than the 'auto' or 'generate' value.
9940 # There is a race condition between the generation and the instance object
9941 # creation, which means that we know the mac is valid now, but we're not
9942 # sure it will be when we actually add the instance. If things go bad
9943 # adding the instance will abort because of a duplicate mac, and the
9944 # creation job will fail.
9945 for nic in self.nics:
9946 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9947 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9951 if self.op.iallocator is not None:
9952 self._RunAllocator()
9954 # Release all unneeded node locks
9955 _ReleaseLocks(self, locking.LEVEL_NODE,
9956 keep=filter(None, [self.op.pnode, self.op.snode,
9958 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9959 keep=filter(None, [self.op.pnode, self.op.snode,
9962 #### node related checks
9964 # check primary node
9965 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9966 assert self.pnode is not None, \
9967 "Cannot retrieve locked node %s" % self.op.pnode
if pnode.offline:
9969 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9970 pnode.name, errors.ECODE_STATE)
if pnode.drained:
9972 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9973 pnode.name, errors.ECODE_STATE)
9974 if not pnode.vm_capable:
9975 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9976 " '%s'" % pnode.name, errors.ECODE_STATE)
9978 self.secondaries = []
9980 # mirror node verification
9981 if self.op.disk_template in constants.DTS_INT_MIRROR:
9982 if self.op.snode == pnode.name:
9983 raise errors.OpPrereqError("The secondary node cannot be the"
9984 " primary node", errors.ECODE_INVAL)
9985 _CheckNodeOnline(self, self.op.snode)
9986 _CheckNodeNotDrained(self, self.op.snode)
9987 _CheckNodeVmCapable(self, self.op.snode)
9988 self.secondaries.append(self.op.snode)
9990 snode = self.cfg.GetNodeInfo(self.op.snode)
9991 if pnode.group != snode.group:
9992 self.LogWarning("The primary and secondary nodes are in two"
9993 " different node groups; the disk parameters"
9994 " from the first disk's node group will be"
9997 nodenames = [pnode.name] + self.secondaries
9999 # Verify instance specs
10000 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10002 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10003 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10004 constants.ISPEC_DISK_COUNT: len(self.disks),
10005 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10006 constants.ISPEC_NIC_COUNT: len(self.nics),
10007 constants.ISPEC_SPINDLE_USE: spindle_use,
10010 group_info = self.cfg.GetNodeGroup(pnode.group)
10011 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10012 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10013 if not self.op.ignore_ipolicy and res:
10014 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10015 " policy: %s") % (pnode.group,
10016 utils.CommaJoin(res)),
10017 errors.ECODE_INVAL)
10019 if not self.adopt_disks:
10020 if self.op.disk_template == constants.DT_RBD:
10021 # _CheckRADOSFreeSpace() is just a placeholder.
10022 # Any function that checks prerequisites can be placed here.
10023 # Check if there is enough space on the RADOS cluster.
10024 _CheckRADOSFreeSpace()
10026 # Check lv size requirements, if not adopting
10027 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10028 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10030 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10031 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10032 disk[constants.IDISK_ADOPT])
10033 for disk in self.disks])
10034 if len(all_lvs) != len(self.disks):
10035 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10036 errors.ECODE_INVAL)
10037 for lv_name in all_lvs:
10039 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10040 # to ReserveLV use the same syntax
10041 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10042 except errors.ReservationError:
10043 raise errors.OpPrereqError("LV named %s used by another instance" %
10044 lv_name, errors.ECODE_NOTUNIQUE)
10046 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10047 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10049 node_lvs = self.rpc.call_lv_list([pnode.name],
10050 vg_names.payload.keys())[pnode.name]
10051 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10052 node_lvs = node_lvs.payload
10054 delta = all_lvs.difference(node_lvs.keys())
10056 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10057 utils.CommaJoin(delta),
10058 errors.ECODE_INVAL)
10059 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10061 raise errors.OpPrereqError("Online logical volumes found, cannot"
10062 " adopt: %s" % utils.CommaJoin(online_lvs),
10063 errors.ECODE_STATE)
10064 # update the size of each disk based on what was found
10065 for dsk in self.disks:
10066 dsk[constants.IDISK_SIZE] = \
10067 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10068 dsk[constants.IDISK_ADOPT])][0]))
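# Illustrative note (the exact tuple layout is assumed from the
# indexing above): the lv_list payload maps "vg/lv_name" to a tuple
# whose first element is the size in MiB and whose third marks the LV
# as online, e.g.
#
#   node_lvs == {"xenvg/myvolume": (20480.0, False, False)}
#
# so adopting "xenvg/myvolume" sets that disk's IDISK_SIZE to 20480.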
10070 elif self.op.disk_template == constants.DT_BLOCK:
10071 # Normalize and de-duplicate device paths
10072 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10073 for disk in self.disks])
10074 if len(all_disks) != len(self.disks):
10075 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10076 errors.ECODE_INVAL)
10077 baddisks = [d for d in all_disks
10078 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10080 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10081 " cannot be adopted" %
10082 (", ".join(baddisks),
10083 constants.ADOPTABLE_BLOCKDEV_ROOT),
10084 errors.ECODE_INVAL)
10086 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10087 list(all_disks))[pnode.name]
10088 node_disks.Raise("Cannot get block device information from node %s" %
10090 node_disks = node_disks.payload
10091 delta = all_disks.difference(node_disks.keys())
10093 raise errors.OpPrereqError("Missing block device(s): %s" %
10094 utils.CommaJoin(delta),
10095 errors.ECODE_INVAL)
10096 for dsk in self.disks:
10097 dsk[constants.IDISK_SIZE] = \
10098 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10100 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10102 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10103 # check OS parameters (remotely)
10104 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10106 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10108 # memory check on primary node
10109 #TODO(dynmem): use MINMEM for checking
10111 _CheckNodeFreeMemory(self, self.pnode.name,
10112 "creating instance %s" % self.op.instance_name,
10113 self.be_full[constants.BE_MAXMEM],
10114 self.op.hypervisor)
10116 self.dry_run_result = list(nodenames)
10118 def Exec(self, feedback_fn):
10119 """Create and add the instance to the cluster.
10122 instance = self.op.instance_name
10123 pnode_name = self.pnode.name
10125 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10126 self.owned_locks(locking.LEVEL_NODE)), \
10127 "Node locks differ from node resource locks"
10129 ht_kind = self.op.hypervisor
10130 if ht_kind in constants.HTS_REQ_PORT:
10131 network_port = self.cfg.AllocatePort()
10133 network_port = None
10135 # This is ugly, but we have a chicken-and-egg problem here:
10136 # We can only take the group disk parameters, as the instance
10137 # has no disks yet (we are generating them right here).
10138 node = self.cfg.GetNodeInfo(pnode_name)
10139 nodegroup = self.cfg.GetNodeGroup(node.group)
10140 disks = _GenerateDiskTemplate(self,
10141 self.op.disk_template,
10142 instance, pnode_name,
10145 self.instance_file_storage_dir,
10146 self.op.file_driver,
10149 self.cfg.GetGroupDiskParams(nodegroup))
10151 iobj = objects.Instance(name=instance, os=self.op.os_type,
10152 primary_node=pnode_name,
10153 nics=self.nics, disks=disks,
10154 disk_template=self.op.disk_template,
10155 admin_state=constants.ADMINST_DOWN,
10156 network_port=network_port,
10157 beparams=self.op.beparams,
10158 hvparams=self.op.hvparams,
10159 hypervisor=self.op.hypervisor,
10160 osparams=self.op.osparams,
10164 for tag in self.op.tags:
10167 if self.adopt_disks:
10168 if self.op.disk_template == constants.DT_PLAIN:
10169 # rename LVs to the newly-generated names; we need to construct
10170 # 'fake' LV disks with the old data, plus the new unique_id
10171 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10173 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10174 rename_to.append(t_dsk.logical_id)
10175 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10176 self.cfg.SetDiskID(t_dsk, pnode_name)
10177 result = self.rpc.call_blockdev_rename(pnode_name,
10178 zip(tmp_disks, rename_to))
10179 result.Raise("Failed to rename adopted LVs")
10181 feedback_fn("* creating instance disks...")
10183 _CreateDisks(self, iobj)
10184 except errors.OpExecError:
10185 self.LogWarning("Device creation failed, reverting...")
10187 _RemoveDisks(self, iobj)
10189 self.cfg.ReleaseDRBDMinors(instance)
10192 feedback_fn("adding instance %s to cluster config" % instance)
10194 self.cfg.AddInstance(iobj, self.proc.GetECId())
10196 # Declare that we don't want to remove the instance lock anymore, as we've
10197 # added the instance to the config
10198 del self.remove_locks[locking.LEVEL_INSTANCE]
10200 if self.op.mode == constants.INSTANCE_IMPORT:
10201 # Release unused nodes
10202 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10204 # Release all nodes
10205 _ReleaseLocks(self, locking.LEVEL_NODE)
10208 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10209 feedback_fn("* wiping instance disks...")
10211 _WipeDisks(self, iobj)
10212 except errors.OpExecError, err:
10213 logging.exception("Wiping disks failed")
10214 self.LogWarning("Wiping instance disks failed (%s)", err)
10218 # Something is already wrong with the disks, don't do anything else
10220 elif self.op.wait_for_sync:
10221 disk_abort = not _WaitForSync(self, iobj)
10222 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10223 # make sure the disks are not degraded (still sync-ing is ok)
10224 feedback_fn("* checking mirrors status")
10225 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10230 _RemoveDisks(self, iobj)
10231 self.cfg.RemoveInstance(iobj.name)
10232 # Make sure the instance lock gets removed
10233 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10234 raise errors.OpExecError("There are some degraded disks for"
10237 # Release all node resource locks
10238 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10240 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10241 # we need to set the disk IDs to the primary node, since the
10242 # preceding code might or might not have done it, depending on
10243 # disk template and other options
10244 for disk in iobj.disks:
10245 self.cfg.SetDiskID(disk, pnode_name)
10246 if self.op.mode == constants.INSTANCE_CREATE:
10247 if not self.op.no_install:
10248 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10249 not self.op.wait_for_sync)
10251 feedback_fn("* pausing disk sync to install instance OS")
10252 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10255 for idx, success in enumerate(result.payload):
10257 logging.warn("pause-sync of instance %s for disk %d failed",
10260 feedback_fn("* running the instance OS create scripts...")
10261 # FIXME: pass debug option from opcode to backend
10263 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10264 self.op.debug_level)
10266 feedback_fn("* resuming disk sync")
10267 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10270 for idx, success in enumerate(result.payload):
10272 logging.warn("resume-sync of instance %s for disk %d failed",
10275 os_add_result.Raise("Could not add os for instance %s"
10276 " on node %s" % (instance, pnode_name))
10279 if self.op.mode == constants.INSTANCE_IMPORT:
10280 feedback_fn("* running the instance OS import scripts...")
10284 for idx, image in enumerate(self.src_images):
10288 # FIXME: pass debug option from opcode to backend
10289 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10290 constants.IEIO_FILE, (image, ),
10291 constants.IEIO_SCRIPT,
10292 (iobj.disks[idx], idx),
10294 transfers.append(dt)
10297 masterd.instance.TransferInstanceData(self, feedback_fn,
10298 self.op.src_node, pnode_name,
10299 self.pnode.secondary_ip,
10301 if not compat.all(import_result):
10302 self.LogWarning("Some disks for instance %s on node %s were not"
10303 " imported successfully" % (instance, pnode_name))
10305 rename_from = self._old_instance_name
10307 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10308 feedback_fn("* preparing remote import...")
10309 # The source cluster will stop the instance before attempting to make
10310 # a connection. In some cases stopping an instance can take a long
10311 # time, hence the shutdown timeout is added to the connection timeout.
10313 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10314 self.op.source_shutdown_timeout)
10315 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10317 assert iobj.primary_node == self.pnode.name
10319 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10320 self.source_x509_ca,
10321 self._cds, timeouts)
10322 if not compat.all(disk_results):
10323 # TODO: Should the instance still be started, even if some disks
10324 # failed to import (valid for local imports, too)?
10325 self.LogWarning("Some disks for instance %s on node %s were not"
10326 " imported successfully" % (instance, pnode_name))
10328 rename_from = self.source_instance_name
10331 # also checked in the prereq part
10332 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10335 # Run rename script on newly imported instance
10336 assert iobj.name == instance
10337 feedback_fn("Running rename script for %s" % instance)
10338 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10340 self.op.debug_level)
10341 if result.fail_msg:
10342 self.LogWarning("Failed to run rename script for %s on node"
10343 " %s: %s" % (instance, pnode_name, result.fail_msg))
10345 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10348 iobj.admin_state = constants.ADMINST_UP
10349 self.cfg.Update(iobj, feedback_fn)
10350 logging.info("Starting instance %s on node %s", instance, pnode_name)
10351 feedback_fn("* starting instance...")
10352 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10354 result.Raise("Could not start instance")
10356 return list(iobj.all_nodes)
10359 def _CheckRADOSFreeSpace():
10360 """Compute disk size requirements inside the RADOS cluster.
10363 # For the RADOS cluster we assume there is always enough space.
10367 class LUInstanceConsole(NoHooksLU):
10368 """Connect to an instance's console.
10370 This is somewhat special in that it returns the command line that
10371 you need to run on the master node in order to connect to the console.
10377 def ExpandNames(self):
10378 self.share_locks = _ShareAll()
10379 self._ExpandAndLockInstance()
10381 def CheckPrereq(self):
10382 """Check prerequisites.
10384 This checks that the instance is in the cluster.
10387 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10388 assert self.instance is not None, \
10389 "Cannot retrieve locked instance %s" % self.op.instance_name
10390 _CheckNodeOnline(self, self.instance.primary_node)
10392 def Exec(self, feedback_fn):
10393 """Connect to the console of an instance
10396 instance = self.instance
10397 node = instance.primary_node
10399 node_insts = self.rpc.call_instance_list([node],
10400 [instance.hypervisor])[node]
10401 node_insts.Raise("Can't get node information from %s" % node)
10403 if instance.name not in node_insts.payload:
10404 if instance.admin_state == constants.ADMINST_UP:
10405 state = constants.INSTST_ERRORDOWN
10406 elif instance.admin_state == constants.ADMINST_DOWN:
10407 state = constants.INSTST_ADMINDOWN
10409 state = constants.INSTST_ADMINOFFLINE
10410 raise errors.OpExecError("Instance %s is not running (state %s)" %
10411 (instance.name, state))
10413 logging.debug("Connecting to console of %s on %s", instance.name, node)
10415 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10418 def _GetInstanceConsole(cluster, instance):
10419 """Returns console information for an instance.
10421 @type cluster: L{objects.Cluster}
10422 @type instance: L{objects.Instance}
10426 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10427 # beparams and hvparams are passed separately, to avoid editing the
10428 # instance and then saving the defaults in the instance itself.
10429 hvparams = cluster.FillHV(instance)
10430 beparams = cluster.FillBE(instance)
10431 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10433 assert console.instance == instance.name
10434 assert console.Validate()
10436 return console.ToDict()
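# Illustrative sketch (the exact fields depend on the hypervisor and
# are assumed here): the dict returned above might look like
#
#   {"instance": "inst1.example.com", "kind": constants.CONS_SSH,
#    "host": "node1.example.com", "user": "root",
#    "command": ["xm", "console", "inst1.example.com"]}
#
# i.e. enough information for the client to build the command line
# mentioned in the LUInstanceConsole docstring.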
10439 class LUInstanceReplaceDisks(LogicalUnit):
10440 """Replace the disks of an instance.
10443 HPATH = "mirrors-replace"
10444 HTYPE = constants.HTYPE_INSTANCE
10447 def CheckArguments(self):
10448 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10449 self.op.iallocator)
10451 def ExpandNames(self):
10452 self._ExpandAndLockInstance()
10454 assert locking.LEVEL_NODE not in self.needed_locks
10455 assert locking.LEVEL_NODE_RES not in self.needed_locks
10456 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10458 assert self.op.iallocator is None or self.op.remote_node is None, \
10459 "Conflicting options"
10461 if self.op.remote_node is not None:
10462 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10464 # Warning: do not remove the locking of the new secondary here
10465 # unless DRBD8.AddChildren is changed to work in parallel;
10466 # currently it doesn't since parallel invocations of
10467 # FindUnusedMinor will conflict
10468 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10469 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10471 self.needed_locks[locking.LEVEL_NODE] = []
10472 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10474 if self.op.iallocator is not None:
10475 # iallocator will select a new node in the same group
10476 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10478 self.needed_locks[locking.LEVEL_NODE_RES] = []
10480 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10481 self.op.iallocator, self.op.remote_node,
10482 self.op.disks, False, self.op.early_release,
10483 self.op.ignore_ipolicy)
10485 self.tasklets = [self.replacer]
10487 def DeclareLocks(self, level):
10488 if level == locking.LEVEL_NODEGROUP:
10489 assert self.op.remote_node is None
10490 assert self.op.iallocator is not None
10491 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10493 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10494 # Lock all groups used by instance optimistically; this requires going
10495 # via the node before it's locked, requiring verification later on
10496 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10497 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10499 elif level == locking.LEVEL_NODE:
10500 if self.op.iallocator is not None:
10501 assert self.op.remote_node is None
10502 assert not self.needed_locks[locking.LEVEL_NODE]
10504 # Lock member nodes of all locked groups
10505 self.needed_locks[locking.LEVEL_NODE] = \
10507 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10508 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10510 self._LockInstancesNodes()
10511 elif level == locking.LEVEL_NODE_RES:
10513 self.needed_locks[locking.LEVEL_NODE_RES] = \
10514 self.needed_locks[locking.LEVEL_NODE]
10516 def BuildHooksEnv(self):
10517 """Build hooks env.
10519 This runs on the master, the primary and all the secondaries.
10522 instance = self.replacer.instance
10524 "MODE": self.op.mode,
10525 "NEW_SECONDARY": self.op.remote_node,
10526 "OLD_SECONDARY": instance.secondary_nodes[0],
10528 env.update(_BuildInstanceHookEnvByObject(self, instance))
10531 def BuildHooksNodes(self):
10532 """Build hooks nodes.
10535 instance = self.replacer.instance
10537 self.cfg.GetMasterNode(),
10538 instance.primary_node,
10540 if self.op.remote_node is not None:
10541 nl.append(self.op.remote_node)
10544 def CheckPrereq(self):
10545 """Check prerequisites.
10548 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10549 self.op.iallocator is None)
10551 # Verify if node group locks are still correct
10552 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10554 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10556 return LogicalUnit.CheckPrereq(self)
10559 class TLReplaceDisks(Tasklet):
10560 """Replaces disks for an instance.
10562 Note: Locking is not within the scope of this class.
10565 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10566 disks, delay_iallocator, early_release, ignore_ipolicy):
10567 """Initializes this class.
10570 Tasklet.__init__(self, lu)
10573 self.instance_name = instance_name
10575 self.iallocator_name = iallocator_name
10576 self.remote_node = remote_node
10578 self.delay_iallocator = delay_iallocator
10579 self.early_release = early_release
10580 self.ignore_ipolicy = ignore_ipolicy
10583 self.instance = None
10584 self.new_node = None
10585 self.target_node = None
10586 self.other_node = None
10587 self.remote_node_info = None
10588 self.node_secondary_ip = None
10591 def CheckArguments(mode, remote_node, ialloc):
10592 """Helper function for users of this class.
10595 # check for valid parameter combination
10596 if mode == constants.REPLACE_DISK_CHG:
10597 if remote_node is None and ialloc is None:
10598 raise errors.OpPrereqError("When changing the secondary either an"
10599 " iallocator script must be used or the"
10600 " new node given", errors.ECODE_INVAL)
10602 if remote_node is not None and ialloc is not None:
10603 raise errors.OpPrereqError("Give either the iallocator or the new"
10604 " secondary, not both", errors.ECODE_INVAL)
10606 elif remote_node is not None or ialloc is not None:
10607 # Not replacing the secondary
10608 raise errors.OpPrereqError("The iallocator and new node options can"
10609 " only be used when changing the"
10610 " secondary node", errors.ECODE_INVAL)
10613 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10614 """Compute a new secondary node using an IAllocator.
10617 req = iallocator.IAReqRelocate(name=instance_name,
10618 relocate_from=list(relocate_from))
10619 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10621 ial.Run(iallocator_name)
10623 if not ial.success:
10624 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10625 " %s" % (iallocator_name, ial.info),
10626 errors.ECODE_NORES)
10628 remote_node_name = ial.result[0]
10630 lu.LogInfo("Selected new secondary for instance '%s': %s",
10631 instance_name, remote_node_name)
10633 return remote_node_name
10635 def _FindFaultyDisks(self, node_name):
10636 """Wrapper for L{_FindFaultyInstanceDisks}.
10639 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10642 def _CheckDisksActivated(self, instance):
10643 """Checks if the instance disks are activated.
10645 @param instance: The instance to check disks
10646 @return: True if they are activated, False otherwise
10649 nodes = instance.all_nodes
10651 for idx, dev in enumerate(instance.disks):
10653 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10654 self.cfg.SetDiskID(dev, node)
10656 result = _BlockdevFind(self, node, dev, instance)
10660 elif result.fail_msg or not result.payload:
10665 def CheckPrereq(self):
10666 """Check prerequisites.
10668 This checks that the instance is in the cluster.
10671 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10672 assert instance is not None, \
10673 "Cannot retrieve locked instance %s" % self.instance_name
10675 if instance.disk_template != constants.DT_DRBD8:
10676 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10677 " instances", errors.ECODE_INVAL)
10679 if len(instance.secondary_nodes) != 1:
10680 raise errors.OpPrereqError("The instance has a strange layout,"
10681 " expected one secondary but found %d" %
10682 len(instance.secondary_nodes),
10683 errors.ECODE_FAULT)
10685 if not self.delay_iallocator:
10686 self._CheckPrereq2()
10688 def _CheckPrereq2(self):
10689 """Check prerequisites, second part.
10691 This function should always be part of CheckPrereq. It was separated and is
10692 now called from Exec because during node evacuation iallocator was only
10693 called with an unmodified cluster model, not taking planned changes into account.
10697 instance = self.instance
10698 secondary_node = instance.secondary_nodes[0]
10700 if self.iallocator_name is None:
10701 remote_node = self.remote_node
10703 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10704 instance.name, instance.secondary_nodes)
10706 if remote_node is None:
10707 self.remote_node_info = None
10709 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10710 "Remote node '%s' is not locked" % remote_node
10712 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10713 assert self.remote_node_info is not None, \
10714 "Cannot retrieve locked node %s" % remote_node
10716 if remote_node == self.instance.primary_node:
10717 raise errors.OpPrereqError("The specified node is the primary node of"
10718 " the instance", errors.ECODE_INVAL)
10720 if remote_node == secondary_node:
10721 raise errors.OpPrereqError("The specified node is already the"
10722 " secondary node of the instance",
10723 errors.ECODE_INVAL)
10725 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10726 constants.REPLACE_DISK_CHG):
10727 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10728 errors.ECODE_INVAL)
10730 if self.mode == constants.REPLACE_DISK_AUTO:
10731 if not self._CheckDisksActivated(instance):
10732 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10733 " first" % self.instance_name,
10734 errors.ECODE_STATE)
10735 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10736 faulty_secondary = self._FindFaultyDisks(secondary_node)
10738 if faulty_primary and faulty_secondary:
10739 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10740 " one node and can not be repaired"
10741 " automatically" % self.instance_name,
10742 errors.ECODE_STATE)
10745 self.disks = faulty_primary
10746 self.target_node = instance.primary_node
10747 self.other_node = secondary_node
10748 check_nodes = [self.target_node, self.other_node]
10749 elif faulty_secondary:
10750 self.disks = faulty_secondary
10751 self.target_node = secondary_node
10752 self.other_node = instance.primary_node
10753 check_nodes = [self.target_node, self.other_node]
10759 # Non-automatic modes
10760 if self.mode == constants.REPLACE_DISK_PRI:
10761 self.target_node = instance.primary_node
10762 self.other_node = secondary_node
10763 check_nodes = [self.target_node, self.other_node]
10765 elif self.mode == constants.REPLACE_DISK_SEC:
10766 self.target_node = secondary_node
10767 self.other_node = instance.primary_node
10768 check_nodes = [self.target_node, self.other_node]
10770 elif self.mode == constants.REPLACE_DISK_CHG:
10771 self.new_node = remote_node
10772 self.other_node = instance.primary_node
10773 self.target_node = secondary_node
10774 check_nodes = [self.new_node, self.other_node]
10776 _CheckNodeNotDrained(self.lu, remote_node)
10777 _CheckNodeVmCapable(self.lu, remote_node)
10779 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10780 assert old_node_info is not None
10781 if old_node_info.offline and not self.early_release:
10782 # doesn't make sense to delay the release
10783 self.early_release = True
10784 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10785 " early-release mode", secondary_node)
10788 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10791 # If not specified all disks should be replaced
10793 self.disks = range(len(self.instance.disks))
10795 # TODO: This is ugly, but right now we can't distinguish between internally
10796 # submitted opcodes and external ones. We should fix that.
10797 if self.remote_node_info:
10798 # We change the node; let's verify it still meets the instance policy
10799 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10800 cluster = self.cfg.GetClusterInfo()
10801 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
10803 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10804 ignore=self.ignore_ipolicy)
10806 for node in check_nodes:
10807 _CheckNodeOnline(self.lu, node)
10809 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10812 if node_name is not None)
10814 # Release unneeded node and node resource locks
10815 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10816 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10818 # Release any owned node group
10819 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10820 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10822 # Check whether disks are valid
10823 for disk_idx in self.disks:
10824 instance.FindDisk(disk_idx)
10826 # Get secondary node IP addresses
10827 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10828 in self.cfg.GetMultiNodeInfo(touched_nodes))
10830 def Exec(self, feedback_fn):
10831 """Execute disk replacement.
10833 This dispatches the disk replacement to the appropriate handler.
10836 if self.delay_iallocator:
10837 self._CheckPrereq2()
10840 # Verify owned locks before starting operation
10841 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10842 assert set(owned_nodes) == set(self.node_secondary_ip), \
10843 ("Incorrect node locks, owning %s, expected %s" %
10844 (owned_nodes, self.node_secondary_ip.keys()))
10845 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10846 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10848 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10849 assert list(owned_instances) == [self.instance_name], \
10850 "Instance '%s' not locked" % self.instance_name
10852 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10853 "Should not own any node group lock at this point"
10856 feedback_fn("No disks need replacement")
10859 feedback_fn("Replacing disk(s) %s for %s" %
10860 (utils.CommaJoin(self.disks), self.instance.name))
10862 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10864 # Activate the instance disks if we're replacing them on a down instance
if activate_disks:
10866 _StartInstanceDisks(self.lu, self.instance, True)
10869 # Should we replace the secondary node?
10870 if self.new_node is not None:
10871 fn = self._ExecDrbd8Secondary
else:
10873 fn = self._ExecDrbd8DiskOnly
10875 result = fn(feedback_fn)
10877 # Deactivate the instance disks if we're replacing them on a down instance
if activate_disks:
10880 _SafeShutdownInstanceDisks(self.lu, self.instance)
10882 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10885 # Verify owned locks
10886 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10887 nodes = frozenset(self.node_secondary_ip)
10888 assert ((self.early_release and not owned_nodes) or
10889 (not self.early_release and not (set(owned_nodes) - nodes))), \
10890 ("Not owning the correct locks, early_release=%s, owned=%r,"
10891 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10895 def _CheckVolumeGroup(self, nodes):
10896 self.lu.LogInfo("Checking volume groups")
10898 vgname = self.cfg.GetVGName()
10900 # Make sure volume group exists on all involved nodes
10901 results = self.rpc.call_vg_list(nodes)
10903 raise errors.OpExecError("Can't list volume groups on the nodes")
10906 res = results[node]
10907 res.Raise("Error checking node %s" % node)
10908 if vgname not in res.payload:
10909 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10912 def _CheckDisksExistence(self, nodes):
10913 # Check disk existence
10914 for idx, dev in enumerate(self.instance.disks):
10915 if idx not in self.disks:
10919 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10920 self.cfg.SetDiskID(dev, node)
10922 result = _BlockdevFind(self, node, dev, self.instance)
10924 msg = result.fail_msg
10925 if msg or not result.payload:
10927 msg = "disk not found"
10928 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10931 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10932 for idx, dev in enumerate(self.instance.disks):
10933 if idx not in self.disks:
10936 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10939 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10940 on_primary, ldisk=ldisk):
10941 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10942 " replace disks for instance %s" %
10943 (node_name, self.instance.name))
10945 def _CreateNewStorage(self, node_name):
10946 """Create new storage on the primary or secondary node.
10948 This is only used for same-node replaces, not for changing the
10949 secondary node, hence we don't want to modify the existing disk.
10954 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10955 for idx, dev in enumerate(disks):
10956 if idx not in self.disks:
10959 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10961 self.cfg.SetDiskID(dev, node_name)
10963 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10964 names = _GenerateUniqueNames(self.lu, lv_names)
10966 (data_disk, meta_disk) = dev.children
10967 vg_data = data_disk.logical_id[0]
10968 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10969 logical_id=(vg_data, names[0]),
10970 params=data_disk.params)
10971 vg_meta = meta_disk.logical_id[0]
10972 lv_meta = objects.Disk(dev_type=constants.LD_LV,
10973 size=constants.DRBD_META_SIZE,
10974 logical_id=(vg_meta, names[1]),
10975 params=meta_disk.params)
10977 new_lvs = [lv_data, lv_meta]
10978 old_lvs = [child.Copy() for child in dev.children]
10979 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10981 # we pass force_create=True to force the LVM creation
10982 for new_lv in new_lvs:
10983 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10984 _GetInstanceInfoText(self.instance), False)
return iv_names
10988 def _CheckDevices(self, node_name, iv_names):
10989 for name, (dev, _, _) in iv_names.iteritems():
10990 self.cfg.SetDiskID(dev, node_name)
10992 result = _BlockdevFind(self, node_name, dev, self.instance)
10994 msg = result.fail_msg
10995 if msg or not result.payload:
if not msg:
10997 msg = "disk not found"
10998 raise errors.OpExecError("Can't find DRBD device %s: %s" %
(name, msg))
11001 if result.payload.is_degraded:
11002 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11004 def _RemoveOldStorage(self, node_name, iv_names):
11005 for name, (_, old_lvs, _) in iv_names.iteritems():
11006 self.lu.LogInfo("Remove logical volumes for %s" % name)
for lv in old_lvs:
11009 self.cfg.SetDiskID(lv, node_name)
11011 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
if msg:
11013 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11014 hint="remove unused LVs manually")
11016 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11017 """Replace a disk on the primary or secondary for DRBD 8.
11019 The algorithm for replace is quite complicated:
11021 1. for each disk to be replaced:
11023 1. create new LVs on the target node with unique names
11024 1. detach old LVs from the drbd device
11025 1. rename old LVs to name_replaced.<time_t>
11026 1. rename new LVs to old LVs
11027 1. attach the new LVs (with the old names now) to the drbd device
11029 1. wait for sync across all devices
11031 1. for each modified disk:
11033 1. remove old LVs (which have the name name_replaced.<time_t>)
11035 Failures are not very well handled.

"""
steps_total = 6
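# Rough sketch of the per-disk swap performed below (assuming a disk
# "disk/0" backed by LVs "disk0_data"/"disk0_meta" on the target node;
# names are illustrative only):
#
#   drbd0 -- detach --> [disk0_data, disk0_meta]       (removechildren)
#   disk0_data           -> disk0_data_replaced-<t>    (rename old LVs)
#   <new unique LV name>  -> disk0_data                (rename new LVs)
#   drbd0 -- attach --> [disk0_data, disk0_meta]       (addchildren)
#
# After all disks are processed the DRBD devices resync and the
# "*_replaced-<t>" LVs are deleted (immediately with early release,
# otherwise after the sync completes).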
11040 # Step: check device activation
11041 self.lu.LogStep(1, steps_total, "Check device existence")
11042 self._CheckDisksExistence([self.other_node, self.target_node])
11043 self._CheckVolumeGroup([self.target_node, self.other_node])
11045 # Step: check other node consistency
11046 self.lu.LogStep(2, steps_total, "Check peer consistency")
11047 self._CheckDisksConsistency(self.other_node,
11048 self.other_node == self.instance.primary_node,
False)
11051 # Step: create new storage
11052 self.lu.LogStep(3, steps_total, "Allocate new storage")
11053 iv_names = self._CreateNewStorage(self.target_node)
11055 # Step: for each lv, detach+rename*2+attach
11056 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11057 for dev, old_lvs, new_lvs in iv_names.itervalues():
11058 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11060 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
old_lvs)
11062 result.Raise("Can't detach drbd from local storage on node"
11063 " %s for device %s" % (self.target_node, dev.iv_name))
11065 #cfg.Update(instance)
11067 # ok, we created the new LVs, so now we know we have the needed
11068 # storage; as such, we proceed on the target node to rename
11069 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11070 # using the assumption that logical_id == physical_id (which in
11071 # turn is the unique_id on that node)
11073 # FIXME(iustin): use a better name for the replaced LVs
11074 temp_suffix = int(time.time())
11075 ren_fn = lambda d, suff: (d.physical_id[0],
11076 d.physical_id[1] + "_replaced-%s" % suff)
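# For example (hypothetical names), with temp_suffix == 1234567890 the
# mapping built below turns
#   ('xenvg', 'disk0_data')  into  ('xenvg', 'disk0_data_replaced-1234567890')
# i.e. only the LV name changes, the volume group stays the same.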
11078 # Build the rename list based on what LVs exist on the node
11079 rename_old_to_new = []
11080 for to_ren in old_lvs:
11081 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11082 if not result.fail_msg and result.payload:
11084 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11086 self.lu.LogInfo("Renaming the old LVs on the target node")
11087 result = self.rpc.call_blockdev_rename(self.target_node,
rename_old_to_new)
11089 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11091 # Now we rename the new LVs to the old LVs
11092 self.lu.LogInfo("Renaming the new LVs on the target node")
11093 rename_new_to_old = [(new, old.physical_id)
11094 for old, new in zip(old_lvs, new_lvs)]
11095 result = self.rpc.call_blockdev_rename(self.target_node,
rename_new_to_old)
11097 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11099 # Intermediate steps of in memory modifications
11100 for old, new in zip(old_lvs, new_lvs):
11101 new.logical_id = old.logical_id
11102 self.cfg.SetDiskID(new, self.target_node)
11104 # We need to modify old_lvs so that removal later removes the
11105 # right LVs, not the newly added ones; note that old_lvs is a
11107 for disk in old_lvs:
11108 disk.logical_id = ren_fn(disk, temp_suffix)
11109 self.cfg.SetDiskID(disk, self.target_node)
11111 # Now that the new lvs have the old name, we can add them to the device
11112 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11113 result = self.rpc.call_blockdev_addchildren(self.target_node,
11114 (dev, self.instance), new_lvs)
11115 msg = result.fail_msg
if msg:
11117 for new_lv in new_lvs:
11118 msg2 = self.rpc.call_blockdev_remove(self.target_node,
new_lv).fail_msg
if msg2:
11121 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11122 hint=("cleanup manually the unused logical"
" volumes"))
11124 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11126 cstep = itertools.count(5)
11128 if self.early_release:
11129 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11130 self._RemoveOldStorage(self.target_node, iv_names)
11131 # TODO: Check if releasing locks early still makes sense
11132 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
else:
11134 # Release all resource locks except those used by the instance
11135 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11136 keep=self.node_secondary_ip.keys())
11138 # Release all node locks while waiting for sync
11139 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11141 # TODO: Can the instance lock be downgraded here? Take the optional disk
11142 # shutdown in the caller into consideration.
11145 # This can fail as the old devices are degraded and _WaitForSync
11146 # does a combined result over all disks, so we don't check its return value
11147 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11148 _WaitForSync(self.lu, self.instance)
11150 # Check all devices manually
11151 self._CheckDevices(self.instance.primary_node, iv_names)
11153 # Step: remove old storage
11154 if not self.early_release:
11155 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11156 self._RemoveOldStorage(self.target_node, iv_names)
11158 def _ExecDrbd8Secondary(self, feedback_fn):
11159 """Replace the secondary node for DRBD 8.
11161 The algorithm for replace is quite complicated:
11162 - for all disks of the instance:
11163 - create new LVs on the new node with same names
11164 - shutdown the drbd device on the old secondary
11165 - disconnect the drbd network on the primary
11166 - create the drbd device on the new secondary
11167 - network attach the drbd on the primary, using an artifice:
11168 the drbd code for Attach() will connect to the network if it
11169 finds a device which is connected to the good local disks but
11170 not network enabled
11171 - wait for sync across all devices
11172 - remove all disks from the old secondary
11174 Failures are not very well handled.

"""
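# Rough outline of what follows, assuming a single-disk instance with
# primary node A, old secondary B and new secondary C (names illustrative):
#
#   1. create data/meta LVs and a standalone DRBD device on C
#   2. shut down the DRBD device on B
#   3. disconnect A's DRBD from the network (standalone)
#   4. point the instance's logical_ids at (A, C, ...) in the config
#   5. call drbd_attach_net on A and C so they find each other and resync
#
# If anything fails before the config update, the freshly allocated DRBD
# minors are released again.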
steps_total = 6
11179 pnode = self.instance.primary_node
11181 # Step: check device activation
11182 self.lu.LogStep(1, steps_total, "Check device existence")
11183 self._CheckDisksExistence([self.instance.primary_node])
11184 self._CheckVolumeGroup([self.instance.primary_node])
11186 # Step: check other node consistency
11187 self.lu.LogStep(2, steps_total, "Check peer consistency")
11188 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11190 # Step: create new storage
11191 self.lu.LogStep(3, steps_total, "Allocate new storage")
11192 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11193 for idx, dev in enumerate(disks):
11194 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11195 (self.new_node, idx))
11196 # we pass force_create=True to force LVM creation
11197 for new_lv in dev.children:
11198 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11199 True, _GetInstanceInfoText(self.instance), False)
11201 # Step 4: DRBD minors and DRBD setup changes
11202 # after this, we must manually remove the drbd minors on both the
11203 # error and the success paths
11204 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11205 minors = self.cfg.AllocateDRBDMinor([self.new_node
11206 for dev in self.instance.disks],
11207 self.instance.name)
11208 logging.debug("Allocated minors %r", minors)
iv_names = {}
11211 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11212 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11213 (self.new_node, idx))
11214 # create new devices on new_node; note that we create two IDs:
11215 # one without port, so the drbd will be activated without
11216 # networking information on the new node at this stage, and one
11217 # with network, for the latter activation in step 4
11218 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11219 if self.instance.primary_node == o_node1:
p_minor = o_minor1
else:
11222 assert self.instance.primary_node == o_node2, "Three-node instance?"
p_minor = o_minor2
11225 new_alone_id = (self.instance.primary_node, self.new_node, None,
11226 p_minor, new_minor, o_secret)
11227 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11228 p_minor, new_minor, o_secret)
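# As an illustration, if the old logical_id was
#   (node_A, node_B, 11000, 0, 1, secret)
# and the new minor allocated on node_C is 3, the two IDs built above are
#   new_alone_id = (node_A, node_C, None,  0, 3, secret)  # no network yet
#   new_net_id   = (node_A, node_C, 11000, 0, 3, secret)  # used later
# (node names and numbers are made up for the example).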
11230 iv_names[idx] = (dev, dev.children, new_net_id)
11231 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
new_net_id)
11233 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11234 logical_id=new_alone_id,
11235 children=dev.children,
size=dev.size,
params={})
11238 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
self.cfg)
try:
11241 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
anno_new_drbd,
11243 _GetInstanceInfoText(self.instance), False)
11244 except errors.GenericError:
11245 self.cfg.ReleaseDRBDMinors(self.instance.name)
raise
11248 # We have new devices, shutdown the drbd on the old secondary
11249 for idx, dev in enumerate(self.instance.disks):
11250 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11251 self.cfg.SetDiskID(dev, self.target_node)
11252 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11253 (dev, self.instance)).fail_msg
if msg:
11255 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11256 " node: %s" % (idx, msg),
11257 hint=("Please cleanup this device manually as"
11258 " soon as possible"))
11260 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11261 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11262 self.instance.disks)[pnode]
11264 msg = result.fail_msg
if msg:
11266 # detaches didn't succeed (unlikely)
11267 self.cfg.ReleaseDRBDMinors(self.instance.name)
11268 raise errors.OpExecError("Can't detach the disks from the network on"
11269 " old node: %s" % (msg,))
11271 # if we managed to detach at least one, we update all the disks of
11272 # the instance to point to the new secondary
11273 self.lu.LogInfo("Updating instance configuration")
11274 for dev, _, new_logical_id in iv_names.itervalues():
11275 dev.logical_id = new_logical_id
11276 self.cfg.SetDiskID(dev, self.instance.primary_node)
11278 self.cfg.Update(self.instance, feedback_fn)
11280 # Release all node locks (the configuration has been updated)
11281 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11283 # and now perform the drbd attach
11284 self.lu.LogInfo("Attaching primary drbds to new secondary"
11285 " (standalone => connected)")
11286 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
self.new_node],
11288 self.node_secondary_ip,
11289 (self.instance.disks, self.instance),
11290 self.instance.name,
False)
11292 for to_node, to_result in result.items():
11293 msg = to_result.fail_msg
if msg:
11295 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
to_node, msg,
11297 hint=("please do a gnt-instance info to see the"
11298 " status of disks"))
11300 cstep = itertools.count(5)
11302 if self.early_release:
11303 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11304 self._RemoveOldStorage(self.target_node, iv_names)
11305 # TODO: Check if releasing locks early still makes sense
11306 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
else:
11308 # Release all resource locks except those used by the instance
11309 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11310 keep=self.node_secondary_ip.keys())
11312 # TODO: Can the instance lock be downgraded here? Take the optional disk
11313 # shutdown in the caller into consideration.
11316 # This can fail as the old devices are degraded and _WaitForSync
11317 # does a combined result over all disks, so we don't check its return value
11318 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11319 _WaitForSync(self.lu, self.instance)
11321 # Check all devices manually
11322 self._CheckDevices(self.instance.primary_node, iv_names)
11324 # Step: remove old storage
11325 if not self.early_release:
11326 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11327 self._RemoveOldStorage(self.target_node, iv_names)
11330 class LURepairNodeStorage(NoHooksLU):
11331 """Repairs the volume group on a node.
11336 def CheckArguments(self):
11337 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11339 storage_type = self.op.storage_type
11341 if (constants.SO_FIX_CONSISTENCY not in
11342 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11343 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11344 " repaired" % storage_type,
11345 errors.ECODE_INVAL)
11347 def ExpandNames(self):
11348 self.needed_locks = {
11349 locking.LEVEL_NODE: [self.op.node_name],
}
11352 def _CheckFaultyDisks(self, instance, node_name):
11353 """Ensure faulty disks abort the opcode or at least warn."""
try:
11355 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
node_name, True):
11357 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11358 " node '%s'" % (instance.name, node_name),
11359 errors.ECODE_STATE)
11360 except errors.OpPrereqError, err:
11361 if self.op.ignore_consistency:
11362 self.proc.LogWarning(str(err.args[0]))
else:
raise
11366 def CheckPrereq(self):
11367 """Check prerequisites.
11370 # Check whether any instance on this node has faulty disks
11371 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11372 if inst.admin_state != constants.ADMINST_UP:
continue
11374 check_nodes = set(inst.all_nodes)
11375 check_nodes.discard(self.op.node_name)
11376 for inst_node_name in check_nodes:
11377 self._CheckFaultyDisks(inst, inst_node_name)
11379 def Exec(self, feedback_fn):
11380 feedback_fn("Repairing storage unit '%s' on %s ..." %
11381 (self.op.name, self.op.node_name))
11383 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11384 result = self.rpc.call_storage_execute(self.op.node_name,
11385 self.op.storage_type, st_args,
self.op.name,
11387 constants.SO_FIX_CONSISTENCY)
11388 result.Raise("Failed to repair storage unit '%s' on %s" %
11389 (self.op.name, self.op.node_name))
11392 class LUNodeEvacuate(NoHooksLU):
11393 """Evacuates instances off a list of nodes.
11398 _MODE2IALLOCATOR = {
11399 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11400 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11401 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
}
11403 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11404 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11405 constants.IALLOCATOR_NEVAC_MODES)
11407 def CheckArguments(self):
11408 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11410 def ExpandNames(self):
11411 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11413 if self.op.remote_node is not None:
11414 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11415 assert self.op.remote_node
11417 if self.op.remote_node == self.op.node_name:
11418 raise errors.OpPrereqError("Can not use evacuated node as a new"
11419 " secondary node", errors.ECODE_INVAL)
11421 if self.op.mode != constants.NODE_EVAC_SEC:
11422 raise errors.OpPrereqError("Without the use of an iallocator only"
11423 " secondary instances can be evacuated",
11424 errors.ECODE_INVAL)
11427 self.share_locks = _ShareAll()
11428 self.needed_locks = {
11429 locking.LEVEL_INSTANCE: [],
11430 locking.LEVEL_NODEGROUP: [],
11431 locking.LEVEL_NODE: [],
}
11434 # Determine nodes (via group) optimistically, needs verification once locks
11435 # have been acquired
11436 self.lock_nodes = self._DetermineNodes()
11438 def _DetermineNodes(self):
11439 """Gets the list of nodes to operate on.
11442 if self.op.remote_node is None:
11443 # Iallocator will choose any node(s) in the same group
11444 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
else:
11446 group_nodes = frozenset([self.op.remote_node])
11448 # Determine nodes to be locked
11449 return set([self.op.node_name]) | group_nodes
11451 def _DetermineInstances(self):
11452 """Builds list of instances to operate on.
11455 assert self.op.mode in constants.NODE_EVAC_MODES
11457 if self.op.mode == constants.NODE_EVAC_PRI:
11458 # Primary instances only
11459 inst_fn = _GetNodePrimaryInstances
11460 assert self.op.remote_node is None, \
11461 "Evacuating primary instances requires iallocator"
11462 elif self.op.mode == constants.NODE_EVAC_SEC:
11463 # Secondary instances only
11464 inst_fn = _GetNodeSecondaryInstances
else:
11467 assert self.op.mode == constants.NODE_EVAC_ALL
11468 inst_fn = _GetNodeInstances
11469 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11471 raise errors.OpPrereqError("Due to an issue with the iallocator"
11472 " interface it is not possible to evacuate"
11473 " all instances at once; specify explicitly"
11474 " whether to evacuate primary or secondary"
" instances",
11476 errors.ECODE_INVAL)
11478 return inst_fn(self.cfg, self.op.node_name)
11480 def DeclareLocks(self, level):
11481 if level == locking.LEVEL_INSTANCE:
11482 # Lock instances optimistically, needs verification once node and group
11483 # locks have been acquired
11484 self.needed_locks[locking.LEVEL_INSTANCE] = \
11485 set(i.name for i in self._DetermineInstances())
11487 elif level == locking.LEVEL_NODEGROUP:
11488 # Lock node groups for all potential target nodes optimistically, needs
11489 # verification once nodes have been acquired
11490 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11491 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11493 elif level == locking.LEVEL_NODE:
11494 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11496 def CheckPrereq(self):
11498 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11499 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11500 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11502 need_nodes = self._DetermineNodes()
11504 if not owned_nodes.issuperset(need_nodes):
11505 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11506 " locks were acquired, current nodes are"
11507 " '%s', used to be '%s'; retry the"
" operation" %
11509 (self.op.node_name,
11510 utils.CommaJoin(need_nodes),
11511 utils.CommaJoin(owned_nodes)),
11512 errors.ECODE_STATE)
11514 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11515 if owned_groups != wanted_groups:
11516 raise errors.OpExecError("Node groups changed since locks were acquired,"
11517 " current groups are '%s', used to be '%s';"
11518 " retry the operation" %
11519 (utils.CommaJoin(wanted_groups),
11520 utils.CommaJoin(owned_groups)))
11522 # Determine affected instances
11523 self.instances = self._DetermineInstances()
11524 self.instance_names = [i.name for i in self.instances]
11526 if set(self.instance_names) != owned_instances:
11527 raise errors.OpExecError("Instances on node '%s' changed since locks"
11528 " were acquired, current instances are '%s',"
11529 " used to be '%s'; retry the operation" %
11530 (self.op.node_name,
11531 utils.CommaJoin(self.instance_names),
11532 utils.CommaJoin(owned_instances)))
11534 if self.instance_names:
11535 self.LogInfo("Evacuating instances from node '%s': %s",
self.op.node_name,
11537 utils.CommaJoin(utils.NiceSort(self.instance_names)))
else:
11539 self.LogInfo("No instances to evacuate from node '%s'",
self.op.node_name)
11542 if self.op.remote_node is not None:
11543 for i in self.instances:
11544 if i.primary_node == self.op.remote_node:
11545 raise errors.OpPrereqError("Node %s is the primary node of"
11546 " instance %s, cannot use it as"
11548 (self.op.remote_node, i.name),
11549 errors.ECODE_INVAL)
11551 def Exec(self, feedback_fn):
11552 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11554 if not self.instance_names:
11555 # No instances to evacuate
jobs = []
11558 elif self.op.iallocator is not None:
11559 # TODO: Implement relocation to other group
11560 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11561 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11562 instances=list(self.instance_names))
11563 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11565 ial.Run(self.op.iallocator)
11567 if not ial.success:
11568 raise errors.OpPrereqError("Can't compute node evacuation using"
11569 " iallocator '%s': %s" %
11570 (self.op.iallocator, ial.info),
11571 errors.ECODE_NORES)
11573 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11575 elif self.op.remote_node is not None:
11576 assert self.op.mode == constants.NODE_EVAC_SEC
jobs = [
11578 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11579 remote_node=self.op.remote_node,
disks=[],
11581 mode=constants.REPLACE_DISK_CHG,
11582 early_release=self.op.early_release)]
11583 for instance_name in self.instance_names
]
else:
11587 raise errors.ProgrammerError("No iallocator or remote node")
11589 return ResultWithJobs(jobs)
11592 def _SetOpEarlyRelease(early_release, op):
11593 """Sets C{early_release} flag on opcodes if available.

"""
try:
11597 op.early_release = early_release
11598 except AttributeError:
11599 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
return op
11604 def _NodeEvacDest(use_nodes, group, nodes):
11605 """Returns group or nodes depending on caller's choice.

"""
if use_nodes:
11609 return utils.CommaJoin(nodes)
else:
return group
11614 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11615 """Unpacks the result of change-group and node-evacuate iallocator requests.
11617 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11618 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11620 @type lu: L{LogicalUnit}
11621 @param lu: Logical unit instance
11622 @type alloc_result: tuple/list
11623 @param alloc_result: Result from iallocator
11624 @type early_release: bool
11625 @param early_release: Whether to release locks early if possible
11626 @type use_nodes: bool
11627 @param use_nodes: Whether to display node names instead of groups
11630 (moved, failed, jobs) = alloc_result
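# A minimal example of the structure expected from the iallocator (all
# values invented): moved lists successful relocations, failed lists
# (instance, reason) pairs, and jobs is a list of serialized opcode lists:
#   moved  = [("inst1", "group1", ["node3", "node4"])]
#   failed = [("inst2", "not enough memory")]
#   jobs   = [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}, ...]]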
if failed:
11633 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11634 for (name, reason) in failed)
11635 lu.LogWarning("Unable to evacuate instances %s", failreason)
11636 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
if moved:
11639 lu.LogInfo("Instances to be moved: %s",
11640 utils.CommaJoin("%s (to %s)" %
11641 (name, _NodeEvacDest(use_nodes, group, nodes))
11642 for (name, group, nodes) in moved))
11644 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11645 map(opcodes.OpCode.LoadOpCode, ops))
for ops in jobs]
11649 class LUInstanceGrowDisk(LogicalUnit):
11650 """Grow a disk of an instance.
11653 HPATH = "disk-grow"
11654 HTYPE = constants.HTYPE_INSTANCE
11657 def ExpandNames(self):
11658 self._ExpandAndLockInstance()
11659 self.needed_locks[locking.LEVEL_NODE] = []
11660 self.needed_locks[locking.LEVEL_NODE_RES] = []
11661 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11662 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11664 def DeclareLocks(self, level):
11665 if level == locking.LEVEL_NODE:
11666 self._LockInstancesNodes()
11667 elif level == locking.LEVEL_NODE_RES:
11669 self.needed_locks[locking.LEVEL_NODE_RES] = \
11670 self.needed_locks[locking.LEVEL_NODE][:]
11672 def BuildHooksEnv(self):
11673 """Build hooks env.
11675 This runs on the master, the primary and all the secondaries.
"""
env = {
11679 "DISK": self.op.disk,
11680 "AMOUNT": self.op.amount,
11681 "ABSOLUTE": self.op.absolute,
}
11683 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
return env
11686 def BuildHooksNodes(self):
11687 """Build hooks nodes.

"""
11690 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return (nl, nl)
11693 def CheckPrereq(self):
11694 """Check prerequisites.
11696 This checks that the instance is in the cluster.
11699 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11700 assert instance is not None, \
11701 "Cannot retrieve locked instance %s" % self.op.instance_name
11702 nodenames = list(instance.all_nodes)
11703 for node in nodenames:
11704 _CheckNodeOnline(self, node)
11706 self.instance = instance
11708 if instance.disk_template not in constants.DTS_GROWABLE:
11709 raise errors.OpPrereqError("Instance's disk layout does not support"
11710 " growing", errors.ECODE_INVAL)
11712 self.disk = instance.FindDisk(self.op.disk)
11714 if self.op.absolute:
11715 self.target = self.op.amount
11716 self.delta = self.target - self.disk.size
if self.delta < 0:
11718 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11719 "current disk size (%s)" %
11720 (utils.FormatUnit(self.target, "h"),
11721 utils.FormatUnit(self.disk.size, "h")),
11722 errors.ECODE_STATE)
else:
11724 self.delta = self.op.amount
11725 self.target = self.disk.size + self.delta
if self.delta < 0:
11727 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11728 utils.FormatUnit(self.delta, "h"),
11729 errors.ECODE_INVAL)
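# Worked example (made-up numbers) for a 10240 MB disk:
#   relative request, amount=2048:  delta = 2048, target = 12288
#   absolute request, amount=12288: delta = 12288 - 10240 = 2048,
#                                   target = 12288
# Either way a negative delta is rejected above.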
11731 if instance.disk_template not in (constants.DT_FILE,
11732 constants.DT_SHARED_FILE,
constants.DT_RBD):
11734 # TODO: check the free disk space for file, when that feature will be
# supported
11736 _CheckNodesFreeDiskPerVG(self, nodenames,
11737 self.disk.ComputeGrowth(self.delta))
11739 def Exec(self, feedback_fn):
11740 """Execute disk grow.

"""
11743 instance = self.instance
disk = self.disk
11746 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11747 assert (self.owned_locks(locking.LEVEL_NODE) ==
11748 self.owned_locks(locking.LEVEL_NODE_RES))
11750 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
if not disks_ok:
11752 raise errors.OpExecError("Cannot activate block device to grow")
11754 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11755 (self.op.disk, instance.name,
11756 utils.FormatUnit(self.delta, "h"),
11757 utils.FormatUnit(self.target, "h")))
11759 # First run all grow ops in dry-run mode
11760 for node in instance.all_nodes:
11761 self.cfg.SetDiskID(disk, node)
11762 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
True, True)
11764 result.Raise("Grow request failed to node %s" % node)
11766 # We know that (as far as we can test) operations across different
11767 # nodes will succeed, time to run it for real on the backing storage
11768 for node in instance.all_nodes:
11769 self.cfg.SetDiskID(disk, node)
11770 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
False, True)
11772 result.Raise("Grow request failed to node %s" % node)
11774 # And now execute it for logical storage, on the primary node
11775 node = instance.primary_node
11776 self.cfg.SetDiskID(disk, node)
11777 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
False, False)
11779 result.Raise("Grow request failed to node %s" % node)
11781 disk.RecordGrow(self.delta)
11782 self.cfg.Update(instance, feedback_fn)
11784 # Changes have been recorded, release node lock
11785 _ReleaseLocks(self, locking.LEVEL_NODE)
11787 # Downgrade lock while waiting for sync
11788 self.glm.downgrade(locking.LEVEL_INSTANCE)
11790 if self.op.wait_for_sync:
11791 disk_abort = not _WaitForSync(self, instance, disks=[disk])
if disk_abort:
11793 self.proc.LogWarning("Disk sync-ing has not returned a good"
11794 " status; please check the instance")
11795 if instance.admin_state != constants.ADMINST_UP:
11796 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11797 elif instance.admin_state != constants.ADMINST_UP:
11798 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11799 " not supposed to be running because no wait for"
11800 " sync mode was requested")
11802 assert self.owned_locks(locking.LEVEL_NODE_RES)
11803 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11806 class LUInstanceQueryData(NoHooksLU):
11807 """Query runtime instance data.
11812 def ExpandNames(self):
11813 self.needed_locks = {}
11815 # Use locking if requested or when non-static information is wanted
11816 if not (self.op.static or self.op.use_locking):
11817 self.LogWarning("Non-static data requested, locks need to be acquired")
11818 self.op.use_locking = True
11820 if self.op.instances or not self.op.use_locking:
11821 # Expand instance names right here
11822 self.wanted_names = _GetWantedInstances(self, self.op.instances)
else:
11824 # Will use acquired locks
11825 self.wanted_names = None
11827 if self.op.use_locking:
11828 self.share_locks = _ShareAll()
11830 if self.wanted_names is None:
11831 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
else:
11833 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11835 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11836 self.needed_locks[locking.LEVEL_NODE] = []
11837 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11839 def DeclareLocks(self, level):
11840 if self.op.use_locking:
11841 if level == locking.LEVEL_NODEGROUP:
11842 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11844 # Lock all groups used by instances optimistically; this requires going
11845 # via the node before it's locked, requiring verification later on
11846 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11847 frozenset(group_uuid
11848 for instance_name in owned_instances
for group_uuid in
11850 self.cfg.GetInstanceNodeGroups(instance_name))
11852 elif level == locking.LEVEL_NODE:
11853 self._LockInstancesNodes()
11855 def CheckPrereq(self):
11856 """Check prerequisites.
11858 This only checks the optional instance list against the existing names.
11861 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11862 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11863 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11865 if self.wanted_names is None:
11866 assert self.op.use_locking, "Locking was not used"
11867 self.wanted_names = owned_instances
11869 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11871 if self.op.use_locking:
11872 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
None)
else:
11875 assert not (owned_instances or owned_groups or owned_nodes)
11877 self.wanted_instances = instances.values()
11879 def _ComputeBlockdevStatus(self, node, instance, dev):
11880 """Returns the status of a block device
11883 if self.op.static or not node:
return None
11886 self.cfg.SetDiskID(dev, node)
11888 result = self.rpc.call_blockdev_find(node, dev)
if result.offline:
return None
11892 result.Raise("Can't compute disk status for %s" % instance.name)
11894 status = result.payload
if status is None:
return None
11898 return (status.dev_path, status.major, status.minor,
11899 status.sync_percent, status.estimated_time,
11900 status.is_degraded, status.ldisk_status)
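# The tuple returned above might look like (values invented):
#   ("/dev/drbd0", 147, 0, 99.5, 12, False, constants.LDS_OKAY)
# i.e. (dev_path, major, minor, sync_percent, estimated_time,
#       is_degraded, ldisk_status).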
11902 def _ComputeDiskStatus(self, instance, snode, dev):
11903 """Compute block device status.
11906 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11908 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11910 def _ComputeDiskStatusInner(self, instance, snode, dev):
11911 """Compute block device status.
11913 @attention: The device has to be annotated already.
11916 if dev.dev_type in constants.LDS_DRBD:
11917 # we change the snode then (otherwise we use the one passed in)
11918 if dev.logical_id[0] == instance.primary_node:
11919 snode = dev.logical_id[1]
else:
11921 snode = dev.logical_id[0]
11923 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11925 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11928 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11935 "iv_name": dev.iv_name,
11936 "dev_type": dev.dev_type,
11937 "logical_id": dev.logical_id,
11938 "physical_id": dev.physical_id,
11939 "pstatus": dev_pstatus,
11940 "sstatus": dev_sstatus,
11941 "children": dev_children,
11946 def Exec(self, feedback_fn):
11947 """Gather and return data"""
11950 cluster = self.cfg.GetClusterInfo()
11952 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11953 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11955 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11956 for node in nodes.values()))
11958 group2name_fn = lambda uuid: groups[uuid].name
11960 for instance in self.wanted_instances:
11961 pnode = nodes[instance.primary_node]
11963 if self.op.static or pnode.offline:
11964 remote_state = None
11966 self.LogWarning("Primary node %s is marked offline, returning static"
11967 " information only for instance %s" %
11968 (pnode.name, instance.name))
11970 remote_info = self.rpc.call_instance_info(instance.primary_node,
11972 instance.hypervisor)
11973 remote_info.Raise("Error checking node %s" % instance.primary_node)
11974 remote_info = remote_info.payload
11975 if remote_info and "state" in remote_info:
11976 remote_state = "up"
11978 if instance.admin_state == constants.ADMINST_UP:
11979 remote_state = "down"
11981 remote_state = instance.admin_state
11983 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11986 snodes_group_uuids = [nodes[snode_name].group
11987 for snode_name in instance.secondary_nodes]
11989 result[instance.name] = {
11990 "name": instance.name,
11991 "config_state": instance.admin_state,
11992 "run_state": remote_state,
11993 "pnode": instance.primary_node,
11994 "pnode_group_uuid": pnode.group,
11995 "pnode_group_name": group2name_fn(pnode.group),
11996 "snodes": instance.secondary_nodes,
11997 "snodes_group_uuids": snodes_group_uuids,
11998 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12000 # this happens to be the same format used for hooks
12001 "nics": _NICListToTuple(self, instance.nics),
12002 "disk_template": instance.disk_template,
12004 "hypervisor": instance.hypervisor,
12005 "network_port": instance.network_port,
12006 "hv_instance": instance.hvparams,
12007 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12008 "be_instance": instance.beparams,
12009 "be_actual": cluster.FillBE(instance),
12010 "os_instance": instance.osparams,
12011 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12012 "serial_no": instance.serial_no,
12013 "mtime": instance.mtime,
12014 "ctime": instance.ctime,
12015 "uuid": instance.uuid,
12021 def PrepareContainerMods(mods, private_fn):
12022 """Prepares a list of container modifications by adding a private data field.
12024 @type mods: list of tuples; (operation, index, parameters)
12025 @param mods: List of modifications
12026 @type private_fn: callable or None
12027 @param private_fn: Callable for constructing a private data field for a
modification
@rtype: list

"""
12032 if private_fn is None:
fn = lambda: None
else:
fn = private_fn
12037 return [(op, idx, params, fn()) for (op, idx, params) in mods]
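# Example of the transformation performed by PrepareContainerMods (values
# are illustrative only):
#
#   mods = [(constants.DDM_ADD, -1, {"size": 1024})]
#   PrepareContainerMods(mods, None)
#     => [(constants.DDM_ADD, -1, {"size": 1024}, None)]
#   PrepareContainerMods(mods, _InstNicModPrivate)
#     => [(constants.DDM_ADD, -1, {...}, <_InstNicModPrivate instance>)]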
12040 #: Type description for changes as returned by L{ApplyContainerMods}'s
12042 _TApplyContModsCbChanges = \
12043 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12044 ht.TNonEmptyString,
12049 def ApplyContainerMods(kind, container, chgdesc, mods,
12050 create_fn, modify_fn, remove_fn):
12051 """Applies descriptions in C{mods} to C{container}.
12054 @param kind: One-word item description
12055 @type container: list
12056 @param container: Container to modify
12057 @type chgdesc: None or list
12058 @param chgdesc: List of applied changes
12060 @param mods: Modifications as returned by L{PrepareContainerMods}
12061 @type create_fn: callable
12062 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12063 receives absolute item index, parameters and private data object as added
12064 by L{PrepareContainerMods}, returns tuple containing new item and changes
12066 @type modify_fn: callable
12067 @param modify_fn: Callback for modifying an existing item
12068 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12069 and private data object as added by L{PrepareContainerMods}, returns
12071 @type remove_fn: callable
12072 @param remove_fn: Callback on removing item; receives absolute item index,
12073 item and private data object as added by L{PrepareContainerMods}

"""
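# In short (index semantics as implemented below): an ADD with index -1
# appends to the container, an ADD with index i inserts before position i,
# while REMOVE/MODIFY with index -1 address the last existing item and any
# other index must already exist.  Each callback may return a list of
# (name, change) pairs which is accumulated into chgdesc.  For example:
#   ApplyContainerMods("NIC", nics, chgdesc,
#                      [(constants.DDM_REMOVE, -1, {}, None)],
#                      None, None, None)   # drops the last NIC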
12076 for (op, idx, params, private) in mods:
if idx == -1:
# Append
12079 absidx = len(container) - 1
elif idx < 0:
12081 raise IndexError("Not accepting negative indices other than -1")
12082 elif idx > len(container):
12083 raise IndexError("Got %s index %s, but there are only %s" %
12084 (kind, idx, len(container)))
else:
absidx = idx
changes = None
12090 if op == constants.DDM_ADD:
12091 # Calculate where item will be added
if idx == -1:
12093 addidx = len(container)
else:
addidx = idx
12097 if create_fn is None:
item = params
else:
12100 (item, changes) = create_fn(addidx, params, private)
if idx == -1:
12103 container.append(item)
else:
12106 assert idx <= len(container)
12107 # list.insert does so before the specified index
12108 container.insert(idx, item)
else:
12110 # Retrieve existing item
try:
12112 item = container[absidx]
except IndexError:
12114 raise IndexError("Invalid %s index %s" % (kind, idx))
12116 if op == constants.DDM_REMOVE:
assert not params
12119 if remove_fn is not None:
12120 remove_fn(absidx, item, private)
12122 changes = [("%s/%s" % (kind, absidx), "remove")]
12124 assert container[absidx] == item
12125 del container[absidx]
12126 elif op == constants.DDM_MODIFY:
12127 if modify_fn is not None:
12128 changes = modify_fn(absidx, item, params, private)
else:
12130 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12132 assert _TApplyContModsCbChanges(changes)
12134 if not (chgdesc is None or changes is None):
12135 chgdesc.extend(changes)
12138 def _UpdateIvNames(base_index, disks):
12139 """Updates the C{iv_name} attribute of disks.
12141 @type disks: list of L{objects.Disk}
12144 for (idx, disk) in enumerate(disks):
12145 disk.iv_name = "disk/%s" % (base_index + idx, )
12148 class _InstNicModPrivate:
12149 """Data structure for network interface modifications.
12151 Used by L{LUInstanceSetParams}.
12154 def __init__(self):
self.params = None
self.filled = None
12159 class LUInstanceSetParams(LogicalUnit):
12160 """Modifies an instance's parameters.

"""
12163 HPATH = "instance-modify"
12164 HTYPE = constants.HTYPE_INSTANCE
12168 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12169 assert ht.TList(mods)
12170 assert not mods or len(mods[0]) in (2, 3)
12172 if mods and len(mods[0]) == 2:
result = []
addremove = 0
12176 for op, params in mods:
12177 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12178 result.append((op, -1, params))
addremove += 1
if addremove > 1:
12182 raise errors.OpPrereqError("Only one %s add or remove operation is"
12183 " supported at a time" % kind,
12184 errors.ECODE_INVAL)
else:
12186 result.append((constants.DDM_MODIFY, op, params))
12188 assert verify_fn(result)
else:
result = mods
return result
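# Example of the upgrade performed above (old-style two-element mods
# become three-element mods; values invented):
#   [(constants.DDM_ADD, {"size": 1024})]
#     => [(constants.DDM_ADD, -1, {"size": 1024})]
#   [(0, {"mode": "ro"})]
#     => [(constants.DDM_MODIFY, 0, {"mode": "ro"})]
# while more than one add/remove in the same list is rejected.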
12195 def _CheckMods(kind, mods, key_types, item_fn):
12196 """Ensures requested disk/NIC modifications are valid.
12199 for (op, _, params) in mods:
12200 assert ht.TDict(params)
12202 utils.ForceDictType(params, key_types)
12204 if op == constants.DDM_REMOVE:
if params:
12206 raise errors.OpPrereqError("No settings should be passed when"
12207 " removing a %s" % kind,
12208 errors.ECODE_INVAL)
12209 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12210 item_fn(op, params)
12212 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12215 def _VerifyDiskModification(op, params):
12216 """Verifies a disk modification.
12219 if op == constants.DDM_ADD:
12220 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12221 if mode not in constants.DISK_ACCESS_SET:
12222 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12223 errors.ECODE_INVAL)
12225 size = params.get(constants.IDISK_SIZE, None)
if size is None:
12227 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12228 constants.IDISK_SIZE, errors.ECODE_INVAL)
try:
size = int(size)
12232 except (TypeError, ValueError), err:
12233 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12234 errors.ECODE_INVAL)
12236 params[constants.IDISK_SIZE] = size
12238 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12239 raise errors.OpPrereqError("Disk size change not possible, use"
12240 " grow-disk", errors.ECODE_INVAL)
12243 def _VerifyNicModification(op, params):
12244 """Verifies a network interface modification.
12247 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12248 ip = params.get(constants.INIC_IP, None)
if ip is None:
pass
12251 elif ip.lower() == constants.VALUE_NONE:
12252 params[constants.INIC_IP] = None
12253 elif not netutils.IPAddress.IsValid(ip):
12254 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12255 errors.ECODE_INVAL)
12257 bridge = params.get("bridge", None)
12258 link = params.get(constants.INIC_LINK, None)
12259 if bridge and link:
12260 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12261 " at the same time", errors.ECODE_INVAL)
12262 elif bridge and bridge.lower() == constants.VALUE_NONE:
12263 params["bridge"] = None
12264 elif link and link.lower() == constants.VALUE_NONE:
12265 params[constants.INIC_LINK] = None
12267 if op == constants.DDM_ADD:
12268 macaddr = params.get(constants.INIC_MAC, None)
12269 if macaddr is None:
12270 params[constants.INIC_MAC] = constants.VALUE_AUTO
else:
12272 if constants.INIC_MAC in params:
12273 macaddr = params[constants.INIC_MAC]
12274 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12275 macaddr = utils.NormalizeAndValidateMac(macaddr)
12277 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12278 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12279 " modifying an existing NIC",
12280 errors.ECODE_INVAL)
12282 def CheckArguments(self):
12283 if not (self.op.nics or self.op.disks or self.op.disk_template or
12284 self.op.hvparams or self.op.beparams or self.op.os_name or
12285 self.op.offline is not None or self.op.runtime_mem):
12286 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12288 if self.op.hvparams:
12289 _CheckGlobalHvParams(self.op.hvparams)
12291 self.op.disks = self._UpgradeDiskNicMods(
12292 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12293 self.op.nics = self._UpgradeDiskNicMods(
12294 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12296 # Check disk modifications
12297 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12298 self._VerifyDiskModification)
12300 if self.op.disks and self.op.disk_template is not None:
12301 raise errors.OpPrereqError("Disk template conversion and other disk"
12302 " changes not supported at the same time",
12303 errors.ECODE_INVAL)
12305 if (self.op.disk_template and
12306 self.op.disk_template in constants.DTS_INT_MIRROR and
12307 self.op.remote_node is None):
12308 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12309 " one requires specifying a secondary node",
12310 errors.ECODE_INVAL)
12312 # Check NIC modifications
12313 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12314 self._VerifyNicModification)
12316 def ExpandNames(self):
12317 self._ExpandAndLockInstance()
12318 # Can't even acquire node locks in shared mode as upcoming changes in
12319 # Ganeti 2.6 will start to modify the node object on disk conversion
12320 self.needed_locks[locking.LEVEL_NODE] = []
12321 self.needed_locks[locking.LEVEL_NODE_RES] = []
12322 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12324 def DeclareLocks(self, level):
12325 # TODO: Acquire group lock in shared mode (disk parameters)
12326 if level == locking.LEVEL_NODE:
12327 self._LockInstancesNodes()
12328 if self.op.disk_template and self.op.remote_node:
12329 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12330 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12331 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12333 self.needed_locks[locking.LEVEL_NODE_RES] = \
12334 self.needed_locks[locking.LEVEL_NODE][:]
12336 def BuildHooksEnv(self):
12337 """Build hooks env.
12339 This runs on the master, primary and secondaries.
"""
args = {}
12343 if constants.BE_MINMEM in self.be_new:
12344 args["minmem"] = self.be_new[constants.BE_MINMEM]
12345 if constants.BE_MAXMEM in self.be_new:
12346 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12347 if constants.BE_VCPUS in self.be_new:
12348 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12349 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12350 # information at all.
12352 if self._new_nics is not None:
nics = []
12355 for nic in self._new_nics:
12356 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12357 mode = nicparams[constants.NIC_MODE]
12358 link = nicparams[constants.NIC_LINK]
12359 nics.append((nic.ip, nic.mac, mode, link))
12361 args["nics"] = nics
12363 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12364 if self.op.disk_template:
12365 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12366 if self.op.runtime_mem:
12367 env["RUNTIME_MEMORY"] = self.op.runtime_mem
return env
12371 def BuildHooksNodes(self):
12372 """Build hooks nodes.

"""
12375 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return (nl, nl)
12378 def _PrepareNicModification(self, params, private, old_ip, old_params,
12380 update_params_dict = dict([(key, params[key])
12381 for key in constants.NICS_PARAMETERS
12384 if "bridge" in params:
12385 update_params_dict[constants.NIC_LINK] = params["bridge"]
12387 new_params = _GetUpdatedParams(old_params, update_params_dict)
12388 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12390 new_filled_params = cluster.SimpleFillNIC(new_params)
12391 objects.NIC.CheckParameterSyntax(new_filled_params)
12393 new_mode = new_filled_params[constants.NIC_MODE]
12394 if new_mode == constants.NIC_MODE_BRIDGED:
12395 bridge = new_filled_params[constants.NIC_LINK]
12396 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
if msg:
12398 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
if self.op.force:
12400 self.warn.append(msg)
else:
12402 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12404 elif new_mode == constants.NIC_MODE_ROUTED:
12405 ip = params.get(constants.INIC_IP, old_ip)
if ip is None:
12407 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12408 " on a routed NIC", errors.ECODE_INVAL)
12410 if constants.INIC_MAC in params:
12411 mac = params[constants.INIC_MAC]
12413 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12414 errors.ECODE_INVAL)
12415 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12416 # otherwise generate the MAC address
12417 params[constants.INIC_MAC] = \
12418 self.cfg.GenerateMAC(self.proc.GetECId())
else:
12420 # or validate/reserve the current one
try:
12422 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12423 except errors.ReservationError:
12424 raise errors.OpPrereqError("MAC address '%s' already in use"
12425 " in cluster" % mac,
12426 errors.ECODE_NOTUNIQUE)
12428 private.params = new_params
12429 private.filled = new_filled_params
12431 def CheckPrereq(self):
12432 """Check prerequisites.
12434 This only checks the instance list against the existing names.
12437 # checking the new params on the primary/secondary nodes
12439 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12440 cluster = self.cluster = self.cfg.GetClusterInfo()
12441 assert self.instance is not None, \
12442 "Cannot retrieve locked instance %s" % self.op.instance_name
12443 pnode = instance.primary_node
12444 nodelist = list(instance.all_nodes)
12445 pnode_info = self.cfg.GetNodeInfo(pnode)
12446 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12448 # Prepare disk/NIC modifications
12449 self.diskmod = PrepareContainerMods(self.op.disks, None)
12450 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12453 if self.op.os_name and not self.op.force:
12454 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12455 self.op.force_variant)
12456 instance_os = self.op.os_name
12458 instance_os = instance.os
12460 assert not (self.op.disk_template and self.op.disks), \
12461 "Can't modify disk template and apply disk changes at the same time"
12463 if self.op.disk_template:
12464 if instance.disk_template == self.op.disk_template:
12465 raise errors.OpPrereqError("Instance already has disk template %s" %
12466 instance.disk_template, errors.ECODE_INVAL)
12468 if (instance.disk_template,
12469 self.op.disk_template) not in self._DISK_CONVERSIONS:
12470 raise errors.OpPrereqError("Unsupported disk template conversion from"
12471 " %s to %s" % (instance.disk_template,
12472 self.op.disk_template),
12473 errors.ECODE_INVAL)
12474 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12475 msg="cannot change disk template")
12476 if self.op.disk_template in constants.DTS_INT_MIRROR:
12477 if self.op.remote_node == pnode:
12478 raise errors.OpPrereqError("Given new secondary node %s is the same"
12479 " as the primary node of the instance" %
12480 self.op.remote_node, errors.ECODE_STATE)
12481 _CheckNodeOnline(self, self.op.remote_node)
12482 _CheckNodeNotDrained(self, self.op.remote_node)
12483 # FIXME: here we assume that the old instance type is DT_PLAIN
12484 assert instance.disk_template == constants.DT_PLAIN
12485 disks = [{constants.IDISK_SIZE: d.size,
12486 constants.IDISK_VG: d.logical_id[0]}
12487 for d in instance.disks]
12488 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12489 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12491 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12492 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12493 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12495 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12496 ignore=self.op.ignore_ipolicy)
12497 if pnode_info.group != snode_info.group:
12498 self.LogWarning("The primary and secondary nodes are in two"
12499 " different node groups; the disk parameters"
12500 " from the first disk's node group will be"
12503 # hvparams processing
12504 if self.op.hvparams:
12505 hv_type = instance.hypervisor
12506 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12507 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12508 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12511 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12512 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12513 self.hv_proposed = self.hv_new = hv_new # the new actual values
12514 self.hv_inst = i_hvdict # the new dict (without defaults)
12516 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12518 self.hv_new = self.hv_inst = {}
12520 # beparams processing
12521 if self.op.beparams:
12522 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12524 objects.UpgradeBeParams(i_bedict)
12525 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12526 be_new = cluster.SimpleFillBE(i_bedict)
12527 self.be_proposed = self.be_new = be_new # the new actual values
12528 self.be_inst = i_bedict # the new dict (without defaults)
12530 self.be_new = self.be_inst = {}
12531 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12532 be_old = cluster.FillBE(instance)
12534 # CPU param validation -- checking every time a parameter is
12535 # changed to cover all cases where either CPU mask or vcpus have
12537 if (constants.BE_VCPUS in self.be_proposed and
12538 constants.HV_CPU_MASK in self.hv_proposed):
cpu_list = \
12540 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12541 # Verify mask is consistent with number of vCPUs. Can skip this
12542 # test if only 1 entry in the CPU mask, which means same mask
12543 # is applied to all vCPUs.
12544 if (len(cpu_list) > 1 and
12545 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12546 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
" CPU mask [%s]" %
12548 (self.be_proposed[constants.BE_VCPUS],
12549 self.hv_proposed[constants.HV_CPU_MASK]),
12550 errors.ECODE_INVAL)
12552 # Only perform this test if a new CPU mask is given
12553 if constants.HV_CPU_MASK in self.hv_new:
12554 # Calculate the largest CPU number requested
12555 max_requested_cpu = max(map(max, cpu_list))
12556 # Check that all of the instance's nodes have enough physical CPUs to
12557 # satisfy the requested CPU mask
12558 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12559 max_requested_cpu + 1, instance.hypervisor)
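# Worked example (made-up values): with a proposed CPU mask of
# "0-1:2:3:6-7" (four entries) and BE_VCPUS == 4 the count check above
# passes, and the highest CPU referenced is 7, so every node of the
# instance must expose at least 8 physical CPUs.  A mask with a single
# entry such as "0-3" applies to all vCPUs and skips the per-vCPU check.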
12561 # osparams processing
12562 if self.op.osparams:
12563 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12564 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12565 self.os_inst = i_osdict # the new dict (without defaults)
else:
self.os_inst = {}
self.warn = []
12571 #TODO(dynmem): do the appropriate check involving MINMEM
12572 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12573 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12574 mem_check_list = [pnode]
12575 if be_new[constants.BE_AUTO_BALANCE]:
12576 # either we changed auto_balance to yes or it was from before
12577 mem_check_list.extend(instance.secondary_nodes)
12578 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12579 instance.hypervisor)
12580 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12581 [instance.hypervisor])
12582 pninfo = nodeinfo[pnode]
12583 msg = pninfo.fail_msg
if msg:
12585 # Assume the primary node is unreachable and go ahead
12586 self.warn.append("Can't get info from primary node %s: %s" %
(pnode, msg))
else:
12589 (_, _, (pnhvinfo, )) = pninfo.payload
12590 if not isinstance(pnhvinfo.get("memory_free", None), int):
12591 self.warn.append("Node data from primary node %s doesn't contain"
12592 " free memory information" % pnode)
12593 elif instance_info.fail_msg:
12594 self.warn.append("Can't get instance runtime information: %s" %
12595 instance_info.fail_msg)
12597 if instance_info.payload:
12598 current_mem = int(instance_info.payload["memory"])
else:
12600 # Assume instance not running
12601 # (there is a slight race condition here, but it's not very
12602 # probable, and we have no other way to check)
12603 # TODO: Describe race condition
current_mem = 0
12605 #TODO(dynmem): do the appropriate check involving MINMEM
12606 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12607 pnhvinfo["memory_free"])
if miss_mem > 0:
12609 raise errors.OpPrereqError("This change will prevent the instance"
12610 " from starting, due to %d MB of memory"
12611 " missing on its primary node" %
12612 miss_mem, errors.ECODE_NORES)
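# Example with invented numbers: raising BE_MAXMEM to 4096 MB while the
# instance currently uses 2048 MB and the primary node reports 1024 MB
# free gives miss_mem = 4096 - 2048 - 1024 = 1024 > 0, so the change is
# refused unless the operation is forced (which skips this whole check).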
12614 if be_new[constants.BE_AUTO_BALANCE]:
12615 for node, nres in nodeinfo.items():
12616 if node not in instance.secondary_nodes:
continue
12618 nres.Raise("Can't get info from secondary node %s" % node,
12619 prereq=True, ecode=errors.ECODE_STATE)
12620 (_, _, (nhvinfo, )) = nres.payload
12621 if not isinstance(nhvinfo.get("memory_free", None), int):
12622 raise errors.OpPrereqError("Secondary node %s didn't return free"
12623 " memory information" % node,
12624 errors.ECODE_STATE)
12625 #TODO(dynmem): do the appropriate check involving MINMEM
12626 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12627 raise errors.OpPrereqError("This change will prevent the instance"
12628 " from failover to its secondary node"
12629 " %s, due to not enough memory" % node,
12630 errors.ECODE_STATE)
12632 if self.op.runtime_mem:
12633 remote_info = self.rpc.call_instance_info(instance.primary_node,
instance.name,
12635 instance.hypervisor)
12636 remote_info.Raise("Error checking node %s" % instance.primary_node)
12637 if not remote_info.payload: # not running already
12638 raise errors.OpPrereqError("Instance %s is not running" %
12639 instance.name, errors.ECODE_STATE)
12641 current_memory = remote_info.payload["memory"]
12642 if (not self.op.force and
12643 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12644 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12645 raise errors.OpPrereqError("Instance %s must have memory between %d"
12646 " and %d MB of memory unless --force is"
" given" %
(instance.name,
12649 self.be_proposed[constants.BE_MINMEM],
12650 self.be_proposed[constants.BE_MAXMEM]),
12651 errors.ECODE_INVAL)
12653 if self.op.runtime_mem > current_memory:
12654 _CheckNodeFreeMemory(self, instance.primary_node,
12655 "ballooning memory for instance %s" %
instance.name,
12657 self.op.runtime_mem - current_memory,
12658 instance.hypervisor)
12660 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12661 raise errors.OpPrereqError("Disk operations not supported for"
12662 " diskless instances", errors.ECODE_INVAL)
12664 def _PrepareNicCreate(_, params, private):
12665 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12666 return (None, None)
12668 def _PrepareNicMod(_, nic, params, private):
12669 self._PrepareNicModification(params, private, nic.ip,
12670 nic.nicparams, cluster, pnode)
12673 # Verify NIC changes (operating on copy)
12674 nics = instance.nics[:]
12675 ApplyContainerMods("NIC", nics, None, self.nicmod,
12676 _PrepareNicCreate, _PrepareNicMod, None)
12677 if len(nics) > constants.MAX_NICS:
12678 raise errors.OpPrereqError("Instance has too many network interfaces"
12679 " (%d), cannot add more" % constants.MAX_NICS,
12680 errors.ECODE_STATE)
12682 # Verify disk changes (operating on a copy)
12683 disks = instance.disks[:]
12684 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12685 if len(disks) > constants.MAX_DISKS:
12686 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12687 " more" % constants.MAX_DISKS,
12688 errors.ECODE_STATE)
12690 if self.op.offline is not None:
12691 if self.op.offline:
12692 msg = "can't change to offline"
12694 msg = "can't change to online"
12695 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12697 # Pre-compute NIC changes (necessary to use result in hooks)
12698 self._nic_chgdesc = []
12700 # Operate on copies as this is still in prereq
12701 nics = [nic.Copy() for nic in instance.nics]
12702 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12703 self._CreateNewNic, self._ApplyNicMods, None)
12704 self._new_nics = nics
12706 self._new_nics = None
12708 def _ConvertPlainToDrbd(self, feedback_fn):
12709 """Converts an instance from plain to drbd.
12712 feedback_fn("Converting template to drbd")
12713 instance = self.instance
12714 pnode = instance.primary_node
12715 snode = self.op.remote_node
12717 assert instance.disk_template == constants.DT_PLAIN
12719 # create a fake disk info for _GenerateDiskTemplate
12720 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12721 constants.IDISK_VG: d.logical_id[0]}
12722 for d in instance.disks]
12723 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12724 instance.name, pnode, [snode],
12725 disk_info, None, None, 0, feedback_fn,
12727 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12729 info = _GetInstanceInfoText(instance)
12730 feedback_fn("Creating additional volumes...")
12731 # first, create the missing data and meta devices
12732 for disk in anno_disks:
12733 # unfortunately this is... not too nice
12734 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12736 for child in disk.children:
12737 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12738 # at this stage, all new LVs have been created, we can rename the
12740 feedback_fn("Renaming original volumes...")
12741 rename_list = [(o, n.children[0].logical_id)
12742 for (o, n) in zip(instance.disks, new_disks)]
12743 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12744 result.Raise("Failed to rename original LVs")
12746 feedback_fn("Initializing DRBD devices...")
12747 # all child devices are in place, we can now create the DRBD devices
12748 for disk in anno_disks:
12749 for node in [pnode, snode]:
12750 f_create = node == pnode
12751 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12753 # at this point, the instance has been modified
12754 instance.disk_template = constants.DT_DRBD8
12755 instance.disks = new_disks
12756 self.cfg.Update(instance, feedback_fn)
12758 # Release node locks while waiting for sync
12759 _ReleaseLocks(self, locking.LEVEL_NODE)
12761 # disks are created, waiting for sync
12762 disk_abort = not _WaitForSync(self, instance,
12763 oneshot=not self.op.wait_for_sync)
12765 raise errors.OpExecError("There are some degraded disks for"
12766 " this instance, please cleanup manually")
12768 # Node resource locks will be released by caller
12770 def _ConvertDrbdToPlain(self, feedback_fn):
12771 """Converts an instance from drbd to plain.
12774 instance = self.instance
12776 assert len(instance.secondary_nodes) == 1
12777 assert instance.disk_template == constants.DT_DRBD8
12779 pnode = instance.primary_node
12780 snode = instance.secondary_nodes[0]
12781 feedback_fn("Converting template to plain")
12783 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12784 new_disks = [d.children[0] for d in instance.disks]
12786 # copy over size and mode
12787 for parent, child in zip(old_disks, new_disks):
12788 child.size = parent.size
12789 child.mode = parent.mode
12791 # this is a DRBD disk, return its port to the pool
12792 # NOTE: this must be done right before the call to cfg.Update!
12793 for disk in old_disks:
12794 tcp_port = disk.logical_id[2]
12795 self.cfg.AddTcpUdpPort(tcp_port)
12797 # update instance structure
12798 instance.disks = new_disks
12799 instance.disk_template = constants.DT_PLAIN
12800 self.cfg.Update(instance, feedback_fn)
12802 # Release locks in case removing disks takes a while
12803 _ReleaseLocks(self, locking.LEVEL_NODE)
12805 feedback_fn("Removing volumes on the secondary node...")
12806 for disk in old_disks:
12807 self.cfg.SetDiskID(disk, snode)
12808 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12810 self.LogWarning("Could not remove block device %s on node %s,"
12811 " continuing anyway: %s", disk.iv_name, snode, msg)
12813 feedback_fn("Removing unneeded volumes on the primary node...")
12814 for idx, disk in enumerate(old_disks):
12815 meta = disk.children[1]
12816 self.cfg.SetDiskID(meta, pnode)
12817 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12819 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12820 " continuing anyway: %s", idx, pnode, msg)
12822 def _CreateNewDisk(self, idx, params, _):
12823 """Creates a new disk.
12826 instance = self.instance
12829 if instance.disk_template in constants.DTS_FILEBASED:
12830 (file_driver, file_path) = instance.disks[0].logical_id
12831 file_path = os.path.dirname(file_path)
12833 file_driver = file_path = None
12836 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12837 instance.primary_node, instance.secondary_nodes,
12838 [params], file_path, file_driver, idx,
12839 self.Log, self.diskparams)[0]
12841 info = _GetInstanceInfoText(instance)
12843 logging.info("Creating volume %s for instance %s",
12844 disk.iv_name, instance.name)
12845 # Note: this needs to be kept in sync with _CreateDisks
12847 for node in instance.all_nodes:
12848 f_create = (node == instance.primary_node)
12850 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12851 except errors.OpExecError, err:
12852 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12853 disk.iv_name, disk, node, err)
12856 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12860 def _ModifyDisk(idx, disk, params, _):
12861 """Modifies a disk.
12864 disk.mode = params[constants.IDISK_MODE]
12867 ("disk.mode/%d" % idx, disk.mode),
12870 def _RemoveDisk(self, idx, root, _):
12874 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12875 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12876 self.cfg.SetDiskID(disk, node)
12877 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12879 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12880 " continuing anyway", idx, node, msg)
12882 # if this is a DRBD disk, return its port to the pool
12883 if root.dev_type in constants.LDS_DRBD:
12884 self.cfg.AddTcpUdpPort(root.logical_id[2])
12887 def _CreateNewNic(idx, params, private):
12888 """Creates data structure for a new network interface.
12891 mac = params[constants.INIC_MAC]
12892 ip = params.get(constants.INIC_IP, None)
12893 nicparams = private.params
12895 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12897 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12898 (mac, ip, private.filled[constants.NIC_MODE],
12899 private.filled[constants.NIC_LINK])),
12903 def _ApplyNicMods(idx, nic, params, private):
12904 """Modifies a network interface.
12909 for key in [constants.INIC_MAC, constants.INIC_IP]:
12911 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12912 setattr(nic, key, params[key])
12915 nic.nicparams = private.params
12917 for (key, val) in params.items():
12918 changes.append(("nic.%s/%d" % (key, idx), val))
12922 def Exec(self, feedback_fn):
12923 """Modifies an instance.
12925 All parameters take effect only at the next restart of the instance.
12928 # Process here the warnings from CheckPrereq, as we don't have a
12929 # feedback_fn there.
12930 # TODO: Replace with self.LogWarning
12931 for warn in self.warn:
12932 feedback_fn("WARNING: %s" % warn)
12934 assert ((self.op.disk_template is None) ^
12935 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12936 "Not owning any node resource locks"
12939 instance = self.instance
12942 if self.op.runtime_mem:
12943 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12945 self.op.runtime_mem)
12946 rpcres.Raise("Cannot modify instance runtime memory")
12947 result.append(("runtime_memory", self.op.runtime_mem))
12949 # Apply disk changes
12950 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12951 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12952 _UpdateIvNames(0, instance.disks)
12954 if self.op.disk_template:
12956 check_nodes = set(instance.all_nodes)
12957 if self.op.remote_node:
12958 check_nodes.add(self.op.remote_node)
12959 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12960 owned = self.owned_locks(level)
12961 assert not (check_nodes - owned), \
12962 ("Not owning the correct locks, owning %r, expected at least %r" %
12963 (owned, check_nodes))
12965 r_shut = _ShutdownInstanceDisks(self, instance)
12967 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12968 " proceed with disk template conversion")
12969 mode = (instance.disk_template, self.op.disk_template)
12971 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12973 self.cfg.ReleaseDRBDMinors(instance.name)
12975 result.append(("disk_template", self.op.disk_template))
12977 assert instance.disk_template == self.op.disk_template, \
12978 ("Expected disk template '%s', found '%s'" %
12979 (self.op.disk_template, instance.disk_template))
12981 # Release node and resource locks if there are any (they might already have
12982 # been released during disk conversion)
12983 _ReleaseLocks(self, locking.LEVEL_NODE)
12984 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12986 # Apply NIC changes
12987 if self._new_nics is not None:
12988 instance.nics = self._new_nics
12989 result.extend(self._nic_chgdesc)
12992 if self.op.hvparams:
12993 instance.hvparams = self.hv_inst
12994 for key, val in self.op.hvparams.iteritems():
12995 result.append(("hv/%s" % key, val))
12998 if self.op.beparams:
12999 instance.beparams = self.be_inst
13000 for key, val in self.op.beparams.iteritems():
13001 result.append(("be/%s" % key, val))
13004 if self.op.os_name:
13005 instance.os = self.op.os_name
13008 if self.op.osparams:
13009 instance.osparams = self.os_inst
13010 for key, val in self.op.osparams.iteritems():
13011 result.append(("os/%s" % key, val))
13013 if self.op.offline is None:
13016 elif self.op.offline:
13017 # Mark instance as offline
13018 self.cfg.MarkInstanceOffline(instance.name)
13019 result.append(("admin_state", constants.ADMINST_OFFLINE))
13021 # Mark instance as online, but stopped
13022 self.cfg.MarkInstanceDown(instance.name)
13023 result.append(("admin_state", constants.ADMINST_DOWN))
13025 self.cfg.Update(instance, feedback_fn)
13027 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13028 self.owned_locks(locking.LEVEL_NODE)), \
13029 "All node locks should have been released by now"
13033 _DISK_CONVERSIONS = {
13034 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13035 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
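# Exec() above looks up the conversion helper by the (old, new) template
# pair, e.g. mode = (constants.DT_PLAIN, constants.DT_DRBD8) selects
# _ConvertPlainToDrbd; template pairs without an entry here are not
# convertible.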
13039 class LUInstanceChangeGroup(LogicalUnit):
13040 HPATH = "instance-change-group"
13041 HTYPE = constants.HTYPE_INSTANCE
13044 def ExpandNames(self):
13045 self.share_locks = _ShareAll()
13046 self.needed_locks = {
13047 locking.LEVEL_NODEGROUP: [],
13048 locking.LEVEL_NODE: [],
13051 self._ExpandAndLockInstance()
13053 if self.op.target_groups:
13054 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13055 self.op.target_groups)
13057 self.req_target_uuids = None
13059 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13061 def DeclareLocks(self, level):
13062 if level == locking.LEVEL_NODEGROUP:
13063 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13065 if self.req_target_uuids:
13066 lock_groups = set(self.req_target_uuids)
13068 # Lock all groups used by instance optimistically; this requires going
13069 # via the node before it's locked, requiring verification later on
13070 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13071 lock_groups.update(instance_groups)
13073 # No target groups, need to lock all of them
13074 lock_groups = locking.ALL_SET
13076 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13078 elif level == locking.LEVEL_NODE:
13079 if self.req_target_uuids:
13080 # Lock all nodes used by instances
13081 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13082 self._LockInstancesNodes()
13084 # Lock all nodes in all potential target groups
13085 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13086 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13087 member_nodes = [node_name
13088 for group in lock_groups
13089 for node_name in self.cfg.GetNodeGroup(group).members]
13090 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13092 # Lock all nodes as all groups are potential targets
13093 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13095 def CheckPrereq(self):
13096 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13097 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13098 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13100 assert (self.req_target_uuids is None or
13101 owned_groups.issuperset(self.req_target_uuids))
13102 assert owned_instances == set([self.op.instance_name])
13104 # Get instance information
13105 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13107 # Check if node groups for locked instance are still correct
13108 assert owned_nodes.issuperset(self.instance.all_nodes), \
13109 ("Instance %s's nodes changed while we kept the lock" %
13110 self.op.instance_name)
13112 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13115 if self.req_target_uuids:
13116 # User requested specific target groups
13117 self.target_uuids = frozenset(self.req_target_uuids)
13119 # All groups except those used by the instance are potential targets
13120 self.target_uuids = owned_groups - inst_groups
13122 conflicting_groups = self.target_uuids & inst_groups
13123 if conflicting_groups:
13124 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13125 " used by the instance '%s'" %
13126 (utils.CommaJoin(conflicting_groups),
13127 self.op.instance_name),
13128 errors.ECODE_INVAL)
13130 if not self.target_uuids:
13131 raise errors.OpPrereqError("There are no possible target groups",
13132 errors.ECODE_INVAL)
13134 def BuildHooksEnv(self):
13135 """Build hooks env.
13138 assert self.target_uuids
13141 "TARGET_GROUPS": " ".join(self.target_uuids),
13144 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13148 def BuildHooksNodes(self):
13149 """Build hooks nodes.
13152 mn = self.cfg.GetMasterNode()
13153 return ([mn], [mn])
13155 def Exec(self, feedback_fn):
13156 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13158 assert instances == [self.op.instance_name], "Instance not locked"
13160 req = iallocator.IAReqGroupChange(instances=instances,
13161 target_groups=list(self.target_uuids))
13162 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13164 ial.Run(self.op.iallocator)
13166 if not ial.success:
13167 raise errors.OpPrereqError("Can't compute solution for changing group of"
13168 " instance '%s' using iallocator '%s': %s" %
13169 (self.op.instance_name, self.op.iallocator,
13170 ial.info), errors.ECODE_NORES)
13172 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13174 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13175 " instance '%s'", len(jobs), self.op.instance_name)
13177 return ResultWithJobs(jobs)
13180 class LUBackupQuery(NoHooksLU):
13181 """Query the exports list
13186 def CheckArguments(self):
13187 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13188 ["node", "export"], self.op.use_locking)
13190 def ExpandNames(self):
13191 self.expq.ExpandNames(self)
13193 def DeclareLocks(self, level):
13194 self.expq.DeclareLocks(self, level)
13196 def Exec(self, feedback_fn):
13199 for (node, expname) in self.expq.OldStyleQuery(self):
13200 if expname is None:
13201 result[node] = False
13203 result.setdefault(node, []).append(expname)
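# The old-style result maps each queried node either to False (the export
# list could not be retrieved from that node) or to the list of export
# names found there, e.g. (hypothetical):
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}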
13208 class _ExportQuery(_QueryBase):
13209 FIELDS = query.EXPORT_FIELDS
13211 #: The node name is not a unique key for this query
13212 SORT_FIELD = "node"
13214 def ExpandNames(self, lu):
13215 lu.needed_locks = {}
13217 # The following variables interact with _QueryBase._GetNames
13219 self.wanted = _GetWantedNodes(lu, self.names)
13221 self.wanted = locking.ALL_SET
13223 self.do_locking = self.use_locking
13225 if self.do_locking:
13226 lu.share_locks = _ShareAll()
13227 lu.needed_locks = {
13228 locking.LEVEL_NODE: self.wanted,
13231 def DeclareLocks(self, lu, level):
13234 def _GetQueryData(self, lu):
13235 """Computes the list of nodes and their attributes.
13238 # Locking is not used
13240 assert not (compat.any(lu.glm.is_owned(level)
13241 for level in locking.LEVELS
13242 if level != locking.LEVEL_CLUSTER) or
13243 self.do_locking or self.use_locking)
13245 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13249 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13251 result.append((node, None))
13253 result.extend((node, expname) for expname in nres.payload)
13258 class LUBackupPrepare(NoHooksLU):
13259 """Prepares an instance for an export and returns useful information.
13264 def ExpandNames(self):
13265 self._ExpandAndLockInstance()
13267 def CheckPrereq(self):
13268 """Check prerequisites.
13271 instance_name = self.op.instance_name
13273 self.instance = self.cfg.GetInstanceInfo(instance_name)
13274 assert self.instance is not None, \
13275 "Cannot retrieve locked instance %s" % self.op.instance_name
13276 _CheckNodeOnline(self, self.instance.primary_node)
13278 self._cds = _GetClusterDomainSecret()
13280 def Exec(self, feedback_fn):
13281 """Prepares an instance for an export.
13284 instance = self.instance
13286 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13287 salt = utils.GenerateSecret(8)
13289 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13290 result = self.rpc.call_x509_cert_create(instance.primary_node,
13291 constants.RIE_CERT_VALIDITY)
13292 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13294 (name, cert_pem) = result.payload
13296 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13300 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13301 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13303 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13309 class LUBackupExport(LogicalUnit):
13310 """Export an instance to an image in the cluster.
13313 HPATH = "instance-export"
13314 HTYPE = constants.HTYPE_INSTANCE
13317 def CheckArguments(self):
13318 """Check the arguments.
13321 self.x509_key_name = self.op.x509_key_name
13322 self.dest_x509_ca_pem = self.op.destination_x509_ca
13324 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13325 if not self.x509_key_name:
13326 raise errors.OpPrereqError("Missing X509 key name for encryption",
13327 errors.ECODE_INVAL)
13329 if not self.dest_x509_ca_pem:
13330 raise errors.OpPrereqError("Missing destination X509 CA",
13331 errors.ECODE_INVAL)
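# A remote-mode export therefore has to carry both pieces of data, roughly
# (hypothetical sketch, values elided):
#   OpBackupExport(instance_name=..., mode=constants.EXPORT_MODE_REMOTE,
#                  x509_key_name=..., destination_x509_ca=...)
# while a local-mode export only needs a target node.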
13333 def ExpandNames(self):
13334 self._ExpandAndLockInstance()
13336 # Lock all nodes for local exports
13337 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13338 # FIXME: lock only instance primary and destination node
13340 # Sad but true, for now we have to lock all nodes, as we don't know where
13341 # the previous export might be, and in this LU we search for it and
13342 # remove it from its current node. In the future we could fix this by:
13343 # - making a tasklet to search (share-lock all), then create the
13344 # new one, then one to remove, after
13345 # - removing the removal operation altogether
13346 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13348 def DeclareLocks(self, level):
13349 """Last minute lock declaration."""
13350 # All nodes are locked anyway, so nothing to do here.
13352 def BuildHooksEnv(self):
13353 """Build hooks env.
13355 This will run on the master, primary node and target node.
13359 "EXPORT_MODE": self.op.mode,
13360 "EXPORT_NODE": self.op.target_node,
13361 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13362 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13363 # TODO: Generic function for boolean env variables
13364 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13367 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13371 def BuildHooksNodes(self):
13372 """Build hooks nodes.
13375 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13377 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13378 nl.append(self.op.target_node)
13382 def CheckPrereq(self):
13383 """Check prerequisites.
13385 This checks that the instance and node names are valid.
13388 instance_name = self.op.instance_name
13390 self.instance = self.cfg.GetInstanceInfo(instance_name)
13391 assert self.instance is not None, \
13392 "Cannot retrieve locked instance %s" % self.op.instance_name
13393 _CheckNodeOnline(self, self.instance.primary_node)
13395 if (self.op.remove_instance and
13396 self.instance.admin_state == constants.ADMINST_UP and
13397 not self.op.shutdown):
13398 raise errors.OpPrereqError("Cannot remove instance without shutting it"
13399 " down first", errors.ECODE_STATE)
13401 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13402 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13403 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13404 assert self.dst_node is not None
13406 _CheckNodeOnline(self, self.dst_node.name)
13407 _CheckNodeNotDrained(self, self.dst_node.name)
13410 self.dest_disk_info = None
13411 self.dest_x509_ca = None
13413 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13414 self.dst_node = None
13416 if len(self.op.target_node) != len(self.instance.disks):
13417 raise errors.OpPrereqError(("Received destination information for %s"
13418 " disks, but instance %s has %s disks") %
13419 (len(self.op.target_node), instance_name,
13420 len(self.instance.disks)),
13421 errors.ECODE_INVAL)
13423 cds = _GetClusterDomainSecret()
13425 # Check X509 key name
13427 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13428 except (TypeError, ValueError), err:
13429 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13430 errors.ECODE_INVAL)
13432 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13433 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13434 errors.ECODE_INVAL)
13436 # Load and verify CA
13438 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13439 except OpenSSL.crypto.Error, err:
13440 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13441 (err, ), errors.ECODE_INVAL)
13443 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13444 if errcode is not None:
13445 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13446 (msg, ), errors.ECODE_INVAL)
13448 self.dest_x509_ca = cert
13450 # Verify target information
13452 for idx, disk_data in enumerate(self.op.target_node):
13454 (host, port, magic) = \
13455 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13456 except errors.GenericError, err:
13457 raise errors.OpPrereqError("Target info for disk %s: %s" %
13458 (idx, err), errors.ECODE_INVAL)
13460 disk_info.append((host, port, magic))
13462 assert len(disk_info) == len(self.op.target_node)
13463 self.dest_disk_info = disk_info
13466 raise errors.ProgrammerError("Unhandled export mode %r" %
13469 # instance disk type verification
13470 # TODO: Implement export support for file-based disks
13471 for disk in self.instance.disks:
13472 if disk.dev_type == constants.LD_FILE:
13473 raise errors.OpPrereqError("Export not supported for instances with"
13474 " file-based disks", errors.ECODE_INVAL)
13476 def _CleanupExports(self, feedback_fn):
13477 """Removes exports of current instance from all other nodes.
13479 If an instance in a cluster with nodes A..D was exported to node C, its
13480 exports will be removed from the nodes A, B and D.
13483 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13485 nodelist = self.cfg.GetNodeList()
13486 nodelist.remove(self.dst_node.name)
13488 # on one-node clusters nodelist will be empty after the removal
13489 # if we proceed the backup would be removed because OpBackupQuery
13490 # substitutes an empty list with the full cluster node list.
13491 iname = self.instance.name
13493 feedback_fn("Removing old exports for instance %s" % iname)
13494 exportlist = self.rpc.call_export_list(nodelist)
13495 for node in exportlist:
13496 if exportlist[node].fail_msg:
13498 if iname in exportlist[node].payload:
13499 msg = self.rpc.call_export_remove(node, iname).fail_msg
13501 self.LogWarning("Could not remove older export for instance %s"
13502 " on node %s: %s", iname, node, msg)
13504 def Exec(self, feedback_fn):
13505 """Export an instance to an image in the cluster.
13508 assert self.op.mode in constants.EXPORT_MODES
13510 instance = self.instance
13511 src_node = instance.primary_node
13513 if self.op.shutdown:
13514 # shutdown the instance, but not the disks
13515 feedback_fn("Shutting down instance %s" % instance.name)
13516 result = self.rpc.call_instance_shutdown(src_node, instance,
13517 self.op.shutdown_timeout)
13518 # TODO: Maybe ignore failures if ignore_remove_failures is set
13519 result.Raise("Could not shutdown instance %s on"
13520 " node %s" % (instance.name, src_node))
13522 # set the disks ID correctly since call_instance_start needs the
13523 # correct drbd minor to create the symlinks
13524 for disk in instance.disks:
13525 self.cfg.SetDiskID(disk, src_node)
13527 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13530 # Activate the instance disks if we're exporting a stopped instance
13531 feedback_fn("Activating disks for %s" % instance.name)
13532 _StartInstanceDisks(self, instance, None)
13535 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13538 helper.CreateSnapshots()
13540 if (self.op.shutdown and
13541 instance.admin_state == constants.ADMINST_UP and
13542 not self.op.remove_instance):
13543 assert not activate_disks
13544 feedback_fn("Starting instance %s" % instance.name)
13545 result = self.rpc.call_instance_start(src_node,
13546 (instance, None, None), False)
13547 msg = result.fail_msg
13549 feedback_fn("Failed to start instance: %s" % msg)
13550 _ShutdownInstanceDisks(self, instance)
13551 raise errors.OpExecError("Could not start instance: %s" % msg)
13553 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13554 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13555 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13556 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13557 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13559 (key_name, _, _) = self.x509_key_name
13562 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13565 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13566 key_name, dest_ca_pem,
13571 # Check for backwards compatibility
13572 assert len(dresults) == len(instance.disks)
13573 assert compat.all(isinstance(i, bool) for i in dresults), \
13574 "Not all results are boolean: %r" % dresults
13578 feedback_fn("Deactivating disks for %s" % instance.name)
13579 _ShutdownInstanceDisks(self, instance)
13581 if not (compat.all(dresults) and fin_resu):
13584 failures.append("export finalization")
13585 if not compat.all(dresults):
13586 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13588 failures.append("disk export: disk(s) %s" % fdsk)
13590 raise errors.OpExecError("Export failed, errors in %s" %
13591 utils.CommaJoin(failures))
13593 # At this point, the export was successful, we can cleanup/finish
13595 # Remove instance if requested
13596 if self.op.remove_instance:
13597 feedback_fn("Removing instance %s" % instance.name)
13598 _RemoveInstance(self, feedback_fn, instance,
13599 self.op.ignore_remove_failures)
13601 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13602 self._CleanupExports(feedback_fn)
13604 return fin_resu, dresults
13607 class LUBackupRemove(NoHooksLU):
13608 """Remove exports related to the named instance.
13613 def ExpandNames(self):
13614 self.needed_locks = {}
13615 # We need all nodes to be locked in order for RemoveExport to work, but we
13616 # don't need to lock the instance itself, as nothing will happen to it (and
13617 # we can remove exports also for a removed instance)
13618 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13620 def Exec(self, feedback_fn):
13621 """Remove any export.
13624 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13625 # If the instance was not found we'll try with the name that was passed in.
13626 # This will only work if it was an FQDN, though.
13628 if not instance_name:
13630 instance_name = self.op.instance_name
13632 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13633 exportlist = self.rpc.call_export_list(locked_nodes)
13635 for node in exportlist:
13636 msg = exportlist[node].fail_msg
13638 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13640 if instance_name in exportlist[node].payload:
13642 result = self.rpc.call_export_remove(node, instance_name)
13643 msg = result.fail_msg
13645 logging.error("Could not remove export for instance %s"
13646 " on node %s: %s", instance_name, node, msg)
13648 if fqdn_warn and not found:
13649 feedback_fn("Export not found. If trying to remove an export belonging"
13650 " to a deleted instance please use its Fully Qualified"
13654 class LUGroupAdd(LogicalUnit):
13655 """Logical unit for creating node groups.
13658 HPATH = "group-add"
13659 HTYPE = constants.HTYPE_GROUP
13662 def ExpandNames(self):
13663 # We need the new group's UUID here so that we can create and acquire the
13664 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13665 # that it should not check whether the UUID exists in the configuration.
13666 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13667 self.needed_locks = {}
13668 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13670 def CheckPrereq(self):
13671 """Check prerequisites.
13673 This checks that the given group name is not an existing node group
13678 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13679 except errors.OpPrereqError:
13682 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13683 " node group (UUID: %s)" %
13684 (self.op.group_name, existing_uuid),
13685 errors.ECODE_EXISTS)
13687 if self.op.ndparams:
13688 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13690 if self.op.hv_state:
13691 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13693 self.new_hv_state = None
13695 if self.op.disk_state:
13696 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13698 self.new_disk_state = None
13700 if self.op.diskparams:
13701 for templ in constants.DISK_TEMPLATES:
13702 if templ in self.op.diskparams:
13703 utils.ForceDictType(self.op.diskparams[templ],
13704 constants.DISK_DT_TYPES)
13705 self.new_diskparams = self.op.diskparams
13707 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13708 except errors.OpPrereqError, err:
13709 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13710 errors.ECODE_INVAL)
13712 self.new_diskparams = {}
13714 if self.op.ipolicy:
13715 cluster = self.cfg.GetClusterInfo()
13716 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13718 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13719 except errors.ConfigurationError, err:
13720 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13721 errors.ECODE_INVAL)
13723 def BuildHooksEnv(self):
13724 """Build hooks env.
13728 "GROUP_NAME": self.op.group_name,
13731 def BuildHooksNodes(self):
13732 """Build hooks nodes.
13735 mn = self.cfg.GetMasterNode()
13736 return ([mn], [mn])
13738 def Exec(self, feedback_fn):
13739 """Add the node group to the cluster.
13742 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13743 uuid=self.group_uuid,
13744 alloc_policy=self.op.alloc_policy,
13745 ndparams=self.op.ndparams,
13746 diskparams=self.new_diskparams,
13747 ipolicy=self.op.ipolicy,
13748 hv_state_static=self.new_hv_state,
13749 disk_state_static=self.new_disk_state)
13751 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13752 del self.remove_locks[locking.LEVEL_NODEGROUP]
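# A minimal invocation only needs the new group's name, e.g. (hypothetical):
#   OpGroupAdd(group_name="rack2",
#              alloc_policy=constants.ALLOC_POLICY_PREFERRED)
# the remaining parameters (ndparams, diskparams, ipolicy, hv_state,
# disk_state) default to empty/None and are normalized by CheckPrereq above.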
13755 class LUGroupAssignNodes(NoHooksLU):
13756 """Logical unit for assigning nodes to groups.
13761 def ExpandNames(self):
13762 # These raise errors.OpPrereqError on their own:
13763 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13764 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13766 # We want to lock all the affected nodes and groups. We have readily
13767 # available the list of nodes, and the *destination* group. To gather the
13768 # list of "source" groups, we need to fetch node information later on.
13769 self.needed_locks = {
13770 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13771 locking.LEVEL_NODE: self.op.nodes,
13774 def DeclareLocks(self, level):
13775 if level == locking.LEVEL_NODEGROUP:
13776 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13778 # Try to get all affected nodes' groups without having the group or node
13779 # lock yet. Needs verification later in the code flow.
13780 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13782 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13784 def CheckPrereq(self):
13785 """Check prerequisites.
13788 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13789 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13790 frozenset(self.op.nodes))
13792 expected_locks = (set([self.group_uuid]) |
13793 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13794 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13795 if actual_locks != expected_locks:
13796 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13797 " current groups are '%s', used to be '%s'" %
13798 (utils.CommaJoin(expected_locks),
13799 utils.CommaJoin(actual_locks)))
13801 self.node_data = self.cfg.GetAllNodesInfo()
13802 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13803 instance_data = self.cfg.GetAllInstancesInfo()
13805 if self.group is None:
13806 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13807 (self.op.group_name, self.group_uuid))
13809 (new_splits, previous_splits) = \
13810 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13811 for node in self.op.nodes],
13812 self.node_data, instance_data)
13815 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13817 if not self.op.force:
13818 raise errors.OpExecError("The following instances get split by this"
13819 " change and --force was not given: %s" %
13822 self.LogWarning("This operation will split the following instances: %s",
13825 if previous_splits:
13826 self.LogWarning("In addition, these already-split instances continue"
13827 " to be split across groups: %s",
13828 utils.CommaJoin(utils.NiceSort(previous_splits)))
13830 def Exec(self, feedback_fn):
13831 """Assign nodes to a new group.
13834 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13836 self.cfg.AssignGroupNodes(mods)
13839 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13840 """Check for split instances after a node assignment.
13842 This method considers a series of node assignments as an atomic operation,
13843 and returns information about split instances after applying the set of
13846 In particular, it returns information about newly split instances, and
13847 instances that were already split, and remain so after the change.
13849 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13852 @type changes: list of (node_name, new_group_uuid) pairs.
13853 @param changes: list of node assignments to consider.
13854 @param node_data: a dict with data for all nodes
13855 @param instance_data: a dict with all instances to consider
13856 @rtype: a two-tuple
13857 @return: a list of instances that were previously okay and end up split as a
13858 consequence of this change, and a list of instances that were previously
13859 split and this change does not fix.
13862 changed_nodes = dict((node, group) for node, group in changes
13863 if node_data[node].group != group)
13865 all_split_instances = set()
13866 previously_split_instances = set()
13868 def InstanceNodes(instance):
13869 return [instance.primary_node] + list(instance.secondary_nodes)
13871 for inst in instance_data.values():
13872 if inst.disk_template not in constants.DTS_INT_MIRROR:
13875 instance_nodes = InstanceNodes(inst)
13877 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13878 previously_split_instances.add(inst.name)
13880 if len(set(changed_nodes.get(node, node_data[node].group)
13881 for node in instance_nodes)) > 1:
13882 all_split_instances.add(inst.name)
13884 return (list(all_split_instances - previously_split_instances),
13885 list(previously_split_instances & all_split_instances))
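# Worked example (hypothetical): nodes n1/n2 in group g1, n3 in group g2, and
# a DRBD instance on (n1, n2). Moving n2 to g2 makes the instance newly
# split (primary and secondary now in different groups); an instance already
# spanning g1 and g2 before the change would instead be reported as
# previously split.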
13888 class _GroupQuery(_QueryBase):
13889 FIELDS = query.GROUP_FIELDS
13891 def ExpandNames(self, lu):
13892 lu.needed_locks = {}
13894 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13895 self._cluster = lu.cfg.GetClusterInfo()
13896 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13899 self.wanted = [name_to_uuid[name]
13900 for name in utils.NiceSort(name_to_uuid.keys())]
13902 # Accept names as either group names or UUIDs.
13905 all_uuid = frozenset(self._all_groups.keys())
13907 for name in self.names:
13908 if name in all_uuid:
13909 self.wanted.append(name)
13910 elif name in name_to_uuid:
13911 self.wanted.append(name_to_uuid[name])
13913 missing.append(name)
13916 raise errors.OpPrereqError("Some groups do not exist: %s" %
13917 utils.CommaJoin(missing),
13918 errors.ECODE_NOENT)
13920 def DeclareLocks(self, lu, level):
13923 def _GetQueryData(self, lu):
13924 """Computes the list of node groups and their attributes.
13927 do_nodes = query.GQ_NODE in self.requested_data
13928 do_instances = query.GQ_INST in self.requested_data
13930 group_to_nodes = None
13931 group_to_instances = None
13933 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13934 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13935 # latter GetAllInstancesInfo() is not enough, for we have to go through
13936 # instance->node. Hence, we will need to process nodes even if we only need
13937 # instance information.
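# The resulting maps are keyed by group UUID, e.g. (hypothetical):
#   group_to_nodes     = {"uuid-g1": ["node1", "node2"], "uuid-g2": []}
#   group_to_instances = {"uuid-g1": ["inst1.example.com"], "uuid-g2": []}
# where an instance is attributed to the group of its primary node.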
13938 if do_nodes or do_instances:
13939 all_nodes = lu.cfg.GetAllNodesInfo()
13940 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13943 for node in all_nodes.values():
13944 if node.group in group_to_nodes:
13945 group_to_nodes[node.group].append(node.name)
13946 node_to_group[node.name] = node.group
13949 all_instances = lu.cfg.GetAllInstancesInfo()
13950 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13952 for instance in all_instances.values():
13953 node = instance.primary_node
13954 if node in node_to_group:
13955 group_to_instances[node_to_group[node]].append(instance.name)
13958 # Do not pass on node information if it was not requested.
13959 group_to_nodes = None
13961 return query.GroupQueryData(self._cluster,
13962 [self._all_groups[uuid]
13963 for uuid in self.wanted],
13964 group_to_nodes, group_to_instances,
13965 query.GQ_DISKPARAMS in self.requested_data)
13968 class LUGroupQuery(NoHooksLU):
13969 """Logical unit for querying node groups.
13974 def CheckArguments(self):
13975 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13976 self.op.output_fields, False)
13978 def ExpandNames(self):
13979 self.gq.ExpandNames(self)
13981 def DeclareLocks(self, level):
13982 self.gq.DeclareLocks(self, level)
13984 def Exec(self, feedback_fn):
13985 return self.gq.OldStyleQuery(self)
13988 class LUGroupSetParams(LogicalUnit):
13989 """Modifies the parameters of a node group.
13992 HPATH = "group-modify"
13993 HTYPE = constants.HTYPE_GROUP
13996 def CheckArguments(self):
13999 self.op.diskparams,
14000 self.op.alloc_policy,
14002 self.op.disk_state,
14006 if all_changes.count(None) == len(all_changes):
14007 raise errors.OpPrereqError("Please pass at least one modification",
14008 errors.ECODE_INVAL)
14010 def ExpandNames(self):
14011 # This raises errors.OpPrereqError on its own:
14012 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14014 self.needed_locks = {
14015 locking.LEVEL_INSTANCE: [],
14016 locking.LEVEL_NODEGROUP: [self.group_uuid],
14019 self.share_locks[locking.LEVEL_INSTANCE] = 1
14021 def DeclareLocks(self, level):
14022 if level == locking.LEVEL_INSTANCE:
14023 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14025 # Lock instances optimistically, needs verification once group lock has
14027 self.needed_locks[locking.LEVEL_INSTANCE] = \
14028 self.cfg.GetNodeGroupInstances(self.group_uuid)
14031 def _UpdateAndVerifyDiskParams(old, new):
14032 """Updates and verifies disk parameters.
14035 new_params = _GetUpdatedParams(old, new)
14036 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14037 return new_params
14039 def CheckPrereq(self):
14040 """Check prerequisites.
14043 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14045 # Check if locked instances are still correct
14046 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14048 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14049 cluster = self.cfg.GetClusterInfo()
14051 if self.group is None:
14052 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14053 (self.op.group_name, self.group_uuid))
14055 if self.op.ndparams:
14056 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14057 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14058 self.new_ndparams = new_ndparams
14060 if self.op.diskparams:
14061 diskparams = self.group.diskparams
14062 uavdp = self._UpdateAndVerifyDiskParams
14063 # For each disktemplate subdict update and verify the values
14064 new_diskparams = dict((dt,
14065 uavdp(diskparams.get(dt, {}),
14066 self.op.diskparams[dt]))
14067 for dt in constants.DISK_TEMPLATES
14068 if dt in self.op.diskparams)
14069 # Now that all updated subdicts of diskparams are ready, let's merge them
14070 # into the actual dict
14071 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
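# Example of the merge (hypothetical values): if the group's diskparams map
# constants.DT_DRBD8 to {"metavg": "xenvg"} and self.op.diskparams map it to
# {"resync-rate": 61440}, the updated subdict becomes
# {"metavg": "xenvg", "resync-rate": 61440}; FillDict leaves the subdicts of
# all other disk templates untouched.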
14073 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14074 except errors.OpPrereqError, err:
14075 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14076 errors.ECODE_INVAL)
14078 if self.op.hv_state:
14079 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14080 self.group.hv_state_static)
14082 if self.op.disk_state:
14083 self.new_disk_state = \
14084 _MergeAndVerifyDiskState(self.op.disk_state,
14085 self.group.disk_state_static)
14087 if self.op.ipolicy:
14088 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14092 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14093 inst_filter = lambda inst: inst.name in owned_instances
14094 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14095 gmi = ganeti.masterd.instance
14097 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14099 new_ipolicy, instances)
14102 self.LogWarning("After the ipolicy change the following instances"
14103 " violate them: %s",
14104 utils.CommaJoin(violations))
14106 def BuildHooksEnv(self):
14107 """Build hooks env.
14111 "GROUP_NAME": self.op.group_name,
14112 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14115 def BuildHooksNodes(self):
14116 """Build hooks nodes.
14119 mn = self.cfg.GetMasterNode()
14120 return ([mn], [mn])
14122 def Exec(self, feedback_fn):
14123 """Modifies the node group.
14128 if self.op.ndparams:
14129 self.group.ndparams = self.new_ndparams
14130 result.append(("ndparams", str(self.group.ndparams)))
14132 if self.op.diskparams:
14133 self.group.diskparams = self.new_diskparams
14134 result.append(("diskparams", str(self.group.diskparams)))
14136 if self.op.alloc_policy:
14137 self.group.alloc_policy = self.op.alloc_policy
14139 if self.op.hv_state:
14140 self.group.hv_state_static = self.new_hv_state
14142 if self.op.disk_state:
14143 self.group.disk_state_static = self.new_disk_state
14145 if self.op.ipolicy:
14146 self.group.ipolicy = self.new_ipolicy
14148 self.cfg.Update(self.group, feedback_fn)
14152 class LUGroupRemove(LogicalUnit):
14153 HPATH = "group-remove"
14154 HTYPE = constants.HTYPE_GROUP
14157 def ExpandNames(self):
14158 # This raises errors.OpPrereqError on its own:
14159 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14160 self.needed_locks = {
14161 locking.LEVEL_NODEGROUP: [self.group_uuid],
14164 def CheckPrereq(self):
14165 """Check prerequisites.
14167 This checks that the given group name exists as a node group, that it is
14168 empty (i.e., contains no nodes), and that it is not the last group of the
14172 # Verify that the group is empty.
14173 group_nodes = [node.name
14174 for node in self.cfg.GetAllNodesInfo().values()
14175 if node.group == self.group_uuid]
14178 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14180 (self.op.group_name,
14181 utils.CommaJoin(utils.NiceSort(group_nodes))),
14182 errors.ECODE_STATE)
14184 # Verify the cluster would not be left group-less.
14185 if len(self.cfg.GetNodeGroupList()) == 1:
14186 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14187 " removed" % self.op.group_name,
14188 errors.ECODE_STATE)
14190 def BuildHooksEnv(self):
14191 """Build hooks env.
14195 "GROUP_NAME": self.op.group_name,
14198 def BuildHooksNodes(self):
14199 """Build hooks nodes.
14202 mn = self.cfg.GetMasterNode()
14203 return ([mn], [mn])
14205 def Exec(self, feedback_fn):
14206 """Remove the node group.
14210 self.cfg.RemoveNodeGroup(self.group_uuid)
14211 except errors.ConfigurationError:
14212 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14213 (self.op.group_name, self.group_uuid))
14215 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14218 class LUGroupRename(LogicalUnit):
14219 HPATH = "group-rename"
14220 HTYPE = constants.HTYPE_GROUP
14223 def ExpandNames(self):
14224 # This raises errors.OpPrereqError on its own:
14225 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14227 self.needed_locks = {
14228 locking.LEVEL_NODEGROUP: [self.group_uuid],
14231 def CheckPrereq(self):
14232 """Check prerequisites.
14234 Ensures requested new name is not yet used.
14238 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14239 except errors.OpPrereqError:
14242 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14243 " node group (UUID: %s)" %
14244 (self.op.new_name, new_name_uuid),
14245 errors.ECODE_EXISTS)
14247 def BuildHooksEnv(self):
14248 """Build hooks env.
14252 "OLD_NAME": self.op.group_name,
14253 "NEW_NAME": self.op.new_name,
14256 def BuildHooksNodes(self):
14257 """Build hooks nodes.
14260 mn = self.cfg.GetMasterNode()
14262 all_nodes = self.cfg.GetAllNodesInfo()
14263 all_nodes.pop(mn, None)
14266 run_nodes.extend(node.name for node in all_nodes.values()
14267 if node.group == self.group_uuid)
14269 return (run_nodes, run_nodes)
14271 def Exec(self, feedback_fn):
14272 """Rename the node group.
14275 group = self.cfg.GetNodeGroup(self.group_uuid)
14278 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14279 (self.op.group_name, self.group_uuid))
14281 group.name = self.op.new_name
14282 self.cfg.Update(group, feedback_fn)
14284 return self.op.new_name
14287 class LUGroupEvacuate(LogicalUnit):
14288 HPATH = "group-evacuate"
14289 HTYPE = constants.HTYPE_GROUP
14292 def ExpandNames(self):
14293 # This raises errors.OpPrereqError on its own:
14294 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14296 if self.op.target_groups:
14297 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14298 self.op.target_groups)
14300 self.req_target_uuids = []
14302 if self.group_uuid in self.req_target_uuids:
14303 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14304 " as a target group (targets are %s)" %
14306 utils.CommaJoin(self.req_target_uuids)),
14307 errors.ECODE_INVAL)
14309 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14311 self.share_locks = _ShareAll()
14312 self.needed_locks = {
14313 locking.LEVEL_INSTANCE: [],
14314 locking.LEVEL_NODEGROUP: [],
14315 locking.LEVEL_NODE: [],
14318 def DeclareLocks(self, level):
14319 if level == locking.LEVEL_INSTANCE:
14320 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14322 # Lock instances optimistically, needs verification once node and group
14323 # locks have been acquired
14324 self.needed_locks[locking.LEVEL_INSTANCE] = \
14325 self.cfg.GetNodeGroupInstances(self.group_uuid)
14327 elif level == locking.LEVEL_NODEGROUP:
14328 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14330 if self.req_target_uuids:
14331 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14333 # Lock all groups used by instances optimistically; this requires going
14334 # via the node before it's locked, requiring verification later on
14335 lock_groups.update(group_uuid
14336 for instance_name in
14337 self.owned_locks(locking.LEVEL_INSTANCE)
14339 self.cfg.GetInstanceNodeGroups(instance_name))
14341 # No target groups, need to lock all of them
14342 lock_groups = locking.ALL_SET
14344 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14346 elif level == locking.LEVEL_NODE:
14347 # This will only lock the nodes in the group to be evacuated which
14348 # contain actual instances
14349 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14350 self._LockInstancesNodes()
14352 # Lock all nodes in group to be evacuated and target groups
14353 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14354 assert self.group_uuid in owned_groups
14355 member_nodes = [node_name
14356 for group in owned_groups
14357 for node_name in self.cfg.GetNodeGroup(group).members]
14358 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14360 def CheckPrereq(self):
14361 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14362 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14363 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14365 assert owned_groups.issuperset(self.req_target_uuids)
14366 assert self.group_uuid in owned_groups
14368 # Check if locked instances are still correct
14369 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14371 # Get instance information
14372 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14374 # Check if node groups for locked instances are still correct
14375 _CheckInstancesNodeGroups(self.cfg, self.instances,
14376 owned_groups, owned_nodes, self.group_uuid)
14378 if self.req_target_uuids:
14379 # User requested specific target groups
14380 self.target_uuids = self.req_target_uuids
14382 # All groups except the one to be evacuated are potential targets
14383 self.target_uuids = [group_uuid for group_uuid in owned_groups
14384 if group_uuid != self.group_uuid]
14386 if not self.target_uuids:
14387 raise errors.OpPrereqError("There are no possible target groups",
14388 errors.ECODE_INVAL)
14390 def BuildHooksEnv(self):
14391 """Build hooks env.
14395 "GROUP_NAME": self.op.group_name,
14396 "TARGET_GROUPS": " ".join(self.target_uuids),
14399 def BuildHooksNodes(self):
14400 """Build hooks nodes.
14403 mn = self.cfg.GetMasterNode()
14405 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14407 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14409 return (run_nodes, run_nodes)
14411 def Exec(self, feedback_fn):
14412 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14414 assert self.group_uuid not in self.target_uuids
14416 req = iallocator.IAReqGroupChange(instances=instances,
14417 target_groups=self.target_uuids)
14418 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14420 ial.Run(self.op.iallocator)
14422 if not ial.success:
14423 raise errors.OpPrereqError("Can't compute group evacuation using"
14424 " iallocator '%s': %s" %
14425 (self.op.iallocator, ial.info),
14426 errors.ECODE_NORES)
14428 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14430 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14431 len(jobs), self.op.group_name)
14433 return ResultWithJobs(jobs)
14436 class TagsLU(NoHooksLU): # pylint: disable=W0223
14437 """Generic tags LU.
14439 This is an abstract class which is the parent of all the other tags LUs.
14442 def ExpandNames(self):
14443 self.group_uuid = None
14444 self.needed_locks = {}
14446 if self.op.kind == constants.TAG_NODE:
14447 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14448 lock_level = locking.LEVEL_NODE
14449 lock_name = self.op.name
14450 elif self.op.kind == constants.TAG_INSTANCE:
14451 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14452 lock_level = locking.LEVEL_INSTANCE
14453 lock_name = self.op.name
14454 elif self.op.kind == constants.TAG_NODEGROUP:
14455 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14456 lock_level = locking.LEVEL_NODEGROUP
14457 lock_name = self.group_uuid
14462 if lock_level and getattr(self.op, "use_locking", True):
14463 self.needed_locks[lock_level] = lock_name
14465 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14466 # not possible to acquire the BGL based on opcode parameters)
14468 def CheckPrereq(self):
14469 """Check prerequisites.
14472 if self.op.kind == constants.TAG_CLUSTER:
14473 self.target = self.cfg.GetClusterInfo()
14474 elif self.op.kind == constants.TAG_NODE:
14475 self.target = self.cfg.GetNodeInfo(self.op.name)
14476 elif self.op.kind == constants.TAG_INSTANCE:
14477 self.target = self.cfg.GetInstanceInfo(self.op.name)
14478 elif self.op.kind == constants.TAG_NODEGROUP:
14479 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14481 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14482 str(self.op.kind), errors.ECODE_INVAL)
14485 class LUTagsGet(TagsLU):
14486 """Returns the tags of a given object.
14491 def ExpandNames(self):
14492 TagsLU.ExpandNames(self)
14494 # Share locks as this is only a read operation
14495 self.share_locks = _ShareAll()
14497 def Exec(self, feedback_fn):
14498 """Returns the tag list.
14501 return list(self.target.GetTags())
14504 class LUTagsSearch(NoHooksLU):
14505 """Searches the tags for a given pattern.
14510 def ExpandNames(self):
14511 self.needed_locks = {}
14513 def CheckPrereq(self):
14514 """Check prerequisites.
14516 This checks the pattern passed for validity by compiling it.
14520 self.re = re.compile(self.op.pattern)
14521 except re.error, err:
14522 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14523 (self.op.pattern, err), errors.ECODE_INVAL)
14525 def Exec(self, feedback_fn):
14526 """Returns the tag list.
14530 tgts = [("/cluster", cfg.GetClusterInfo())]
14531 ilist = cfg.GetAllInstancesInfo().values()
14532 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14533 nlist = cfg.GetAllNodesInfo().values()
14534 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14535 tgts.extend(("/nodegroup/%s" % n.name, n)
14536 for n in cfg.GetAllNodeGroupsInfo().values())
14538 for path, target in tgts:
14539 for tag in target.GetTags():
14540 if self.re.search(tag):
14541 results.append((path, tag))
14542 return results
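# Illustrative sketch (assumed tag names): with pattern "^prod", the result
# could look like
#
#   [("/cluster", "production"),
#    ("/instances/inst1.example.com", "prod-db")]
#
# i.e. one (path, tag) pair per matching tag across cluster, instances, nodes
# and node groups.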
14545 class LUTagsSet(TagsLU):
14546 """Sets a tag on a given object.
14548 """
14549 REQ_BGL = False
14551 def CheckPrereq(self):
14552 """Check prerequisites.
14554 This checks the type and length of the tag name and value.
14556 """
14557 TagsLU.CheckPrereq(self)
14558 for tag in self.op.tags:
14559 objects.TaggableObject.ValidateTag(tag)
14561 def Exec(self, feedback_fn):
14562 """Sets the tag.
14564 """
14565 try:
14566 for tag in self.op.tags:
14567 self.target.AddTag(tag)
14568 except errors.TagError, err:
14569 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14570 self.cfg.Update(self.target, feedback_fn)
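# Illustrative sketch (assumption): this LU is what a command along the lines
# of "gnt-instance add-tags inst1.example.com production web" ends up in; each
# tag is validated by objects.TaggableObject.ValidateTag() in CheckPrereq and
# the configuration is written back exactly once after all tags were added.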
14573 class LUTagsDel(TagsLU):
14574 """Delete a list of tags from a given object.
14576 """
14577 REQ_BGL = False
14579 def CheckPrereq(self):
14580 """Check prerequisites.
14582 This checks that we have the given tag.
14584 """
14585 TagsLU.CheckPrereq(self)
14586 for tag in self.op.tags:
14587 objects.TaggableObject.ValidateTag(tag)
14588 del_tags = frozenset(self.op.tags)
14589 cur_tags = self.target.GetTags()
14591 diff_tags = del_tags - cur_tags
14592 if diff_tags:
14593 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14594 raise errors.OpPrereqError("Tag(s) %s not found" %
14595 (utils.CommaJoin(diff_names), ),
14596 errors.ECODE_NOENT)
14598 def Exec(self, feedback_fn):
14599 """Remove the tag from the object.
14601 """
14602 for tag in self.op.tags:
14603 self.target.RemoveTag(tag)
14604 self.cfg.Update(self.target, feedback_fn)
14607 class LUTestDelay(NoHooksLU):
14608 """Sleep for a specified amount of time.
14610 This LU sleeps on the master and/or nodes for a specified amount of
14611 time.
14613 """
14614 REQ_BGL = False
14616 def ExpandNames(self):
14617 """Expand names and set required locks.
14619 This expands the node list, if any.
14621 """
14622 self.needed_locks = {}
14623 if self.op.on_nodes:
14624 # _GetWantedNodes can be used here, but is not always appropriate to use
14625 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14626 # more information.
14627 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14628 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14630 def _TestDelay(self):
14631 """Do the actual sleep.
14633 """
14634 if self.op.on_master:
14635 if not utils.TestDelay(self.op.duration):
14636 raise errors.OpExecError("Error during master delay test")
14637 if self.op.on_nodes:
14638 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14639 for node, node_result in result.items():
14640 node_result.Raise("Failure during rpc call to node %s" % node)
14642 def Exec(self, feedback_fn):
14643 """Execute the test delay opcode, with the wanted repetitions.
14645 """
14646 if self.op.repeat == 0:
14647 self._TestDelay()
14648 else:
14649 top_value = self.op.repeat - 1
14650 for i in range(self.op.repeat):
14651 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14652 self._TestDelay()
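# Illustrative sketch (assumed parameters): with repeat=2, duration=5 and
# on_nodes=["node1.example.com"], the loop above logs
#
#   Test delay iteration 0/1
#   Test delay iteration 1/1
#
# and calls _TestDelay() once per iteration, i.e. the 5 second sleep happens
# on the master (if on_master is set) and via the test_delay RPC on node1.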
14655 class LUTestJqueue(NoHooksLU):
14656 """Utility LU to test some aspects of the job queue.
14658 """
14659 REQ_BGL = False
14661 # Must be lower than default timeout for WaitForJobChange to see whether it
14662 # notices changed jobs
14663 _CLIENT_CONNECT_TIMEOUT = 20.0
14664 _CLIENT_CONFIRM_TIMEOUT = 60.0
14666 @classmethod
14667 def _NotifyUsingSocket(cls, cb, errcls):
14668 """Opens a Unix socket and waits for another program to connect.
14670 @type cb: callable
14671 @param cb: Callback to send socket name to client
14672 @type errcls: class
14673 @param errcls: Exception class to use for errors
14675 """
14676 # Using a temporary directory as there's no easy way to create temporary
14677 # sockets without writing a custom loop around tempfile.mktemp and
14678 # socket.bind
14679 tmpdir = tempfile.mkdtemp()
14680 try:
14681 tmpsock = utils.PathJoin(tmpdir, "sock")
14683 logging.debug("Creating temporary socket at %s", tmpsock)
14684 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14685 try:
14686 sock.bind(tmpsock)
14687 sock.listen(1)
14689 # Send details to client
14690 cb(tmpsock)
14692 # Wait for client to connect before continuing
14693 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14694 try:
14695 (conn, _) = sock.accept()
14696 except socket.error, err:
14697 raise errcls("Client didn't connect in time (%s)" % err)
14698 finally:
14699 sock.close()
14700 finally:
14701 # Remove as soon as client is connected
14702 shutil.rmtree(tmpdir)
14704 # Wait for client to close
14705 try:
14706 try:
14707 # pylint: disable=E1101
14708 # Instance of '_socketobject' has no ... member
14709 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14710 conn.recv(1)
14711 except socket.error, err:
14712 raise errcls("Client failed to confirm notification (%s)" % err)
14713 finally:
14714 conn.close()
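# Illustrative client-side sketch (assumption, not part of this module): the
# process that receives the socket path via the callback is expected to do
# roughly
#
#   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   s.connect(sockname)   # unblocks sock.accept() above
#   s.close()             # makes conn.recv(1) above return
#
# and to do so within _CLIENT_CONNECT_TIMEOUT/_CLIENT_CONFIRM_TIMEOUT, since
# otherwise the errcls exception is raised.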
14716 def _SendNotification(self, test, arg, sockname):
14717 """Sends a notification to the client.
14719 @type test: string
14720 @param test: Test name
14721 @param arg: Test argument (depends on test)
14722 @type sockname: string
14723 @param sockname: Socket path
14725 """
14726 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14728 def _Notify(self, prereq, test, arg):
14729 """Notifies the client of a test.
14731 @type prereq: bool
14732 @param prereq: Whether this is a prereq-phase test
14733 @type test: string
14734 @param test: Test name
14735 @param arg: Test argument (depends on test)
14737 """
14738 if prereq:
14739 errcls = errors.OpPrereqError
14740 else:
14741 errcls = errors.OpExecError
14743 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14744 test, arg),
14745 errcls)
14747 def CheckArguments(self):
14748 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14749 self.expandnames_calls = 0
14751 def ExpandNames(self):
14752 checkargs_calls = getattr(self, "checkargs_calls", 0)
14753 if checkargs_calls < 1:
14754 raise errors.ProgrammerError("CheckArguments was not called")
14756 self.expandnames_calls += 1
14758 if self.op.notify_waitlock:
14759 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14761 self.LogInfo("Expanding names")
14763 # Get lock on master node (just to get a lock, not for a particular reason)
14764 self.needed_locks = {
14765 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14766 }
14768 def Exec(self, feedback_fn):
14769 if self.expandnames_calls < 1:
14770 raise errors.ProgrammerError("ExpandNames was not called")
14772 if self.op.notify_exec:
14773 self._Notify(False, constants.JQT_EXEC, None)
14775 self.LogInfo("Executing")
14777 if self.op.log_messages:
14778 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14779 for idx, msg in enumerate(self.op.log_messages):
14780 self.LogInfo("Sending log message %s", idx + 1)
14781 feedback_fn(constants.JQT_MSGPREFIX + msg)
14782 # Report how many test messages have been sent
14783 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14785 if self.op.fail:
14786 raise errors.OpExecError("Opcode failure was requested")
14788 return True
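# Illustrative sketch (assumed parameters): a test driver submitting
# OpTestJqueue with notify_exec=True and log_messages=["a", "b"] is notified
# of JQT_EXEC and JQT_STARTMSG on the socket it is handed, then sees the two
# JQT_MSGPREFIX-prefixed feedback messages; with fail=True the opcode ends in
# the OpExecError above instead of returning True.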
14791 class LUTestAllocator(NoHooksLU):
14792 """Run allocator tests.
14794 This LU runs the allocator tests
14796 """
14797 def CheckPrereq(self):
14798 """Check prerequisites.
14800 This checks the opcode parameters depending on the direction and mode test.
14802 """
14803 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
14804 constants.IALLOCATOR_MODE_MULTI_ALLOC):
14805 for attr in ["memory", "disks", "disk_template",
14806 "os", "tags", "nics", "vcpus"]:
14807 if not hasattr(self.op, attr):
14808 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14809 attr, errors.ECODE_INVAL)
14810 iname = self.cfg.ExpandInstanceName(self.op.name)
14811 if iname is not None:
14812 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14813 iname, errors.ECODE_EXISTS)
14814 if not isinstance(self.op.nics, list):
14815 raise errors.OpPrereqError("Invalid parameter 'nics'",
14816 errors.ECODE_INVAL)
14817 if not isinstance(self.op.disks, list):
14818 raise errors.OpPrereqError("Invalid parameter 'disks'",
14819 errors.ECODE_INVAL)
14820 for row in self.op.disks:
14821 if (not isinstance(row, dict) or
14822 constants.IDISK_SIZE not in row or
14823 not isinstance(row[constants.IDISK_SIZE], int) or
14824 constants.IDISK_MODE not in row or
14825 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14826 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14827 " parameter", errors.ECODE_INVAL)
14828 if self.op.hypervisor is None:
14829 self.op.hypervisor = self.cfg.GetHypervisorType()
14830 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14831 fname = _ExpandInstanceName(self.cfg, self.op.name)
14832 self.op.name = fname
14833 self.relocate_from = \
14834 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14835 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14836 constants.IALLOCATOR_MODE_NODE_EVAC):
14837 if not self.op.instances:
14838 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14839 self.op.instances = _GetWantedInstances(self, self.op.instances)
14840 else:
14841 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14842 self.op.mode, errors.ECODE_INVAL)
14844 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14845 if self.op.allocator is None:
14846 raise errors.OpPrereqError("Missing allocator name",
14847 errors.ECODE_INVAL)
14848 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14849 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14850 self.op.direction, errors.ECODE_INVAL)
14852 def Exec(self, feedback_fn):
14853 """Run the allocator test.
14855 """
14856 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14857 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
14858 memory=self.op.memory,
14859 disks=self.op.disks,
14860 disk_template=self.op.disk_template,
14861 os=self.op.os,
14862 tags=self.op.tags,
14863 nics=self.op.nics,
14864 vcpus=self.op.vcpus,
14865 spindle_use=self.op.spindle_use,
14866 hypervisor=self.op.hypervisor)
14867 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14868 req = iallocator.IAReqRelocate(name=self.op.name,
14869 relocate_from=list(self.relocate_from))
14870 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14871 req = iallocator.IAReqGroupChange(instances=self.op.instances,
14872 target_groups=self.op.target_groups)
14873 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14874 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
14875 evac_mode=self.op.evac_mode)
14876 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
14877 disk_template = self.op.disk_template
14878 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
14879 memory=self.op.memory,
14880 disks=self.op.disks,
14881 disk_template=disk_template,
14882 os=self.op.os,
14883 tags=self.op.tags,
14884 nics=self.op.nics,
14885 vcpus=self.op.vcpus,
14886 spindle_use=self.op.spindle_use,
14887 hypervisor=self.op.hypervisor)
14888 for idx in range(self.op.count)]
14889 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
14890 else:
14891 raise errors.ProgrammerError("Uncaught mode %s in"
14892 " LUTestAllocator.Exec", self.op.mode)
14894 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14895 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14896 result = ial.in_text
14897 else:
14898 ial.Run(self.op.allocator, validate=False)
14899 result = ial.out_text
14901 return result
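# Illustrative sketch (assumption): with direction=IALLOCATOR_DIR_IN the LU
# only returns the JSON request text (ial.in_text) that would be handed to the
# allocator script, which is useful for inspecting e.g. an "allocate" request
# built from the memory/disks/nics parameters; with IALLOCATOR_DIR_OUT the
# named allocator is actually executed (without result validation) and its raw
# output text (ial.out_text) is returned.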
14903 #: Query type implementations
14904 _QUERY_IMPL = {
14905 constants.QR_CLUSTER: _ClusterQuery,
14906 constants.QR_INSTANCE: _InstanceQuery,
14907 constants.QR_NODE: _NodeQuery,
14908 constants.QR_GROUP: _GroupQuery,
14909 constants.QR_OS: _OsQuery,
14910 constants.QR_EXPORT: _ExportQuery,
14911 }
14913 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14916 def _GetQueryImplementation(name):
14917 """Returns the implementation for a query type.
14919 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14921 """
14922 try:
14923 return _QUERY_IMPL[name]
14924 except KeyError:
14925 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14926 errors.ECODE_INVAL)
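# Illustrative sketch (assumed caller): the generic query LUs look up the
# implementation class by resource name, e.g.
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#
# and a name outside constants.QR_VIA_OP surfaces as the OpPrereqError above
# rather than as a bare KeyError.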