code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56 from ganeti import query
  57 from ganeti import qlang
  58 from ganeti import opcodes
  59 from ganeti import ht
  60 from ganeti import rpc
  61 from ganeti import runtime
  62 from ganeti.masterd import iallocator
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 # States of instance
  68 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  69 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  70 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  71
  72 #: Instance status in which an instance can be marked as offline/online
  73 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  74   constants.ADMINST_OFFLINE,
  75   ]))
  76
  77
  78 class ResultWithJobs:
  79   """Data container for LU results with jobs.
  80
  81   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  82   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  83   contained in the C{jobs} attribute and include the job IDs in the opcode
  84   result.
  85
  86   """
  87   def __init__(self, jobs, **kwargs):
  88     """Initializes this class.
  89
  90     Additional return values can be specified as keyword arguments.
  91
  92     @type jobs: list of lists of L{opcode.OpCode}
  93     @param jobs: A list of lists of opcode objects
  94
  95     """
  96     self.jobs = jobs
  97     self.other = kwargs
  98
  99
 100 class LogicalUnit(object):
 101   """Logical Unit base class.
 102
 103   Subclasses must follow these rules:
 104     - implement ExpandNames
 105     - implement CheckPrereq (except when tasklets are used)
 106     - implement Exec (except when tasklets are used)
 107     - implement BuildHooksEnv
 108     - implement BuildHooksNodes
 109     - redefine HPATH and HTYPE
 110     - optionally redefine their run requirements:
 111         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 112
 113   Note that all commands require root permissions.
 114
 115   @ivar dry_run_result: the value (if any) that will be returned to the caller
 116       in dry-run mode (signalled by opcode dry_run parameter)
 117
 118   """
 119   HPATH = None
 120   HTYPE = None
 121   REQ_BGL = True
 122
 123   def __init__(self, processor, op, context, rpc_runner):
 124     """Constructor for LogicalUnit.
 125
 126     This needs to be overridden in derived classes in order to check op
 127     validity.
 128
 129     """
 130     self.proc = processor
 131     self.op = op
 132     self.cfg = context.cfg
 133     self.glm = context.glm
 134     # readability alias
 135     self.owned_locks = context.glm.list_owned
 136     self.context = context
 137     self.rpc = rpc_runner
 138     # Dicts used to declare locking needs to mcpu
 139     self.needed_locks = None
 140     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 141     self.add_locks = {}
 142     self.remove_locks = {}
 143     # Used to force good behavior when calling helper functions
 144     self.recalculate_locks = {}
 145     # logging
 146     self.Log = processor.Log # pylint: disable=C0103
 147     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 148     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 149     self.LogStep = processor.LogStep # pylint: disable=C0103
 150     # support for dry-run
 151     self.dry_run_result = None
 152     # support for generic debug attribute
 153     if (not hasattr(self.op, "debug_level") or
 154         not isinstance(self.op.debug_level, int)):
 155       self.op.debug_level = 0
 156
 157     # Tasklets
 158     self.tasklets = None
 159
 160     # Validate opcode parameters and set defaults
 161     self.op.Validate(True)
 162
 163     self.CheckArguments()
 164
 165   def CheckArguments(self):
 166     """Check syntactic validity for the opcode arguments.
 167
 168     This method is for doing a simple syntactic check and ensure
 169     validity of opcode parameters, without any cluster-related
 170     checks. While the same can be accomplished in ExpandNames and/or
 171     CheckPrereq, doing these separate is better because:
 172
 173       - ExpandNames is left as as purely a lock-related function
 174       - CheckPrereq is run after we have acquired locks (and possible
 175         waited for them)
 176
 177     The function is allowed to change the self.op attribute so that
 178     later methods can no longer worry about missing parameters.
 179
 180     """
 181     pass
 182
 183   def ExpandNames(self):
 184     """Expand names for this LU.
 185
 186     This method is called before starting to execute the opcode, and it should
 187     update all the parameters of the opcode to their canonical form (e.g. a
 188     short node name must be fully expanded after this method has successfully
 189     completed). This way locking, hooks, logging, etc. can work correctly.
 190
 191     LUs which implement this method must also populate the self.needed_locks
 192     member, as a dict with lock levels as keys, and a list of needed lock names
 193     as values. Rules:
 194
 195       - use an empty dict if you don't need any lock
 196       - if you don't need any lock at a particular level omit that
 197         level (note that in this case C{DeclareLocks} won't be called
 198         at all for that level)
 199       - if you need locks at a level, but you can't calculate it in
 200         this function, initialise that level with an empty list and do
 201         further processing in L{LogicalUnit.DeclareLocks} (see that
 202         function's docstring)
 203       - don't put anything for the BGL level
 204       - if you want all locks at a level use L{locking.ALL_SET} as a value
 205
 206     If you need to share locks (rather than acquire them exclusively) at one
 207     level you can modify self.share_locks, setting a true value (usually 1) for
 208     that level. By default locks are not shared.
 209
 210     This function can also define a list of tasklets, which then will be
 211     executed in order instead of the usual LU-level CheckPrereq and Exec
 212     functions, if those are not defined by the LU.
 213
 214     Examples::
 215
 216       # Acquire all nodes and one instance
 217       self.needed_locks = {
 218         locking.LEVEL_NODE: locking.ALL_SET,
 219         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 220       }
 221       # Acquire just two nodes
 222       self.needed_locks = {
 223         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 224       }
 225       # Acquire no locks
 226       self.needed_locks = {} # No, you can't leave it to the default value None
 227
 228     """
 229     # The implementation of this method is mandatory only if the new LU is
 230     # concurrent, so that old LUs don't need to be changed all at the same
 231     # time.
 232     if self.REQ_BGL:
 233       self.needed_locks = {} # Exclusive LUs don't need locks.
 234     else:
 235       raise NotImplementedError
 236
 237   def DeclareLocks(self, level):
 238     """Declare LU locking needs for a level
 239
 240     While most LUs can just declare their locking needs at ExpandNames time,
 241     sometimes there's the need to calculate some locks after having acquired
 242     the ones before. This function is called just before acquiring locks at a
 243     particular level, but after acquiring the ones at lower levels, and permits
 244     such calculations. It can be used to modify self.needed_locks, and by
 245     default it does nothing.
 246
 247     This function is only called if you have something already set in
 248     self.needed_locks for the level.
 249
 250     @param level: Locking level which is going to be locked
 251     @type level: member of L{ganeti.locking.LEVELS}
 252
 253     """
 254
 255   def CheckPrereq(self):
 256     """Check prerequisites for this LU.
 257
 258     This method should check that the prerequisites for the execution
 259     of this LU are fulfilled. It can do internode communication, but
 260     it should be idempotent - no cluster or system changes are
 261     allowed.
 262
 263     The method should raise errors.OpPrereqError in case something is
 264     not fulfilled. Its return value is ignored.
 265
 266     This method should also update all the parameters of the opcode to
 267     their canonical form if it hasn't been done by ExpandNames before.
 268
 269     """
 270     if self.tasklets is not None:
 271       for (idx, tl) in enumerate(self.tasklets):
 272         logging.debug("Checking prerequisites for tasklet %s/%s",
 273                       idx + 1, len(self.tasklets))
 274         tl.CheckPrereq()
 275     else:
 276       pass
 277
 278   def Exec(self, feedback_fn):
 279     """Execute the LU.
 280
 281     This method should implement the actual work. It should raise
 282     errors.OpExecError for failures that are somewhat dealt with in
 283     code, or expected.
 284
 285     """
 286     if self.tasklets is not None:
 287       for (idx, tl) in enumerate(self.tasklets):
 288         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 289         tl.Exec(feedback_fn)
 290     else:
 291       raise NotImplementedError
 292
 293   def BuildHooksEnv(self):
 294     """Build hooks environment for this LU.
 295
 296     @rtype: dict
 297     @return: Dictionary containing the environment that will be used for
 298       running the hooks for this LU. The keys of the dict must not be prefixed
 299       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 300       will extend the environment with additional variables. If no environment
 301       should be defined, an empty dictionary should be returned (not C{None}).
 302     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 303       will not be called.
 304
 305     """
 306     raise NotImplementedError
 307
 308   def BuildHooksNodes(self):
 309     """Build list of nodes to run LU's hooks.
 310
 311     @rtype: tuple; (list, list)
 312     @return: Tuple containing a list of node names on which the hook
 313       should run before the execution and a list of node names on which the
 314       hook should run after the execution. No nodes should be returned as an
 315       empty list (and not None).
 316     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 317       will not be called.
 318
 319     """
 320     raise NotImplementedError
 321
 322   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 323     """Notify the LU about the results of its hooks.
 324
 325     This method is called every time a hooks phase is executed, and notifies
 326     the Logical Unit about the hooks' result. The LU can then use it to alter
 327     its result based on the hooks.  By default the method does nothing and the
 328     previous result is passed back unchanged but any LU can define it if it
 329     wants to use the local cluster hook-scripts somehow.
 330
 331     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 332         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 333     @param hook_results: the results of the multi-node hooks rpc call
 334     @param feedback_fn: function used send feedback back to the caller
 335     @param lu_result: the previous Exec result this LU had, or None
 336         in the PRE phase
 337     @return: the new Exec result, based on the previous result
 338         and hook results
 339
 340     """
 341     # API must be kept, thus we ignore the unused argument and could
 342     # be a function warnings
 343     # pylint: disable=W0613,R0201
 344     return lu_result
 345
 346   def _ExpandAndLockInstance(self):
 347     """Helper function to expand and lock an instance.
 348
 349     Many LUs that work on an instance take its name in self.op.instance_name
 350     and need to expand it and then declare the expanded name for locking. This
 351     function does it, and then updates self.op.instance_name to the expanded
 352     name. It also initializes needed_locks as a dict, if this hasn't been done
 353     before.
 354
 355     """
 356     if self.needed_locks is None:
 357       self.needed_locks = {}
 358     else:
 359       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 360         "_ExpandAndLockInstance called with instance-level locks set"
 361     self.op.instance_name = _ExpandInstanceName(self.cfg,
 362                                                 self.op.instance_name)
 363     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 364
 365   def _LockInstancesNodes(self, primary_only=False,
 366                           level=locking.LEVEL_NODE):
 367     """Helper function to declare instances' nodes for locking.
 368
 369     This function should be called after locking one or more instances to lock
 370     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 371     with all primary or secondary nodes for instances already locked and
 372     present in self.needed_locks[locking.LEVEL_INSTANCE].
 373
 374     It should be called from DeclareLocks, and for safety only works if
 375     self.recalculate_locks[locking.LEVEL_NODE] is set.
 376
 377     In the future it may grow parameters to just lock some instance's nodes, or
 378     to just lock primaries or secondary nodes, if needed.
 379
 380     If should be called in DeclareLocks in a way similar to::
 381
 382       if level == locking.LEVEL_NODE:
 383         self._LockInstancesNodes()
 384
 385     @type primary_only: boolean
 386     @param primary_only: only lock primary nodes of locked instances
 387     @param level: Which lock level to use for locking nodes
 388
 389     """
 390     assert level in self.recalculate_locks, \
 391       "_LockInstancesNodes helper function called with no nodes to recalculate"
 392
 393     # TODO: check if we're really been called with the instance locks held
 394
 395     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 396     # future we might want to have different behaviors depending on the value
 397     # of self.recalculate_locks[locking.LEVEL_NODE]
 398     wanted_nodes = []
 399     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 400     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 401       wanted_nodes.append(instance.primary_node)
 402       if not primary_only:
 403         wanted_nodes.extend(instance.secondary_nodes)
 404
 405     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 406       self.needed_locks[level] = wanted_nodes
 407     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 408       self.needed_locks[level].extend(wanted_nodes)
 409     else:
 410       raise errors.ProgrammerError("Unknown recalculation mode")
 411
 412     del self.recalculate_locks[level]
 413
 414
 415 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 416   """Simple LU which runs no hooks.
 417
 418   This LU is intended as a parent for other LogicalUnits which will
 419   run no hooks, in order to reduce duplicate code.
 420
 421   """
 422   HPATH = None
 423   HTYPE = None
 424
 425   def BuildHooksEnv(self):
 426     """Empty BuildHooksEnv for NoHooksLu.
 427
 428     This just raises an error.
 429
 430     """
 431     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 432
 433   def BuildHooksNodes(self):
 434     """Empty BuildHooksNodes for NoHooksLU.
 435
 436     """
 437     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 438
 439
 440 class Tasklet:
 441   """Tasklet base class.
 442
 443   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 444   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 445   tasklets know nothing about locks.
 446
 447   Subclasses must follow these rules:
 448     - Implement CheckPrereq
 449     - Implement Exec
 450
 451   """
 452   def __init__(self, lu):
 453     self.lu = lu
 454
 455     # Shortcuts
 456     self.cfg = lu.cfg
 457     self.rpc = lu.rpc
 458
 459   def CheckPrereq(self):
 460     """Check prerequisites for this tasklets.
 461
 462     This method should check whether the prerequisites for the execution of
 463     this tasklet are fulfilled. It can do internode communication, but it
 464     should be idempotent - no cluster or system changes are allowed.
 465
 466     The method should raise errors.OpPrereqError in case something is not
 467     fulfilled. Its return value is ignored.
 468
 469     This method should also update all parameters to their canonical form if it
 470     hasn't been done before.
 471
 472     """
 473     pass
 474
 475   def Exec(self, feedback_fn):
 476     """Execute the tasklet.
 477
 478     This method should implement the actual work. It should raise
 479     errors.OpExecError for failures that are somewhat dealt with in code, or
 480     expected.
 481
 482     """
 483     raise NotImplementedError
 484
 485
 486 class _QueryBase:
 487   """Base for query utility classes.
 488
 489   """
 490   #: Attribute holding field definitions
 491   FIELDS = None
 492
 493   #: Field to sort by
 494   SORT_FIELD = "name"
 495
 496   def __init__(self, qfilter, fields, use_locking):
 497     """Initializes this class.
 498
 499     """
 500     self.use_locking = use_locking
 501
 502     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 503                              namefield=self.SORT_FIELD)
 504     self.requested_data = self.query.RequestedData()
 505     self.names = self.query.RequestedNames()
 506
 507     # Sort only if no names were requested
 508     self.sort_by_name = not self.names
 509
 510     self.do_locking = None
 511     self.wanted = None
 512
 513   def _GetNames(self, lu, all_names, lock_level):
 514     """Helper function to determine names asked for in the query.
 515
 516     """
 517     if self.do_locking:
 518       names = lu.owned_locks(lock_level)
 519     else:
 520       names = all_names
 521
 522     if self.wanted == locking.ALL_SET:
 523       assert not self.names
 524       # caller didn't specify names, so ordering is not important
 525       return utils.NiceSort(names)
 526
 527     # caller specified names and we must keep the same order
 528     assert self.names
 529     assert not self.do_locking or lu.glm.is_owned(lock_level)
 530
 531     missing = set(self.wanted).difference(names)
 532     if missing:
 533       raise errors.OpExecError("Some items were removed before retrieving"
 534                                " their data: %s" % missing)
 535
 536     # Return expanded names
 537     return self.wanted
 538
 539   def ExpandNames(self, lu):
 540     """Expand names for this query.
 541
 542     See L{LogicalUnit.ExpandNames}.
 543
 544     """
 545     raise NotImplementedError()
 546
 547   def DeclareLocks(self, lu, level):
 548     """Declare locks for this query.
 549
 550     See L{LogicalUnit.DeclareLocks}.
 551
 552     """
 553     raise NotImplementedError()
 554
 555   def _GetQueryData(self, lu):
 556     """Collects all data for this query.
 557
 558     @return: Query data object
 559
 560     """
 561     raise NotImplementedError()
 562
 563   def NewStyleQuery(self, lu):
 564     """Collect data and execute query.
 565
 566     """
 567     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 568                                   sort_by_name=self.sort_by_name)
 569
 570   def OldStyleQuery(self, lu):
 571     """Collect data and execute query.
 572
 573     """
 574     return self.query.OldStyleQuery(self._GetQueryData(lu),
 575                                     sort_by_name=self.sort_by_name)
 576
 577
 578 def _ShareAll():
 579   """Returns a dict declaring all lock levels shared.
 580
 581   """
 582   return dict.fromkeys(locking.LEVELS, 1)
 583
 584
 585 def _AnnotateDiskParams(instance, devs, cfg):
 586   """Little helper wrapper to the rpc annotation method.
 587
 588   @param instance: The instance object
 589   @type devs: List of L{objects.Disk}
 590   @param devs: The root devices (not any of its children!)
 591   @param cfg: The config object
 592   @returns The annotated disk copies
 593   @see L{rpc.AnnotateDiskParams}
 594
 595   """
 596   return rpc.AnnotateDiskParams(instance.disk_template, devs,
 597                                 cfg.GetInstanceDiskParams(instance))
 598
 599
 600 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 601                               cur_group_uuid):
 602   """Checks if node groups for locked instances are still correct.
 603
 604   @type cfg: L{config.ConfigWriter}
 605   @param cfg: Cluster configuration
 606   @type instances: dict; string as key, L{objects.Instance} as value
 607   @param instances: Dictionary, instance name as key, instance object as value
 608   @type owned_groups: iterable of string
 609   @param owned_groups: List of owned groups
 610   @type owned_nodes: iterable of string
 611   @param owned_nodes: List of owned nodes
 612   @type cur_group_uuid: string or None
 613   @param cur_group_uuid: Optional group UUID to check against instance's groups
 614
 615   """
 616   for (name, inst) in instances.items():
 617     assert owned_nodes.issuperset(inst.all_nodes), \
 618       "Instance %s's nodes changed while we kept the lock" % name
 619
 620     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 621
 622     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 623       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 624
 625
 626 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
 627                              primary_only=False):
 628   """Checks if the owned node groups are still correct for an instance.
 629
 630   @type cfg: L{config.ConfigWriter}
 631   @param cfg: The cluster configuration
 632   @type instance_name: string
 633   @param instance_name: Instance name
 634   @type owned_groups: set or frozenset
 635   @param owned_groups: List of currently owned node groups
 636   @type primary_only: boolean
 637   @param primary_only: Whether to check node groups for only the primary node
 638
 639   """
 640   inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
 641
 642   if not owned_groups.issuperset(inst_groups):
 643     raise errors.OpPrereqError("Instance %s's node groups changed since"
 644                                " locks were acquired, current groups are"
 645                                " are '%s', owning groups '%s'; retry the"
 646                                " operation" %
 647                                (instance_name,
 648                                 utils.CommaJoin(inst_groups),
 649                                 utils.CommaJoin(owned_groups)),
 650                                errors.ECODE_STATE)
 651
 652   return inst_groups
 653
 654
 655 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 656   """Checks if the instances in a node group are still correct.
 657
 658   @type cfg: L{config.ConfigWriter}
 659   @param cfg: The cluster configuration
 660   @type group_uuid: string
 661   @param group_uuid: Node group UUID
 662   @type owned_instances: set or frozenset
 663   @param owned_instances: List of currently owned instances
 664
 665   """
 666   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 667   if owned_instances != wanted_instances:
 668     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 669                                " locks were acquired, wanted '%s', have '%s';"
 670                                " retry the operation" %
 671                                (group_uuid,
 672                                 utils.CommaJoin(wanted_instances),
 673                                 utils.CommaJoin(owned_instances)),
 674                                errors.ECODE_STATE)
 675
 676   return wanted_instances
 677
 678
 679 def _SupportsOob(cfg, node):
 680   """Tells if node supports OOB.
 681
 682   @type cfg: L{config.ConfigWriter}
 683   @param cfg: The cluster configuration
 684   @type node: L{objects.Node}
 685   @param node: The node
 686   @return: The OOB script if supported or an empty string otherwise
 687
 688   """
 689   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 690
 691
 692 def _GetWantedNodes(lu, nodes):
 693   """Returns list of checked and expanded node names.
 694
 695   @type lu: L{LogicalUnit}
 696   @param lu: the logical unit on whose behalf we execute
 697   @type nodes: list
 698   @param nodes: list of node names or None for all nodes
 699   @rtype: list
 700   @return: the list of nodes, sorted
 701   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 702
 703   """
 704   if nodes:
 705     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 706
 707   return utils.NiceSort(lu.cfg.GetNodeList())
 708
 709
 710 def _GetWantedInstances(lu, instances):
 711   """Returns list of checked and expanded instance names.
 712
 713   @type lu: L{LogicalUnit}
 714   @param lu: the logical unit on whose behalf we execute
 715   @type instances: list
 716   @param instances: list of instance names or None for all instances
 717   @rtype: list
 718   @return: the list of instances, sorted
 719   @raise errors.OpPrereqError: if the instances parameter is wrong type
 720   @raise errors.OpPrereqError: if any of the passed instances is not found
 721
 722   """
 723   if instances:
 724     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 725   else:
 726     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 727   return wanted
 728
 729
 730 def _GetUpdatedParams(old_params, update_dict,
 731                       use_default=True, use_none=False):
 732   """Return the new version of a parameter dictionary.
 733
 734   @type old_params: dict
 735   @param old_params: old parameters
 736   @type update_dict: dict
 737   @param update_dict: dict containing new parameter values, or
 738       constants.VALUE_DEFAULT to reset the parameter to its default
 739       value
 740   @param use_default: boolean
 741   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 742       values as 'to be deleted' values
 743   @param use_none: boolean
 744   @type use_none: whether to recognise C{None} values as 'to be
 745       deleted' values
 746   @rtype: dict
 747   @return: the new parameter dictionary
 748
 749   """
 750   params_copy = copy.deepcopy(old_params)
 751   for key, val in update_dict.iteritems():
 752     if ((use_default and val == constants.VALUE_DEFAULT) or
 753         (use_none and val is None)):
 754       try:
 755         del params_copy[key]
 756       except KeyError:
 757         pass
 758     else:
 759       params_copy[key] = val
 760   return params_copy
 761
 762
 763 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 764   """Return the new version of a instance policy.
 765
 766   @param group_policy: whether this policy applies to a group and thus
 767     we should support removal of policy entries
 768
 769   """
 770   use_none = use_default = group_policy
 771   ipolicy = copy.deepcopy(old_ipolicy)
 772   for key, value in new_ipolicy.items():
 773     if key not in constants.IPOLICY_ALL_KEYS:
 774       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 775                                  errors.ECODE_INVAL)
 776     if key in constants.IPOLICY_ISPECS:
 777       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 778       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 779                                        use_none=use_none,
 780                                        use_default=use_default)
 781     else:
 782       if (not value or value == [constants.VALUE_DEFAULT] or
 783           value == constants.VALUE_DEFAULT):
 784         if group_policy:
 785           del ipolicy[key]
 786         else:
 787           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 788                                      " on the cluster'" % key,
 789                                      errors.ECODE_INVAL)
 790       else:
 791         if key in constants.IPOLICY_PARAMETERS:
 792           # FIXME: we assume all such values are float
 793           try:
 794             ipolicy[key] = float(value)
 795           except (TypeError, ValueError), err:
 796             raise errors.OpPrereqError("Invalid value for attribute"
 797                                        " '%s': '%s', error: %s" %
 798                                        (key, value, err), errors.ECODE_INVAL)
 799         else:
 800           # FIXME: we assume all others are lists; this should be redone
 801           # in a nicer way
 802           ipolicy[key] = list(value)
 803   try:
 804     objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
 805   except errors.ConfigurationError, err:
 806     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 807                                errors.ECODE_INVAL)
 808   return ipolicy
 809
 810
 811 def _UpdateAndVerifySubDict(base, updates, type_check):
 812   """Updates and verifies a dict with sub dicts of the same type.
 813
 814   @param base: The dict with the old data
 815   @param updates: The dict with the new data
 816   @param type_check: Dict suitable to ForceDictType to verify correct types
 817   @returns: A new dict with updated and verified values
 818
 819   """
 820   def fn(old, value):
 821     new = _GetUpdatedParams(old, value)
 822     utils.ForceDictType(new, type_check)
 823     return new
 824
 825   ret = copy.deepcopy(base)
 826   ret.update(dict((key, fn(base.get(key, {}), value))
 827                   for key, value in updates.items()))
 828   return ret
 829
 830
 831 def _MergeAndVerifyHvState(op_input, obj_input):
 832   """Combines the hv state from an opcode with the one of the object
 833
 834   @param op_input: The input dict from the opcode
 835   @param obj_input: The input dict from the objects
 836   @return: The verified and updated dict
 837
 838   """
 839   if op_input:
 840     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 841     if invalid_hvs:
 842       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 843                                  " %s" % utils.CommaJoin(invalid_hvs),
 844                                  errors.ECODE_INVAL)
 845     if obj_input is None:
 846       obj_input = {}
 847     type_check = constants.HVSTS_PARAMETER_TYPES
 848     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 849
 850   return None
 851
 852
 853 def _MergeAndVerifyDiskState(op_input, obj_input):
 854   """Combines the disk state from an opcode with the one of the object
 855
 856   @param op_input: The input dict from the opcode
 857   @param obj_input: The input dict from the objects
 858   @return: The verified and updated dict
 859   """
 860   if op_input:
 861     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 862     if invalid_dst:
 863       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 864                                  utils.CommaJoin(invalid_dst),
 865                                  errors.ECODE_INVAL)
 866     type_check = constants.DSS_PARAMETER_TYPES
 867     if obj_input is None:
 868       obj_input = {}
 869     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 870                                               type_check))
 871                 for key, value in op_input.items())
 872
 873   return None
 874
 875
 876 def _ReleaseLocks(lu, level, names=None, keep=None):
 877   """Releases locks owned by an LU.
 878
 879   @type lu: L{LogicalUnit}
 880   @param level: Lock level
 881   @type names: list or None
 882   @param names: Names of locks to release
 883   @type keep: list or None
 884   @param keep: Names of locks to retain
 885
 886   """
 887   assert not (keep is not None and names is not None), \
 888          "Only one of the 'names' and the 'keep' parameters can be given"
 889
 890   if names is not None:
 891     should_release = names.__contains__
 892   elif keep:
 893     should_release = lambda name: name not in keep
 894   else:
 895     should_release = None
 896
 897   owned = lu.owned_locks(level)
 898   if not owned:
 899     # Not owning any lock at this level, do nothing
 900     pass
 901
 902   elif should_release:
 903     retain = []
 904     release = []
 905
 906     # Determine which locks to release
 907     for name in owned:
 908       if should_release(name):
 909         release.append(name)
 910       else:
 911         retain.append(name)
 912
 913     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 914
 915     # Release just some locks
 916     lu.glm.release(level, names=release)
 917
 918     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 919   else:
 920     # Release everything
 921     lu.glm.release(level)
 922
 923     assert not lu.glm.is_owned(level), "No locks should be owned"
 924
 925
 926 def _MapInstanceDisksToNodes(instances):
 927   """Creates a map from (node, volume) to instance name.
 928
 929   @type instances: list of L{objects.Instance}
 930   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 931
 932   """
 933   return dict(((node, vol), inst.name)
 934               for inst in instances
 935               for (node, vols) in inst.MapLVsByNode().items()
 936               for vol in vols)
 937
 938
 939 def _RunPostHook(lu, node_name):
 940   """Runs the post-hook for an opcode on a single node.
 941
 942   """
 943   hm = lu.proc.BuildHooksManager(lu)
 944   try:
 945     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 946   except Exception, err: # pylint: disable=W0703
 947     lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
 948
 949
 950 def _CheckOutputFields(static, dynamic, selected):
 951   """Checks whether all selected fields are valid.
 952
 953   @type static: L{utils.FieldSet}
 954   @param static: static fields set
 955   @type dynamic: L{utils.FieldSet}
 956   @param dynamic: dynamic fields set
 957
 958   """
 959   f = utils.FieldSet()
 960   f.Extend(static)
 961   f.Extend(dynamic)
 962
 963   delta = f.NonMatching(selected)
 964   if delta:
 965     raise errors.OpPrereqError("Unknown output fields selected: %s"
 966                                % ",".join(delta), errors.ECODE_INVAL)
 967
 968
 969 def _CheckGlobalHvParams(params):
 970   """Validates that given hypervisor params are not global ones.
 971
 972   This will ensure that instances don't get customised versions of
 973   global params.
 974
 975   """
 976   used_globals = constants.HVC_GLOBALS.intersection(params)
 977   if used_globals:
 978     msg = ("The following hypervisor parameters are global and cannot"
 979            " be customized at instance level, please modify them at"
 980            " cluster level: %s" % utils.CommaJoin(used_globals))
 981     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 982
 983
 984 def _CheckNodeOnline(lu, node, msg=None):
 985   """Ensure that a given node is online.
 986
 987   @param lu: the LU on behalf of which we make the check
 988   @param node: the node to check
 989   @param msg: if passed, should be a message to replace the default one
 990   @raise errors.OpPrereqError: if the node is offline
 991
 992   """
 993   if msg is None:
 994     msg = "Can't use offline node"
 995   if lu.cfg.GetNodeInfo(node).offline:
 996     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 997
 998
 999 def _CheckNodeNotDrained(lu, node):
1000   """Ensure that a given node is not drained.
1001
1002   @param lu: the LU on behalf of which we make the check
1003   @param node: the node to check
1004   @raise errors.OpPrereqError: if the node is drained
1005
1006   """
1007   if lu.cfg.GetNodeInfo(node).drained:
1008     raise errors.OpPrereqError("Can't use drained node %s" % node,
1009                                errors.ECODE_STATE)
1010
1011
1012 def _CheckNodeVmCapable(lu, node):
1013   """Ensure that a given node is vm capable.
1014
1015   @param lu: the LU on behalf of which we make the check
1016   @param node: the node to check
1017   @raise errors.OpPrereqError: if the node is not vm capable
1018
1019   """
1020   if not lu.cfg.GetNodeInfo(node).vm_capable:
1021     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1022                                errors.ECODE_STATE)
1023
1024
1025 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1026   """Ensure that a node supports a given OS.
1027
1028   @param lu: the LU on behalf of which we make the check
1029   @param node: the node to check
1030   @param os_name: the OS to query about
1031   @param force_variant: whether to ignore variant errors
1032   @raise errors.OpPrereqError: if the node is not supporting the OS
1033
1034   """
1035   result = lu.rpc.call_os_get(node, os_name)
1036   result.Raise("OS '%s' not in supported OS list for node %s" %
1037                (os_name, node),
1038                prereq=True, ecode=errors.ECODE_INVAL)
1039   if not force_variant:
1040     _CheckOSVariant(result.payload, os_name)
1041
1042
1043 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1044   """Ensure that a node has the given secondary ip.
1045
1046   @type lu: L{LogicalUnit}
1047   @param lu: the LU on behalf of which we make the check
1048   @type node: string
1049   @param node: the node to check
1050   @type secondary_ip: string
1051   @param secondary_ip: the ip to check
1052   @type prereq: boolean
1053   @param prereq: whether to throw a prerequisite or an execute error
1054   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1055   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1056
1057   """
1058   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1059   result.Raise("Failure checking secondary ip on node %s" % node,
1060                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1061   if not result.payload:
1062     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1063            " please fix and re-run this command" % secondary_ip)
1064     if prereq:
1065       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1066     else:
1067       raise errors.OpExecError(msg)
1068
1069
1070 def _GetClusterDomainSecret():
1071   """Reads the cluster domain secret.
1072
1073   """
1074   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1075                                strict=True)
1076
1077
1078 def _CheckInstanceState(lu, instance, req_states, msg=None):
1079   """Ensure that an instance is in one of the required states.
1080
1081   @param lu: the LU on behalf of which we make the check
1082   @param instance: the instance to check
1083   @param msg: if passed, should be a message to replace the default one
1084   @raise errors.OpPrereqError: if the instance is not in the required state
1085
1086   """
1087   if msg is None:
1088     msg = "can't use instance from outside %s states" % ", ".join(req_states)
1089   if instance.admin_state not in req_states:
1090     raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1091                                (instance.name, instance.admin_state, msg),
1092                                errors.ECODE_STATE)
1093
1094   if constants.ADMINST_UP not in req_states:
1095     pnode = instance.primary_node
1096     if not lu.cfg.GetNodeInfo(pnode).offline:
1097       ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1098       ins_l.Raise("Can't contact node %s for instance information" % pnode,
1099                   prereq=True, ecode=errors.ECODE_ENVIRON)
1100       if instance.name in ins_l.payload:
1101         raise errors.OpPrereqError("Instance %s is running, %s" %
1102                                    (instance.name, msg), errors.ECODE_STATE)
1103     else:
1104       lu.LogWarning("Primary node offline, ignoring check that instance"
1105                      " is down")
1106
1107
1108 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1109   """Computes if value is in the desired range.
1110
1111   @param name: name of the parameter for which we perform the check
1112   @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1113       not just 'disk')
1114   @param ipolicy: dictionary containing min, max and std values
1115   @param value: actual value that we want to use
1116   @return: None or element not meeting the criteria
1117
1118
1119   """
1120   if value in [None, constants.VALUE_AUTO]:
1121     return None
1122   max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1123   min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1124   if value > max_v or min_v > value:
1125     if qualifier:
1126       fqn = "%s/%s" % (name, qualifier)
1127     else:
1128       fqn = name
1129     return ("%s value %s is not in range [%s, %s]" %
1130             (fqn, value, min_v, max_v))
1131   return None
1132
1133
1134 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1135                                  nic_count, disk_sizes, spindle_use,
1136                                  _compute_fn=_ComputeMinMaxSpec):
1137   """Verifies ipolicy against provided specs.
1138
1139   @type ipolicy: dict
1140   @param ipolicy: The ipolicy
1141   @type mem_size: int
1142   @param mem_size: The memory size
1143   @type cpu_count: int
1144   @param cpu_count: Used cpu cores
1145   @type disk_count: int
1146   @param disk_count: Number of disks used
1147   @type nic_count: int
1148   @param nic_count: Number of nics used
1149   @type disk_sizes: list of ints
1150   @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1151   @type spindle_use: int
1152   @param spindle_use: The number of spindles this instance uses
1153   @param _compute_fn: The compute function (unittest only)
1154   @return: A list of violations, or an empty list of no violations are found
1155
1156   """
1157   assert disk_count == len(disk_sizes)
1158
1159   test_settings = [
1160     (constants.ISPEC_MEM_SIZE, "", mem_size),
1161     (constants.ISPEC_CPU_COUNT, "", cpu_count),
1162     (constants.ISPEC_DISK_COUNT, "", disk_count),
1163     (constants.ISPEC_NIC_COUNT, "", nic_count),
1164     (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1165     ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1166          for idx, d in enumerate(disk_sizes)]
1167
1168   return filter(None,
1169                 (_compute_fn(name, qualifier, ipolicy, value)
1170                  for (name, qualifier, value) in test_settings))
1171
1172
1173 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1174                                      _compute_fn=_ComputeIPolicySpecViolation):
1175   """Compute if instance meets the specs of ipolicy.
1176
1177   @type ipolicy: dict
1178   @param ipolicy: The ipolicy to verify against
1179   @type instance: L{objects.Instance}
1180   @param instance: The instance to verify
1181   @param _compute_fn: The function to verify ipolicy (unittest only)
1182   @see: L{_ComputeIPolicySpecViolation}
1183
1184   """
1185   mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1186   cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1187   spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1188   disk_count = len(instance.disks)
1189   disk_sizes = [disk.size for disk in instance.disks]
1190   nic_count = len(instance.nics)
1191
1192   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1193                      disk_sizes, spindle_use)
1194
1195
1196 def _ComputeIPolicyInstanceSpecViolation(
1197   ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1198   """Compute if instance specs meets the specs of ipolicy.
1199
1200   @type ipolicy: dict
1201   @param ipolicy: The ipolicy to verify against
1202   @param instance_spec: dict
1203   @param instance_spec: The instance spec to verify
1204   @param _compute_fn: The function to verify ipolicy (unittest only)
1205   @see: L{_ComputeIPolicySpecViolation}
1206
1207   """
1208   mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1209   cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1210   disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1211   disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1212   nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1213   spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1214
1215   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1216                      disk_sizes, spindle_use)
1217
1218
1219 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1220                                  target_group,
1221                                  _compute_fn=_ComputeIPolicyInstanceViolation):
1222   """Compute if instance meets the specs of the new target group.
1223
1224   @param ipolicy: The ipolicy to verify
1225   @param instance: The instance object to verify
1226   @param current_group: The current group of the instance
1227   @param target_group: The new group of the instance
1228   @param _compute_fn: The function to verify ipolicy (unittest only)
1229   @see: L{_ComputeIPolicySpecViolation}
1230
1231   """
1232   if current_group == target_group:
1233     return []
1234   else:
1235     return _compute_fn(ipolicy, instance)
1236
1237
1238 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1239                             _compute_fn=_ComputeIPolicyNodeViolation):
1240   """Checks that the target node is correct in terms of instance policy.
1241
1242   @param ipolicy: The ipolicy to verify
1243   @param instance: The instance object to verify
1244   @param node: The new node to relocate
1245   @param ignore: Ignore violations of the ipolicy
1246   @param _compute_fn: The function to verify ipolicy (unittest only)
1247   @see: L{_ComputeIPolicySpecViolation}
1248
1249   """
1250   primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1251   res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1252
1253   if res:
1254     msg = ("Instance does not meet target node group's (%s) instance"
1255            " policy: %s") % (node.group, utils.CommaJoin(res))
1256     if ignore:
1257       lu.LogWarning(msg)
1258     else:
1259       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1260
1261
1262 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1263   """Computes a set of any instances that would violate the new ipolicy.
1264
1265   @param old_ipolicy: The current (still in-place) ipolicy
1266   @param new_ipolicy: The new (to become) ipolicy
1267   @param instances: List of instances to verify
1268   @return: A list of instances which violates the new ipolicy but
1269       did not before
1270
1271   """
1272   return (_ComputeViolatingInstances(new_ipolicy, instances) -
1273           _ComputeViolatingInstances(old_ipolicy, instances))
1274
1275
1276 def _ExpandItemName(fn, name, kind):
1277   """Expand an item name.
1278
1279   @param fn: the function to use for expansion
1280   @param name: requested item name
1281   @param kind: text description ('Node' or 'Instance')
1282   @return: the resolved (full) name
1283   @raise errors.OpPrereqError: if the item is not found
1284
1285   """
1286   full_name = fn(name)
1287   if full_name is None:
1288     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1289                                errors.ECODE_NOENT)
1290   return full_name
1291
1292
1293 def _ExpandNodeName(cfg, name):
1294   """Wrapper over L{_ExpandItemName} for nodes."""
1295   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1296
1297
1298 def _ExpandInstanceName(cfg, name):
1299   """Wrapper over L{_ExpandItemName} for instance."""
1300   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1301
1302
1303 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1304                           minmem, maxmem, vcpus, nics, disk_template, disks,
1305                           bep, hvp, hypervisor_name, tags):
1306   """Builds instance related env variables for hooks
1307
1308   This builds the hook environment from individual variables.
1309
1310   @type name: string
1311   @param name: the name of the instance
1312   @type primary_node: string
1313   @param primary_node: the name of the instance's primary node
1314   @type secondary_nodes: list
1315   @param secondary_nodes: list of secondary nodes as strings
1316   @type os_type: string
1317   @param os_type: the name of the instance's OS
1318   @type status: string
1319   @param status: the desired status of the instance
1320   @type minmem: string
1321   @param minmem: the minimum memory size of the instance
1322   @type maxmem: string
1323   @param maxmem: the maximum memory size of the instance
1324   @type vcpus: string
1325   @param vcpus: the count of VCPUs the instance has
1326   @type nics: list
1327   @param nics: list of tuples (ip, mac, mode, link) representing
1328       the NICs the instance has
1329   @type disk_template: string
1330   @param disk_template: the disk template of the instance
1331   @type disks: list
1332   @param disks: the list of (size, mode) pairs
1333   @type bep: dict
1334   @param bep: the backend parameters for the instance
1335   @type hvp: dict
1336   @param hvp: the hypervisor parameters for the instance
1337   @type hypervisor_name: string
1338   @param hypervisor_name: the hypervisor for the instance
1339   @type tags: list
1340   @param tags: list of instance tags as strings
1341   @rtype: dict
1342   @return: the hook environment for this instance
1343
1344   """
1345   env = {
1346     "OP_TARGET": name,
1347     "INSTANCE_NAME": name,
1348     "INSTANCE_PRIMARY": primary_node,
1349     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1350     "INSTANCE_OS_TYPE": os_type,
1351     "INSTANCE_STATUS": status,
1352     "INSTANCE_MINMEM": minmem,
1353     "INSTANCE_MAXMEM": maxmem,
1354     # TODO(2.7) remove deprecated "memory" value
1355     "INSTANCE_MEMORY": maxmem,
1356     "INSTANCE_VCPUS": vcpus,
1357     "INSTANCE_DISK_TEMPLATE": disk_template,
1358     "INSTANCE_HYPERVISOR": hypervisor_name,
1359   }
1360   if nics:
1361     nic_count = len(nics)
1362     for idx, (ip, mac, mode, link) in enumerate(nics):
1363       if ip is None:
1364         ip = ""
1365       env["INSTANCE_NIC%d_IP" % idx] = ip
1366       env["INSTANCE_NIC%d_MAC" % idx] = mac
1367       env["INSTANCE_NIC%d_MODE" % idx] = mode
1368       env["INSTANCE_NIC%d_LINK" % idx] = link
1369       if mode == constants.NIC_MODE_BRIDGED:
1370         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1371   else:
1372     nic_count = 0
1373
1374   env["INSTANCE_NIC_COUNT"] = nic_count
1375
1376   if disks:
1377     disk_count = len(disks)
1378     for idx, (size, mode) in enumerate(disks):
1379       env["INSTANCE_DISK%d_SIZE" % idx] = size
1380       env["INSTANCE_DISK%d_MODE" % idx] = mode
1381   else:
1382     disk_count = 0
1383
1384   env["INSTANCE_DISK_COUNT"] = disk_count
1385
1386   if not tags:
1387     tags = []
1388
1389   env["INSTANCE_TAGS"] = " ".join(tags)
1390
1391   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1392     for key, value in source.items():
1393       env["INSTANCE_%s_%s" % (kind, key)] = value
1394
1395   return env
1396
1397
1398 def _NICListToTuple(lu, nics):
1399   """Build a list of nic information tuples.
1400
1401   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1402   value in LUInstanceQueryData.
1403
1404   @type lu:  L{LogicalUnit}
1405   @param lu: the logical unit on whose behalf we execute
1406   @type nics: list of L{objects.NIC}
1407   @param nics: list of nics to convert to hooks tuples
1408
1409   """
1410   hooks_nics = []
1411   cluster = lu.cfg.GetClusterInfo()
1412   for nic in nics:
1413     ip = nic.ip
1414     mac = nic.mac
1415     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1416     mode = filled_params[constants.NIC_MODE]
1417     link = filled_params[constants.NIC_LINK]
1418     hooks_nics.append((ip, mac, mode, link))
1419   return hooks_nics
1420
1421
1422 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1423   """Builds instance related env variables for hooks from an object.
1424
1425   @type lu: L{LogicalUnit}
1426   @param lu: the logical unit on whose behalf we execute
1427   @type instance: L{objects.Instance}
1428   @param instance: the instance for which we should build the
1429       environment
1430   @type override: dict
1431   @param override: dictionary with key/values that will override
1432       our values
1433   @rtype: dict
1434   @return: the hook environment dictionary
1435
1436   """
1437   cluster = lu.cfg.GetClusterInfo()
1438   bep = cluster.FillBE(instance)
1439   hvp = cluster.FillHV(instance)
1440   args = {
1441     "name": instance.name,
1442     "primary_node": instance.primary_node,
1443     "secondary_nodes": instance.secondary_nodes,
1444     "os_type": instance.os,
1445     "status": instance.admin_state,
1446     "maxmem": bep[constants.BE_MAXMEM],
1447     "minmem": bep[constants.BE_MINMEM],
1448     "vcpus": bep[constants.BE_VCPUS],
1449     "nics": _NICListToTuple(lu, instance.nics),
1450     "disk_template": instance.disk_template,
1451     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1452     "bep": bep,
1453     "hvp": hvp,
1454     "hypervisor_name": instance.hypervisor,
1455     "tags": instance.tags,
1456   }
1457   if override:
1458     args.update(override)
1459   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1460
1461
1462 def _AdjustCandidatePool(lu, exceptions):
1463   """Adjust the candidate pool after node operations.
1464
1465   """
1466   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1467   if mod_list:
1468     lu.LogInfo("Promoted nodes to master candidate role: %s",
1469                utils.CommaJoin(node.name for node in mod_list))
1470     for name in mod_list:
1471       lu.context.ReaddNode(name)
1472   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1473   if mc_now > mc_max:
1474     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1475                (mc_now, mc_max))
1476
1477
1478 def _DecideSelfPromotion(lu, exceptions=None):
1479   """Decide whether I should promote myself as a master candidate.
1480
1481   """
1482   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1483   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1484   # the new node will increase mc_max with one, so:
1485   mc_should = min(mc_should + 1, cp_size)
1486   return mc_now < mc_should
1487
1488
1489 def _ComputeViolatingInstances(ipolicy, instances):
1490   """Computes a set of instances who violates given ipolicy.
1491
1492   @param ipolicy: The ipolicy to verify
1493   @type instances: object.Instance
1494   @param instances: List of instances to verify
1495   @return: A frozenset of instance names violating the ipolicy
1496
1497   """
1498   return frozenset([inst.name for inst in instances
1499                     if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1500
1501
1502 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1503   """Check that the brigdes needed by a list of nics exist.
1504
1505   """
1506   cluster = lu.cfg.GetClusterInfo()
1507   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1508   brlist = [params[constants.NIC_LINK] for params in paramslist
1509             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1510   if brlist:
1511     result = lu.rpc.call_bridges_exist(target_node, brlist)
1512     result.Raise("Error checking bridges on destination node '%s'" %
1513                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1514
1515
1516 def _CheckInstanceBridgesExist(lu, instance, node=None):
1517   """Check that the brigdes needed by an instance exist.
1518
1519   """
1520   if node is None:
1521     node = instance.primary_node
1522   _CheckNicsBridgesExist(lu, instance.nics, node)
1523
1524
1525 def _CheckOSVariant(os_obj, name):
1526   """Check whether an OS name conforms to the os variants specification.
1527
1528   @type os_obj: L{objects.OS}
1529   @param os_obj: OS object to check
1530   @type name: string
1531   @param name: OS name passed by the user, to check for validity
1532
1533   """
1534   variant = objects.OS.GetVariant(name)
1535   if not os_obj.supported_variants:
1536     if variant:
1537       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1538                                  " passed)" % (os_obj.name, variant),
1539                                  errors.ECODE_INVAL)
1540     return
1541   if not variant:
1542     raise errors.OpPrereqError("OS name must include a variant",
1543                                errors.ECODE_INVAL)
1544
1545   if variant not in os_obj.supported_variants:
1546     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1547
1548
1549 def _GetNodeInstancesInner(cfg, fn):
1550   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1551
1552
1553 def _GetNodeInstances(cfg, node_name):
1554   """Returns a list of all primary and secondary instances on a node.
1555
1556   """
1557
1558   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1559
1560
1561 def _GetNodePrimaryInstances(cfg, node_name):
1562   """Returns primary instances on a node.
1563
1564   """
1565   return _GetNodeInstancesInner(cfg,
1566                                 lambda inst: node_name == inst.primary_node)
1567
1568
1569 def _GetNodeSecondaryInstances(cfg, node_name):
1570   """Returns secondary instances on a node.
1571
1572   """
1573   return _GetNodeInstancesInner(cfg,
1574                                 lambda inst: node_name in inst.secondary_nodes)
1575
1576
1577 def _GetStorageTypeArgs(cfg, storage_type):
1578   """Returns the arguments for a storage type.
1579
1580   """
1581   # Special case for file storage
1582   if storage_type == constants.ST_FILE:
1583     # storage.FileStorage wants a list of storage directories
1584     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1585
1586   return []
1587
1588
1589 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1590   faulty = []
1591
1592   for dev in instance.disks:
1593     cfg.SetDiskID(dev, node_name)
1594
1595   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1596                                                                 instance))
1597   result.Raise("Failed to get disk status from node %s" % node_name,
1598                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1599
1600   for idx, bdev_status in enumerate(result.payload):
1601     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1602       faulty.append(idx)
1603
1604   return faulty
1605
1606
1607 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1608   """Check the sanity of iallocator and node arguments and use the
1609   cluster-wide iallocator if appropriate.
1610
1611   Check that at most one of (iallocator, node) is specified. If none is
1612   specified, then the LU's opcode's iallocator slot is filled with the
1613   cluster-wide default iallocator.
1614
1615   @type iallocator_slot: string
1616   @param iallocator_slot: the name of the opcode iallocator slot
1617   @type node_slot: string
1618   @param node_slot: the name of the opcode target node slot
1619
1620   """
1621   node = getattr(lu.op, node_slot, None)
1622   ialloc = getattr(lu.op, iallocator_slot, None)
1623
1624   if node is not None and ialloc is not None:
1625     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1626                                errors.ECODE_INVAL)
1627   elif node is None and ialloc is None:
1628     default_iallocator = lu.cfg.GetDefaultIAllocator()
1629     if default_iallocator:
1630       setattr(lu.op, iallocator_slot, default_iallocator)
1631     else:
1632       raise errors.OpPrereqError("No iallocator or node given and no"
1633                                  " cluster-wide default iallocator found;"
1634                                  " please specify either an iallocator or a"
1635                                  " node, or set a cluster-wide default"
1636                                  " iallocator", errors.ECODE_INVAL)
1637
1638
1639 def _GetDefaultIAllocator(cfg, ialloc):
1640   """Decides on which iallocator to use.
1641
1642   @type cfg: L{config.ConfigWriter}
1643   @param cfg: Cluster configuration object
1644   @type ialloc: string or None
1645   @param ialloc: Iallocator specified in opcode
1646   @rtype: string
1647   @return: Iallocator name
1648
1649   """
1650   if not ialloc:
1651     # Use default iallocator
1652     ialloc = cfg.GetDefaultIAllocator()
1653
1654   if not ialloc:
1655     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1656                                " opcode nor as a cluster-wide default",
1657                                errors.ECODE_INVAL)
1658
1659   return ialloc
1660
1661
1662 class LUClusterPostInit(LogicalUnit):
1663   """Logical unit for running hooks after cluster initialization.
1664
1665   """
1666   HPATH = "cluster-init"
1667   HTYPE = constants.HTYPE_CLUSTER
1668
1669   def BuildHooksEnv(self):
1670     """Build hooks env.
1671
1672     """
1673     return {
1674       "OP_TARGET": self.cfg.GetClusterName(),
1675       }
1676
1677   def BuildHooksNodes(self):
1678     """Build hooks nodes.
1679
1680     """
1681     return ([], [self.cfg.GetMasterNode()])
1682
1683   def Exec(self, feedback_fn):
1684     """Nothing to do.
1685
1686     """
1687     return True
1688
1689
1690 class LUClusterDestroy(LogicalUnit):
1691   """Logical unit for destroying the cluster.
1692
1693   """
1694   HPATH = "cluster-destroy"
1695   HTYPE = constants.HTYPE_CLUSTER
1696
1697   def BuildHooksEnv(self):
1698     """Build hooks env.
1699
1700     """
1701     return {
1702       "OP_TARGET": self.cfg.GetClusterName(),
1703       }
1704
1705   def BuildHooksNodes(self):
1706     """Build hooks nodes.
1707
1708     """
1709     return ([], [])
1710
1711   def CheckPrereq(self):
1712     """Check prerequisites.
1713
1714     This checks whether the cluster is empty.
1715
1716     Any errors are signaled by raising errors.OpPrereqError.
1717
1718     """
1719     master = self.cfg.GetMasterNode()
1720
1721     nodelist = self.cfg.GetNodeList()
1722     if len(nodelist) != 1 or nodelist[0] != master:
1723       raise errors.OpPrereqError("There are still %d node(s) in"
1724                                  " this cluster." % (len(nodelist) - 1),
1725                                  errors.ECODE_INVAL)
1726     instancelist = self.cfg.GetInstanceList()
1727     if instancelist:
1728       raise errors.OpPrereqError("There are still %d instance(s) in"
1729                                  " this cluster." % len(instancelist),
1730                                  errors.ECODE_INVAL)
1731
1732   def Exec(self, feedback_fn):
1733     """Destroys the cluster.
1734
1735     """
1736     master_params = self.cfg.GetMasterNetworkParameters()
1737
1738     # Run post hooks on master node before it's removed
1739     _RunPostHook(self, master_params.name)
1740
1741     ems = self.cfg.GetUseExternalMipScript()
1742     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1743                                                      master_params, ems)
1744     if result.fail_msg:
1745       self.LogWarning("Error disabling the master IP address: %s",
1746                       result.fail_msg)
1747
1748     return master_params.name
1749
1750
1751 def _VerifyCertificate(filename):
1752   """Verifies a certificate for L{LUClusterVerifyConfig}.
1753
1754   @type filename: string
1755   @param filename: Path to PEM file
1756
1757   """
1758   try:
1759     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1760                                            utils.ReadFile(filename))
1761   except Exception, err: # pylint: disable=W0703
1762     return (LUClusterVerifyConfig.ETYPE_ERROR,
1763             "Failed to load X509 certificate %s: %s" % (filename, err))
1764
1765   (errcode, msg) = \
1766     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1767                                 constants.SSL_CERT_EXPIRATION_ERROR)
1768
1769   if msg:
1770     fnamemsg = "While verifying %s: %s" % (filename, msg)
1771   else:
1772     fnamemsg = None
1773
1774   if errcode is None:
1775     return (None, fnamemsg)
1776   elif errcode == utils.CERT_WARNING:
1777     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1778   elif errcode == utils.CERT_ERROR:
1779     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1780
1781   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1782
1783
1784 def _GetAllHypervisorParameters(cluster, instances):
1785   """Compute the set of all hypervisor parameters.
1786
1787   @type cluster: L{objects.Cluster}
1788   @param cluster: the cluster object
1789   @param instances: list of L{objects.Instance}
1790   @param instances: additional instances from which to obtain parameters
1791   @rtype: list of (origin, hypervisor, parameters)
1792   @return: a list with all parameters found, indicating the hypervisor they
1793        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1794
1795   """
1796   hvp_data = []
1797
1798   for hv_name in cluster.enabled_hypervisors:
1799     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1800
1801   for os_name, os_hvp in cluster.os_hvp.items():
1802     for hv_name, hv_params in os_hvp.items():
1803       if hv_params:
1804         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1805         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1806
1807   # TODO: collapse identical parameter values in a single one
1808   for instance in instances:
1809     if instance.hvparams:
1810       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1811                        cluster.FillHV(instance)))
1812
1813   return hvp_data
1814
1815
1816 class _VerifyErrors(object):
1817   """Mix-in for cluster/group verify LUs.
1818
1819   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1820   self.op and self._feedback_fn to be available.)
1821
1822   """
1823
1824   ETYPE_FIELD = "code"
1825   ETYPE_ERROR = "ERROR"
1826   ETYPE_WARNING = "WARNING"
1827
1828   def _Error(self, ecode, item, msg, *args, **kwargs):
1829     """Format an error message.
1830
1831     Based on the opcode's error_codes parameter, either format a
1832     parseable error code, or a simpler error string.
1833
1834     This must be called only from Exec and functions called from Exec.
1835
1836     """
1837     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1838     itype, etxt, _ = ecode
1839     # first complete the msg
1840     if args:
1841       msg = msg % args
1842     # then format the whole message
1843     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1844       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1845     else:
1846       if item:
1847         item = " " + item
1848       else:
1849         item = ""
1850       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1851     # and finally report it via the feedback_fn
1852     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1853
1854   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1855     """Log an error message if the passed condition is True.
1856
1857     """
1858     cond = (bool(cond)
1859             or self.op.debug_simulate_errors) # pylint: disable=E1101
1860
1861     # If the error code is in the list of ignored errors, demote the error to a
1862     # warning
1863     (_, etxt, _) = ecode
1864     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1865       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1866
1867     if cond:
1868       self._Error(ecode, *args, **kwargs)
1869
1870     # do not mark the operation as failed for WARN cases only
1871     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1872       self.bad = self.bad or cond
1873
1874
1875 class LUClusterVerify(NoHooksLU):
1876   """Submits all jobs necessary to verify the cluster.
1877
1878   """
1879   REQ_BGL = False
1880
1881   def ExpandNames(self):
1882     self.needed_locks = {}
1883
1884   def Exec(self, feedback_fn):
1885     jobs = []
1886
1887     if self.op.group_name:
1888       groups = [self.op.group_name]
1889       depends_fn = lambda: None
1890     else:
1891       groups = self.cfg.GetNodeGroupList()
1892
1893       # Verify global configuration
1894       jobs.append([
1895         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1896         ])
1897
1898       # Always depend on global verification
1899       depends_fn = lambda: [(-len(jobs), [])]
1900
1901     jobs.extend(
1902       [opcodes.OpClusterVerifyGroup(group_name=group,
1903                                     ignore_errors=self.op.ignore_errors,
1904                                     depends=depends_fn())]
1905       for group in groups)
1906
1907     # Fix up all parameters
1908     for op in itertools.chain(*jobs): # pylint: disable=W0142
1909       op.debug_simulate_errors = self.op.debug_simulate_errors
1910       op.verbose = self.op.verbose
1911       op.error_codes = self.op.error_codes
1912       try:
1913         op.skip_checks = self.op.skip_checks
1914       except AttributeError:
1915         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1916
1917     return ResultWithJobs(jobs)
1918
1919
1920 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1921   """Verifies the cluster config.
1922
1923   """
1924   REQ_BGL = False
1925
1926   def _VerifyHVP(self, hvp_data):
1927     """Verifies locally the syntax of the hypervisor parameters.
1928
1929     """
1930     for item, hv_name, hv_params in hvp_data:
1931       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1932              (item, hv_name))
1933       try:
1934         hv_class = hypervisor.GetHypervisor(hv_name)
1935         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1936         hv_class.CheckParameterSyntax(hv_params)
1937       except errors.GenericError, err:
1938         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1939
1940   def ExpandNames(self):
1941     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1942     self.share_locks = _ShareAll()
1943
1944   def CheckPrereq(self):
1945     """Check prerequisites.
1946
1947     """
1948     # Retrieve all information
1949     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1950     self.all_node_info = self.cfg.GetAllNodesInfo()
1951     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1952
1953   def Exec(self, feedback_fn):
1954     """Verify integrity of cluster, performing various test on nodes.
1955
1956     """
1957     self.bad = False
1958     self._feedback_fn = feedback_fn
1959
1960     feedback_fn("* Verifying cluster config")
1961
1962     for msg in self.cfg.VerifyConfig():
1963       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1964
1965     feedback_fn("* Verifying cluster certificate files")
1966
1967     for cert_filename in constants.ALL_CERT_FILES:
1968       (errcode, msg) = _VerifyCertificate(cert_filename)
1969       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1970
1971     feedback_fn("* Verifying hypervisor parameters")
1972
1973     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1974                                                 self.all_inst_info.values()))
1975
1976     feedback_fn("* Verifying all nodes belong to an existing group")
1977
1978     # We do this verification here because, should this bogus circumstance
1979     # occur, it would never be caught by VerifyGroup, which only acts on
1980     # nodes/instances reachable from existing node groups.
1981
1982     dangling_nodes = set(node.name for node in self.all_node_info.values()
1983                          if node.group not in self.all_group_info)
1984
1985     dangling_instances = {}
1986     no_node_instances = []
1987
1988     for inst in self.all_inst_info.values():
1989       if inst.primary_node in dangling_nodes:
1990         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1991       elif inst.primary_node not in self.all_node_info:
1992         no_node_instances.append(inst.name)
1993
1994     pretty_dangling = [
1995         "%s (%s)" %
1996         (node.name,
1997          utils.CommaJoin(dangling_instances.get(node.name,
1998                                                 ["no instances"])))
1999         for node in dangling_nodes]
2000
2001     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2002                   None,
2003                   "the following nodes (and their instances) belong to a non"
2004                   " existing group: %s", utils.CommaJoin(pretty_dangling))
2005
2006     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2007                   None,
2008                   "the following instances have a non-existing primary-node:"
2009                   " %s", utils.CommaJoin(no_node_instances))
2010
2011     return not self.bad
2012
2013
2014 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2015   """Verifies the status of a node group.
2016
2017   """
2018   HPATH = "cluster-verify"
2019   HTYPE = constants.HTYPE_CLUSTER
2020   REQ_BGL = False
2021
2022   _HOOKS_INDENT_RE = re.compile("^", re.M)
2023
2024   class NodeImage(object):
2025     """A class representing the logical and physical status of a node.
2026
2027     @type name: string
2028     @ivar name: the node name to which this object refers
2029     @ivar volumes: a structure as returned from
2030         L{ganeti.backend.GetVolumeList} (runtime)
2031     @ivar instances: a list of running instances (runtime)
2032     @ivar pinst: list of configured primary instances (config)
2033     @ivar sinst: list of configured secondary instances (config)
2034     @ivar sbp: dictionary of {primary-node: list of instances} for all
2035         instances for which this node is secondary (config)
2036     @ivar mfree: free memory, as reported by hypervisor (runtime)
2037     @ivar dfree: free disk, as reported by the node (runtime)
2038     @ivar offline: the offline status (config)
2039     @type rpc_fail: boolean
2040     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2041         not whether the individual keys were correct) (runtime)
2042     @type lvm_fail: boolean
2043     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2044     @type hyp_fail: boolean
2045     @ivar hyp_fail: whether the RPC call didn't return the instance list
2046     @type ghost: boolean
2047     @ivar ghost: whether this is a known node or not (config)
2048     @type os_fail: boolean
2049     @ivar os_fail: whether the RPC call didn't return valid OS data
2050     @type oslist: list
2051     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2052     @type vm_capable: boolean
2053     @ivar vm_capable: whether the node can host instances
2054
2055     """
2056     def __init__(self, offline=False, name=None, vm_capable=True):
2057       self.name = name
2058       self.volumes = {}
2059       self.instances = []
2060       self.pinst = []
2061       self.sinst = []
2062       self.sbp = {}
2063       self.mfree = 0
2064       self.dfree = 0
2065       self.offline = offline
2066       self.vm_capable = vm_capable
2067       self.rpc_fail = False
2068       self.lvm_fail = False
2069       self.hyp_fail = False
2070       self.ghost = False
2071       self.os_fail = False
2072       self.oslist = {}
2073
2074   def ExpandNames(self):
2075     # This raises errors.OpPrereqError on its own:
2076     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2077
2078     # Get instances in node group; this is unsafe and needs verification later
2079     inst_names = \
2080       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2081
2082     self.needed_locks = {
2083       locking.LEVEL_INSTANCE: inst_names,
2084       locking.LEVEL_NODEGROUP: [self.group_uuid],
2085       locking.LEVEL_NODE: [],
2086       }
2087
2088     self.share_locks = _ShareAll()
2089
2090   def DeclareLocks(self, level):
2091     if level == locking.LEVEL_NODE:
2092       # Get members of node group; this is unsafe and needs verification later
2093       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2094
2095       all_inst_info = self.cfg.GetAllInstancesInfo()
2096
2097       # In Exec(), we warn about mirrored instances that have primary and
2098       # secondary living in separate node groups. To fully verify that
2099       # volumes for these instances are healthy, we will need to do an
2100       # extra call to their secondaries. We ensure here those nodes will
2101       # be locked.
2102       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2103         # Important: access only the instances whose lock is owned
2104         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2105           nodes.update(all_inst_info[inst].secondary_nodes)
2106
2107       self.needed_locks[locking.LEVEL_NODE] = nodes
2108
2109   def CheckPrereq(self):
2110     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2111     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2112
2113     group_nodes = set(self.group_info.members)
2114     group_instances = \
2115       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2116
2117     unlocked_nodes = \
2118         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2119
2120     unlocked_instances = \
2121         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2122
2123     if unlocked_nodes:
2124       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2125                                  utils.CommaJoin(unlocked_nodes),
2126                                  errors.ECODE_STATE)
2127
2128     if unlocked_instances:
2129       raise errors.OpPrereqError("Missing lock for instances: %s" %
2130                                  utils.CommaJoin(unlocked_instances),
2131                                  errors.ECODE_STATE)
2132
2133     self.all_node_info = self.cfg.GetAllNodesInfo()
2134     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2135
2136     self.my_node_names = utils.NiceSort(group_nodes)
2137     self.my_inst_names = utils.NiceSort(group_instances)
2138
2139     self.my_node_info = dict((name, self.all_node_info[name])
2140                              for name in self.my_node_names)
2141
2142     self.my_inst_info = dict((name, self.all_inst_info[name])
2143                              for name in self.my_inst_names)
2144
2145     # We detect here the nodes that will need the extra RPC calls for verifying
2146     # split LV volumes; they should be locked.
2147     extra_lv_nodes = set()
2148
2149     for inst in self.my_inst_info.values():
2150       if inst.disk_template in constants.DTS_INT_MIRROR:
2151         for nname in inst.all_nodes:
2152           if self.all_node_info[nname].group != self.group_uuid:
2153             extra_lv_nodes.add(nname)
2154
2155     unlocked_lv_nodes = \
2156         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2157
2158     if unlocked_lv_nodes:
2159       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2160                                  utils.CommaJoin(unlocked_lv_nodes),
2161                                  errors.ECODE_STATE)
2162     self.extra_lv_nodes = list(extra_lv_nodes)
2163
2164   def _VerifyNode(self, ninfo, nresult):
2165     """Perform some basic validation on data returned from a node.
2166
2167       - check the result data structure is well formed and has all the
2168         mandatory fields
2169       - check ganeti version
2170
2171     @type ninfo: L{objects.Node}
2172     @param ninfo: the node to check
2173     @param nresult: the results from the node
2174     @rtype: boolean
2175     @return: whether overall this call was successful (and we can expect
2176          reasonable values in the respose)
2177
2178     """
2179     node = ninfo.name
2180     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2181
2182     # main result, nresult should be a non-empty dict
2183     test = not nresult or not isinstance(nresult, dict)
2184     _ErrorIf(test, constants.CV_ENODERPC, node,
2185                   "unable to verify node: no data returned")
2186     if test:
2187       return False
2188
2189     # compares ganeti version
2190     local_version = constants.PROTOCOL_VERSION
2191     remote_version = nresult.get("version", None)
2192     test = not (remote_version and
2193                 isinstance(remote_version, (list, tuple)) and
2194                 len(remote_version) == 2)
2195     _ErrorIf(test, constants.CV_ENODERPC, node,
2196              "connection to node returned invalid data")
2197     if test:
2198       return False
2199
2200     test = local_version != remote_version[0]
2201     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2202              "incompatible protocol versions: master %s,"
2203              " node %s", local_version, remote_version[0])
2204     if test:
2205       return False
2206
2207     # node seems compatible, we can actually try to look into its results
2208
2209     # full package version
2210     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2211                   constants.CV_ENODEVERSION, node,
2212                   "software version mismatch: master %s, node %s",
2213                   constants.RELEASE_VERSION, remote_version[1],
2214                   code=self.ETYPE_WARNING)
2215
2216     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2217     if ninfo.vm_capable and isinstance(hyp_result, dict):
2218       for hv_name, hv_result in hyp_result.iteritems():
2219         test = hv_result is not None
2220         _ErrorIf(test, constants.CV_ENODEHV, node,
2221                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2222
2223     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2224     if ninfo.vm_capable and isinstance(hvp_result, list):
2225       for item, hv_name, hv_result in hvp_result:
2226         _ErrorIf(True, constants.CV_ENODEHV, node,
2227                  "hypervisor %s parameter verify failure (source %s): %s",
2228                  hv_name, item, hv_result)
2229
2230     test = nresult.get(constants.NV_NODESETUP,
2231                        ["Missing NODESETUP results"])
2232     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2233              "; ".join(test))
2234
2235     return True
2236
2237   def _VerifyNodeTime(self, ninfo, nresult,
2238                       nvinfo_starttime, nvinfo_endtime):
2239     """Check the node time.
2240
2241     @type ninfo: L{objects.Node}
2242     @param ninfo: the node to check
2243     @param nresult: the remote results for the node
2244     @param nvinfo_starttime: the start time of the RPC call
2245     @param nvinfo_endtime: the end time of the RPC call
2246
2247     """
2248     node = ninfo.name
2249     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2250
2251     ntime = nresult.get(constants.NV_TIME, None)
2252     try:
2253       ntime_merged = utils.MergeTime(ntime)
2254     except (ValueError, TypeError):
2255       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2256       return
2257
2258     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2259       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2260     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2261       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2262     else:
2263       ntime_diff = None
2264
2265     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2266              "Node time diverges by at least %s from master node time",
2267              ntime_diff)
2268
2269   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2270     """Check the node LVM results.
2271
2272     @type ninfo: L{objects.Node}
2273     @param ninfo: the node to check
2274     @param nresult: the remote results for the node
2275     @param vg_name: the configured VG name
2276
2277     """
2278     if vg_name is None:
2279       return
2280
2281     node = ninfo.name
2282     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2283
2284     # checks vg existence and size > 20G
2285     vglist = nresult.get(constants.NV_VGLIST, None)
2286     test = not vglist
2287     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2288     if not test:
2289       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2290                                             constants.MIN_VG_SIZE)
2291       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2292
2293     # check pv names
2294     pvlist = nresult.get(constants.NV_PVLIST, None)
2295     test = pvlist is None
2296     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2297     if not test:
2298       # check that ':' is not present in PV names, since it's a
2299       # special character for lvcreate (denotes the range of PEs to
2300       # use on the PV)
2301       for _, pvname, owner_vg in pvlist:
2302         test = ":" in pvname
2303         _ErrorIf(test, constants.CV_ENODELVM, node,
2304                  "Invalid character ':' in PV '%s' of VG '%s'",
2305                  pvname, owner_vg)
2306
2307   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2308     """Check the node bridges.
2309
2310     @type ninfo: L{objects.Node}
2311     @param ninfo: the node to check
2312     @param nresult: the remote results for the node
2313     @param bridges: the expected list of bridges
2314
2315     """
2316     if not bridges:
2317       return
2318
2319     node = ninfo.name
2320     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2321
2322     missing = nresult.get(constants.NV_BRIDGES, None)
2323     test = not isinstance(missing, list)
2324     _ErrorIf(test, constants.CV_ENODENET, node,
2325              "did not return valid bridge information")
2326     if not test:
2327       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2328                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2329
2330   def _VerifyNodeUserScripts(self, ninfo, nresult):
2331     """Check the results of user scripts presence and executability on the node
2332
2333     @type ninfo: L{objects.Node}
2334     @param ninfo: the node to check
2335     @param nresult: the remote results for the node
2336
2337     """
2338     node = ninfo.name
2339
2340     test = not constants.NV_USERSCRIPTS in nresult
2341     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2342                   "did not return user scripts information")
2343
2344     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2345     if not test:
2346       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2347                     "user scripts not present or not executable: %s" %
2348                     utils.CommaJoin(sorted(broken_scripts)))
2349
2350   def _VerifyNodeNetwork(self, ninfo, nresult):
2351     """Check the node network connectivity results.
2352
2353     @type ninfo: L{objects.Node}
2354     @param ninfo: the node to check
2355     @param nresult: the remote results for the node
2356
2357     """
2358     node = ninfo.name
2359     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2360
2361     test = constants.NV_NODELIST not in nresult
2362     _ErrorIf(test, constants.CV_ENODESSH, node,
2363              "node hasn't returned node ssh connectivity data")
2364     if not test:
2365       if nresult[constants.NV_NODELIST]:
2366         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2367           _ErrorIf(True, constants.CV_ENODESSH, node,
2368                    "ssh communication with node '%s': %s", a_node, a_msg)
2369
2370     test = constants.NV_NODENETTEST not in nresult
2371     _ErrorIf(test, constants.CV_ENODENET, node,
2372              "node hasn't returned node tcp connectivity data")
2373     if not test:
2374       if nresult[constants.NV_NODENETTEST]:
2375         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2376         for anode in nlist:
2377           _ErrorIf(True, constants.CV_ENODENET, node,
2378                    "tcp communication with node '%s': %s",
2379                    anode, nresult[constants.NV_NODENETTEST][anode])
2380
2381     test = constants.NV_MASTERIP not in nresult
2382     _ErrorIf(test, constants.CV_ENODENET, node,
2383              "node hasn't returned node master IP reachability data")
2384     if not test:
2385       if not nresult[constants.NV_MASTERIP]:
2386         if node == self.master_node:
2387           msg = "the master node cannot reach the master IP (not configured?)"
2388         else:
2389           msg = "cannot reach the master IP"
2390         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2391
2392   def _VerifyInstance(self, instance, instanceconfig, node_image,
2393                       diskstatus):
2394     """Verify an instance.
2395
2396     This function checks to see if the required block devices are
2397     available on the instance's node.
2398
2399     """
2400     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2401     node_current = instanceconfig.primary_node
2402
2403     node_vol_should = {}
2404     instanceconfig.MapLVsByNode(node_vol_should)
2405
2406     cluster = self.cfg.GetClusterInfo()
2407     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2408                                                             self.group_info)
2409     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2410     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2411
2412     for node in node_vol_should:
2413       n_img = node_image[node]
2414       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2415         # ignore missing volumes on offline or broken nodes
2416         continue
2417       for volume in node_vol_should[node]:
2418         test = volume not in n_img.volumes
2419         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2420                  "volume %s missing on node %s", volume, node)
2421
2422     if instanceconfig.admin_state == constants.ADMINST_UP:
2423       pri_img = node_image[node_current]
2424       test = instance not in pri_img.instances and not pri_img.offline
2425       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2426                "instance not running on its primary node %s",
2427                node_current)
2428
2429     diskdata = [(nname, success, status, idx)
2430                 for (nname, disks) in diskstatus.items()
2431                 for idx, (success, status) in enumerate(disks)]
2432
2433     for nname, success, bdev_status, idx in diskdata:
2434       # the 'ghost node' construction in Exec() ensures that we have a
2435       # node here
2436       snode = node_image[nname]
2437       bad_snode = snode.ghost or snode.offline
2438       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2439                not success and not bad_snode,
2440                constants.CV_EINSTANCEFAULTYDISK, instance,
2441                "couldn't retrieve status for disk/%s on %s: %s",
2442                idx, nname, bdev_status)
2443       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2444                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2445                constants.CV_EINSTANCEFAULTYDISK, instance,
2446                "disk/%s on %s is faulty", idx, nname)
2447
2448   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2449     """Verify if there are any unknown volumes in the cluster.
2450
2451     The .os, .swap and backup volumes are ignored. All other volumes are
2452     reported as unknown.
2453
2454     @type reserved: L{ganeti.utils.FieldSet}
2455     @param reserved: a FieldSet of reserved volume names
2456
2457     """
2458     for node, n_img in node_image.items():
2459       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2460           self.all_node_info[node].group != self.group_uuid):
2461         # skip non-healthy nodes
2462         continue
2463       for volume in n_img.volumes:
2464         test = ((node not in node_vol_should or
2465                 volume not in node_vol_should[node]) and
2466                 not reserved.Matches(volume))
2467         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2468                       "volume %s is unknown", volume)
2469
2470   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2471     """Verify N+1 Memory Resilience.
2472
2473     Check that if one single node dies we can still start all the
2474     instances it was primary for.
2475
2476     """
2477     cluster_info = self.cfg.GetClusterInfo()
2478     for node, n_img in node_image.items():
2479       # This code checks that every node which is now listed as
2480       # secondary has enough memory to host all instances it is
2481       # supposed to should a single other node in the cluster fail.
2482       # FIXME: not ready for failover to an arbitrary node
2483       # FIXME: does not support file-backed instances
2484       # WARNING: we currently take into account down instances as well
2485       # as up ones, considering that even if they're down someone
2486       # might want to start them even in the event of a node failure.
2487       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2488         # we're skipping nodes marked offline and nodes in other groups from
2489         # the N+1 warning, since most likely we don't have good memory
2490         # infromation from them; we already list instances living on such
2491         # nodes, and that's enough warning
2492         continue
2493       #TODO(dynmem): also consider ballooning out other instances
2494       for prinode, instances in n_img.sbp.items():
2495         needed_mem = 0
2496         for instance in instances:
2497           bep = cluster_info.FillBE(instance_cfg[instance])
2498           if bep[constants.BE_AUTO_BALANCE]:
2499             needed_mem += bep[constants.BE_MINMEM]
2500         test = n_img.mfree < needed_mem
2501         self._ErrorIf(test, constants.CV_ENODEN1, node,
2502                       "not enough memory to accomodate instance failovers"
2503                       " should node %s fail (%dMiB needed, %dMiB available)",
2504                       prinode, needed_mem, n_img.mfree)
2505
2506   @classmethod
2507   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2508                    (files_all, files_opt, files_mc, files_vm)):
2509     """Verifies file checksums collected from all nodes.
2510
2511     @param errorif: Callback for reporting errors
2512     @param nodeinfo: List of L{objects.Node} objects
2513     @param master_node: Name of master node
2514     @param all_nvinfo: RPC results
2515
2516     """
2517     # Define functions determining which nodes to consider for a file
2518     files2nodefn = [
2519       (files_all, None),
2520       (files_mc, lambda node: (node.master_candidate or
2521                                node.name == master_node)),
2522       (files_vm, lambda node: node.vm_capable),
2523       ]
2524
2525     # Build mapping from filename to list of nodes which should have the file
2526     nodefiles = {}
2527     for (files, fn) in files2nodefn:
2528       if fn is None:
2529         filenodes = nodeinfo
2530       else:
2531         filenodes = filter(fn, nodeinfo)
2532       nodefiles.update((filename,
2533                         frozenset(map(operator.attrgetter("name"), filenodes)))
2534                        for filename in files)
2535
2536     assert set(nodefiles) == (files_all | files_mc | files_vm)
2537
2538     fileinfo = dict((filename, {}) for filename in nodefiles)
2539     ignore_nodes = set()
2540
2541     for node in nodeinfo:
2542       if node.offline:
2543         ignore_nodes.add(node.name)
2544         continue
2545
2546       nresult = all_nvinfo[node.name]
2547
2548       if nresult.fail_msg or not nresult.payload:
2549         node_files = None
2550       else:
2551         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2552
2553       test = not (node_files and isinstance(node_files, dict))
2554       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2555               "Node did not return file checksum data")
2556       if test:
2557         ignore_nodes.add(node.name)
2558         continue
2559
2560       # Build per-checksum mapping from filename to nodes having it
2561       for (filename, checksum) in node_files.items():
2562         assert filename in nodefiles
2563         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2564
2565     for (filename, checksums) in fileinfo.items():
2566       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2567
2568       # Nodes having the file
2569       with_file = frozenset(node_name
2570                             for nodes in fileinfo[filename].values()
2571                             for node_name in nodes) - ignore_nodes
2572
2573       expected_nodes = nodefiles[filename] - ignore_nodes
2574
2575       # Nodes missing file
2576       missing_file = expected_nodes - with_file
2577
2578       if filename in files_opt:
2579         # All or no nodes
2580         errorif(missing_file and missing_file != expected_nodes,
2581                 constants.CV_ECLUSTERFILECHECK, None,
2582                 "File %s is optional, but it must exist on all or no"
2583                 " nodes (not found on %s)",
2584                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2585       else:
2586         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2587                 "File %s is missing from node(s) %s", filename,
2588                 utils.CommaJoin(utils.NiceSort(missing_file)))
2589
2590         # Warn if a node has a file it shouldn't
2591         unexpected = with_file - expected_nodes
2592         errorif(unexpected,
2593                 constants.CV_ECLUSTERFILECHECK, None,
2594                 "File %s should not exist on node(s) %s",
2595                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2596
2597       # See if there are multiple versions of the file
2598       test = len(checksums) > 1
2599       if test:
2600         variants = ["variant %s on %s" %
2601                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2602                     for (idx, (checksum, nodes)) in
2603                       enumerate(sorted(checksums.items()))]
2604       else:
2605         variants = []
2606
2607       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2608               "File %s found with %s different checksums (%s)",
2609               filename, len(checksums), "; ".join(variants))
2610
2611   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2612                       drbd_map):
2613     """Verifies and the node DRBD status.
2614
2615     @type ninfo: L{objects.Node}
2616     @param ninfo: the node to check
2617     @param nresult: the remote results for the node
2618     @param instanceinfo: the dict of instances
2619     @param drbd_helper: the configured DRBD usermode helper
2620     @param drbd_map: the DRBD map as returned by
2621         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2622
2623     """
2624     node = ninfo.name
2625     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2626
2627     if drbd_helper:
2628       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2629       test = (helper_result is None)
2630       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2631                "no drbd usermode helper returned")
2632       if helper_result:
2633         status, payload = helper_result
2634         test = not status
2635         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2636                  "drbd usermode helper check unsuccessful: %s", payload)
2637         test = status and (payload != drbd_helper)
2638         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2639                  "wrong drbd usermode helper: %s", payload)
2640
2641     # compute the DRBD minors
2642     node_drbd = {}
2643     for minor, instance in drbd_map[node].items():
2644       test = instance not in instanceinfo
2645       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2646                "ghost instance '%s' in temporary DRBD map", instance)
2647         # ghost instance should not be running, but otherwise we
2648         # don't give double warnings (both ghost instance and
2649         # unallocated minor in use)
2650       if test:
2651         node_drbd[minor] = (instance, False)
2652       else:
2653         instance = instanceinfo[instance]
2654         node_drbd[minor] = (instance.name,
2655                             instance.admin_state == constants.ADMINST_UP)
2656
2657     # and now check them
2658     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2659     test = not isinstance(used_minors, (tuple, list))
2660     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2661              "cannot parse drbd status file: %s", str(used_minors))
2662     if test:
2663       # we cannot check drbd status
2664       return
2665
2666     for minor, (iname, must_exist) in node_drbd.items():
2667       test = minor not in used_minors and must_exist
2668       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2669                "drbd minor %d of instance %s is not active", minor, iname)
2670     for minor in used_minors:
2671       test = minor not in node_drbd
2672       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2673                "unallocated drbd minor %d is in use", minor)
2674
2675   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2676     """Builds the node OS structures.
2677
2678     @type ninfo: L{objects.Node}
2679     @param ninfo: the node to check
2680     @param nresult: the remote results for the node
2681     @param nimg: the node image object
2682
2683     """
2684     node = ninfo.name
2685     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2686
2687     remote_os = nresult.get(constants.NV_OSLIST, None)
2688     test = (not isinstance(remote_os, list) or
2689             not compat.all(isinstance(v, list) and len(v) == 7
2690                            for v in remote_os))
2691
2692     _ErrorIf(test, constants.CV_ENODEOS, node,
2693              "node hasn't returned valid OS data")
2694
2695     nimg.os_fail = test
2696
2697     if test:
2698       return
2699
2700     os_dict = {}
2701
2702     for (name, os_path, status, diagnose,
2703          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2704
2705       if name not in os_dict:
2706         os_dict[name] = []
2707
2708       # parameters is a list of lists instead of list of tuples due to
2709       # JSON lacking a real tuple type, fix it:
2710       parameters = [tuple(v) for v in parameters]
2711       os_dict[name].append((os_path, status, diagnose,
2712                             set(variants), set(parameters), set(api_ver)))
2713
2714     nimg.oslist = os_dict
2715
2716   def _VerifyNodeOS(self, ninfo, nimg, base):
2717     """Verifies the node OS list.
2718
2719     @type ninfo: L{objects.Node}
2720     @param ninfo: the node to check
2721     @param nimg: the node image object
2722     @param base: the 'template' node we match against (e.g. from the master)
2723
2724     """
2725     node = ninfo.name
2726     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2727
2728     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2729
2730     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2731     for os_name, os_data in nimg.oslist.items():
2732       assert os_data, "Empty OS status for OS %s?!" % os_name
2733       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2734       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2735                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2736       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2737                "OS '%s' has multiple entries (first one shadows the rest): %s",
2738                os_name, utils.CommaJoin([v[0] for v in os_data]))
2739       # comparisons with the 'base' image
2740       test = os_name not in base.oslist
2741       _ErrorIf(test, constants.CV_ENODEOS, node,
2742                "Extra OS %s not present on reference node (%s)",
2743                os_name, base.name)
2744       if test:
2745         continue
2746       assert base.oslist[os_name], "Base node has empty OS status?"
2747       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2748       if not b_status:
2749         # base OS is invalid, skipping
2750         continue
2751       for kind, a, b in [("API version", f_api, b_api),
2752                          ("variants list", f_var, b_var),
2753                          ("parameters", beautify_params(f_param),
2754                           beautify_params(b_param))]:
2755         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2756                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2757                  kind, os_name, base.name,
2758                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2759
2760     # check any missing OSes
2761     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2762     _ErrorIf(missing, constants.CV_ENODEOS, node,
2763              "OSes present on reference node %s but missing on this node: %s",
2764              base.name, utils.CommaJoin(missing))
2765
2766   def _VerifyOob(self, ninfo, nresult):
2767     """Verifies out of band functionality of a node.
2768
2769     @type ninfo: L{objects.Node}
2770     @param ninfo: the node to check
2771     @param nresult: the remote results for the node
2772
2773     """
2774     node = ninfo.name
2775     # We just have to verify the paths on master and/or master candidates
2776     # as the oob helper is invoked on the master
2777     if ((ninfo.master_candidate or ninfo.master_capable) and
2778         constants.NV_OOB_PATHS in nresult):
2779       for path_result in nresult[constants.NV_OOB_PATHS]:
2780         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2781
2782   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2783     """Verifies and updates the node volume data.
2784
2785     This function will update a L{NodeImage}'s internal structures
2786     with data from the remote call.
2787
2788     @type ninfo: L{objects.Node}
2789     @param ninfo: the node to check
2790     @param nresult: the remote results for the node
2791     @param nimg: the node image object
2792     @param vg_name: the configured VG name
2793
2794     """
2795     node = ninfo.name
2796     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2797
2798     nimg.lvm_fail = True
2799     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2800     if vg_name is None:
2801       pass
2802     elif isinstance(lvdata, basestring):
2803       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2804                utils.SafeEncode(lvdata))
2805     elif not isinstance(lvdata, dict):
2806       _ErrorIf(True, constants.CV_ENODELVM, node,
2807                "rpc call to node failed (lvlist)")
2808     else:
2809       nimg.volumes = lvdata
2810       nimg.lvm_fail = False
2811
2812   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2813     """Verifies and updates the node instance list.
2814
2815     If the listing was successful, then updates this node's instance
2816     list. Otherwise, it marks the RPC call as failed for the instance
2817     list key.
2818
2819     @type ninfo: L{objects.Node}
2820     @param ninfo: the node to check
2821     @param nresult: the remote results for the node
2822     @param nimg: the node image object
2823
2824     """
2825     idata = nresult.get(constants.NV_INSTANCELIST, None)
2826     test = not isinstance(idata, list)
2827     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2828                   "rpc call to node failed (instancelist): %s",
2829                   utils.SafeEncode(str(idata)))
2830     if test:
2831       nimg.hyp_fail = True
2832     else:
2833       nimg.instances = idata
2834
2835   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2836     """Verifies and computes a node information map
2837
2838     @type ninfo: L{objects.Node}
2839     @param ninfo: the node to check
2840     @param nresult: the remote results for the node
2841     @param nimg: the node image object
2842     @param vg_name: the configured VG name
2843
2844     """
2845     node = ninfo.name
2846     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2847
2848     # try to read free memory (from the hypervisor)
2849     hv_info = nresult.get(constants.NV_HVINFO, None)
2850     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2851     _ErrorIf(test, constants.CV_ENODEHV, node,
2852              "rpc call to node failed (hvinfo)")
2853     if not test:
2854       try:
2855         nimg.mfree = int(hv_info["memory_free"])
2856       except (ValueError, TypeError):
2857         _ErrorIf(True, constants.CV_ENODERPC, node,
2858                  "node returned invalid nodeinfo, check hypervisor")
2859
2860     # FIXME: devise a free space model for file based instances as well
2861     if vg_name is not None:
2862       test = (constants.NV_VGLIST not in nresult or
2863               vg_name not in nresult[constants.NV_VGLIST])
2864       _ErrorIf(test, constants.CV_ENODELVM, node,
2865                "node didn't return data for the volume group '%s'"
2866                " - it is either missing or broken", vg_name)
2867       if not test:
2868         try:
2869           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2870         except (ValueError, TypeError):
2871           _ErrorIf(True, constants.CV_ENODERPC, node,
2872                    "node returned invalid LVM info, check LVM status")
2873
2874   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2875     """Gets per-disk status information for all instances.
2876
2877     @type nodelist: list of strings
2878     @param nodelist: Node names
2879     @type node_image: dict of (name, L{objects.Node})
2880     @param node_image: Node objects
2881     @type instanceinfo: dict of (name, L{objects.Instance})
2882     @param instanceinfo: Instance objects
2883     @rtype: {instance: {node: [(succes, payload)]}}
2884     @return: a dictionary of per-instance dictionaries with nodes as
2885         keys and disk information as values; the disk information is a
2886         list of tuples (success, payload)
2887
2888     """
2889     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2890
2891     node_disks = {}
2892     node_disks_devonly = {}
2893     diskless_instances = set()
2894     diskless = constants.DT_DISKLESS
2895
2896     for nname in nodelist:
2897       node_instances = list(itertools.chain(node_image[nname].pinst,
2898                                             node_image[nname].sinst))
2899       diskless_instances.update(inst for inst in node_instances
2900                                 if instanceinfo[inst].disk_template == diskless)
2901       disks = [(inst, disk)
2902                for inst in node_instances
2903                for disk in instanceinfo[inst].disks]
2904
2905       if not disks:
2906         # No need to collect data
2907         continue
2908
2909       node_disks[nname] = disks
2910
2911       # _AnnotateDiskParams makes already copies of the disks
2912       devonly = []
2913       for (inst, dev) in disks:
2914         (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2915         self.cfg.SetDiskID(anno_disk, nname)
2916         devonly.append(anno_disk)
2917
2918       node_disks_devonly[nname] = devonly
2919
2920     assert len(node_disks) == len(node_disks_devonly)
2921
2922     # Collect data from all nodes with disks
2923     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2924                                                           node_disks_devonly)
2925
2926     assert len(result) == len(node_disks)
2927
2928     instdisk = {}
2929
2930     for (nname, nres) in result.items():
2931       disks = node_disks[nname]
2932
2933       if nres.offline:
2934         # No data from this node
2935         data = len(disks) * [(False, "node offline")]
2936       else:
2937         msg = nres.fail_msg
2938         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2939                  "while getting disk information: %s", msg)
2940         if msg:
2941           # No data from this node
2942           data = len(disks) * [(False, msg)]
2943         else:
2944           data = []
2945           for idx, i in enumerate(nres.payload):
2946             if isinstance(i, (tuple, list)) and len(i) == 2:
2947               data.append(i)
2948             else:
2949               logging.warning("Invalid result from node %s, entry %d: %s",
2950                               nname, idx, i)
2951               data.append((False, "Invalid result from the remote node"))
2952
2953       for ((inst, _), status) in zip(disks, data):
2954         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2955
2956     # Add empty entries for diskless instances.
2957     for inst in diskless_instances:
2958       assert inst not in instdisk
2959       instdisk[inst] = {}
2960
2961     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2962                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2963                       compat.all(isinstance(s, (tuple, list)) and
2964                                  len(s) == 2 for s in statuses)
2965                       for inst, nnames in instdisk.items()
2966                       for nname, statuses in nnames.items())
2967     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2968
2969     return instdisk
2970
2971   @staticmethod
2972   def _SshNodeSelector(group_uuid, all_nodes):
2973     """Create endless iterators for all potential SSH check hosts.
2974
2975     """
2976     nodes = [node for node in all_nodes
2977              if (node.group != group_uuid and
2978                  not node.offline)]
2979     keyfunc = operator.attrgetter("group")
2980
2981     return map(itertools.cycle,
2982                [sorted(map(operator.attrgetter("name"), names))
2983                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2984                                                   keyfunc)])
2985
2986   @classmethod
2987   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2988     """Choose which nodes should talk to which other nodes.
2989
2990     We will make nodes contact all nodes in their group, and one node from
2991     every other group.
2992
2993     @warning: This algorithm has a known issue if one node group is much
2994       smaller than others (e.g. just one node). In such a case all other
2995       nodes will talk to the single node.
2996
2997     """
2998     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2999     sel = cls._SshNodeSelector(group_uuid, all_nodes)
3000
3001     return (online_nodes,
3002             dict((name, sorted([i.next() for i in sel]))
3003                  for name in online_nodes))
3004
3005   def BuildHooksEnv(self):
3006     """Build hooks env.
3007
3008     Cluster-Verify hooks just ran in the post phase and their failure makes
3009     the output be logged in the verify output and the verification to fail.
3010
3011     """
3012     env = {
3013       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3014       }
3015
3016     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3017                for node in self.my_node_info.values())
3018
3019     return env
3020
3021   def BuildHooksNodes(self):
3022     """Build hooks nodes.
3023
3024     """
3025     return ([], self.my_node_names)
3026
3027   def Exec(self, feedback_fn):
3028     """Verify integrity of the node group, performing various test on nodes.
3029
3030     """
3031     # This method has too many local variables. pylint: disable=R0914
3032     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3033
3034     if not self.my_node_names:
3035       # empty node group
3036       feedback_fn("* Empty node group, skipping verification")
3037       return True
3038
3039     self.bad = False
3040     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3041     verbose = self.op.verbose
3042     self._feedback_fn = feedback_fn
3043
3044     vg_name = self.cfg.GetVGName()
3045     drbd_helper = self.cfg.GetDRBDHelper()
3046     cluster = self.cfg.GetClusterInfo()
3047     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3048     hypervisors = cluster.enabled_hypervisors
3049     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3050
3051     i_non_redundant = [] # Non redundant instances
3052     i_non_a_balanced = [] # Non auto-balanced instances
3053     i_offline = 0 # Count of offline instances
3054     n_offline = 0 # Count of offline nodes
3055     n_drained = 0 # Count of nodes being drained
3056     node_vol_should = {}
3057
3058     # FIXME: verify OS list
3059
3060     # File verification
3061     filemap = _ComputeAncillaryFiles(cluster, False)
3062
3063     # do local checksums
3064     master_node = self.master_node = self.cfg.GetMasterNode()
3065     master_ip = self.cfg.GetMasterIP()
3066
3067     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3068
3069     user_scripts = []
3070     if self.cfg.GetUseExternalMipScript():
3071       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3072
3073     node_verify_param = {
3074       constants.NV_FILELIST:
3075         utils.UniqueSequence(filename
3076                              for files in filemap
3077                              for filename in files),
3078       constants.NV_NODELIST:
3079         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3080                                   self.all_node_info.values()),
3081       constants.NV_HYPERVISOR: hypervisors,
3082       constants.NV_HVPARAMS:
3083         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3084       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3085                                  for node in node_data_list
3086                                  if not node.offline],
3087       constants.NV_INSTANCELIST: hypervisors,
3088       constants.NV_VERSION: None,
3089       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3090       constants.NV_NODESETUP: None,
3091       constants.NV_TIME: None,
3092       constants.NV_MASTERIP: (master_node, master_ip),
3093       constants.NV_OSLIST: None,
3094       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3095       constants.NV_USERSCRIPTS: user_scripts,
3096       }
3097
3098     if vg_name is not None:
3099       node_verify_param[constants.NV_VGLIST] = None
3100       node_verify_param[constants.NV_LVLIST] = vg_name
3101       node_verify_param[constants.NV_PVLIST] = [vg_name]
3102       node_verify_param[constants.NV_DRBDLIST] = None
3103
3104     if drbd_helper:
3105       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3106
3107     # bridge checks
3108     # FIXME: this needs to be changed per node-group, not cluster-wide
3109     bridges = set()
3110     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3111     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3112       bridges.add(default_nicpp[constants.NIC_LINK])
3113     for instance in self.my_inst_info.values():
3114       for nic in instance.nics:
3115         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3116         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3117           bridges.add(full_nic[constants.NIC_LINK])
3118
3119     if bridges:
3120       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3121
3122     # Build our expected cluster state
3123     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3124                                                  name=node.name,
3125                                                  vm_capable=node.vm_capable))
3126                       for node in node_data_list)
3127
3128     # Gather OOB paths
3129     oob_paths = []
3130     for node in self.all_node_info.values():
3131       path = _SupportsOob(self.cfg, node)
3132       if path and path not in oob_paths:
3133         oob_paths.append(path)
3134
3135     if oob_paths:
3136       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3137
3138     for instance in self.my_inst_names:
3139       inst_config = self.my_inst_info[instance]
3140       if inst_config.admin_state == constants.ADMINST_OFFLINE:
3141         i_offline += 1
3142
3143       for nname in inst_config.all_nodes:
3144         if nname not in node_image:
3145           gnode = self.NodeImage(name=nname)
3146           gnode.ghost = (nname not in self.all_node_info)
3147           node_image[nname] = gnode
3148
3149       inst_config.MapLVsByNode(node_vol_should)
3150
3151       pnode = inst_config.primary_node
3152       node_image[pnode].pinst.append(instance)
3153
3154       for snode in inst_config.secondary_nodes:
3155         nimg = node_image[snode]
3156         nimg.sinst.append(instance)
3157         if pnode not in nimg.sbp:
3158           nimg.sbp[pnode] = []
3159         nimg.sbp[pnode].append(instance)
3160
3161     # At this point, we have the in-memory data structures complete,
3162     # except for the runtime information, which we'll gather next
3163
3164     # Due to the way our RPC system works, exact response times cannot be
3165     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3166     # time before and after executing the request, we can at least have a time
3167     # window.
3168     nvinfo_starttime = time.time()
3169     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3170                                            node_verify_param,
3171                                            self.cfg.GetClusterName())
3172     nvinfo_endtime = time.time()
3173
3174     if self.extra_lv_nodes and vg_name is not None:
3175       extra_lv_nvinfo = \
3176           self.rpc.call_node_verify(self.extra_lv_nodes,
3177                                     {constants.NV_LVLIST: vg_name},
3178                                     self.cfg.GetClusterName())
3179     else:
3180       extra_lv_nvinfo = {}
3181
3182     all_drbd_map = self.cfg.ComputeDRBDMap()
3183
3184     feedback_fn("* Gathering disk information (%s nodes)" %
3185                 len(self.my_node_names))
3186     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3187                                      self.my_inst_info)
3188
3189     feedback_fn("* Verifying configuration file consistency")
3190
3191     # If not all nodes are being checked, we need to make sure the master node
3192     # and a non-checked vm_capable node are in the list.
3193     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3194     if absent_nodes:
3195       vf_nvinfo = all_nvinfo.copy()
3196       vf_node_info = list(self.my_node_info.values())
3197       additional_nodes = []
3198       if master_node not in self.my_node_info:
3199         additional_nodes.append(master_node)
3200         vf_node_info.append(self.all_node_info[master_node])
3201       # Add the first vm_capable node we find which is not included,
3202       # excluding the master node (which we already have)
3203       for node in absent_nodes:
3204         nodeinfo = self.all_node_info[node]
3205         if (nodeinfo.vm_capable and not nodeinfo.offline and
3206             node != master_node):
3207           additional_nodes.append(node)
3208           vf_node_info.append(self.all_node_info[node])
3209           break
3210       key = constants.NV_FILELIST
3211       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3212                                                  {key: node_verify_param[key]},
3213                                                  self.cfg.GetClusterName()))
3214     else:
3215       vf_nvinfo = all_nvinfo
3216       vf_node_info = self.my_node_info.values()
3217
3218     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3219
3220     feedback_fn("* Verifying node status")
3221
3222     refos_img = None
3223
3224     for node_i in node_data_list:
3225       node = node_i.name
3226       nimg = node_image[node]
3227
3228       if node_i.offline:
3229         if verbose:
3230           feedback_fn("* Skipping offline node %s" % (node,))
3231         n_offline += 1
3232         continue
3233
3234       if node == master_node:
3235         ntype = "master"
3236       elif node_i.master_candidate:
3237         ntype = "master candidate"
3238       elif node_i.drained:
3239         ntype = "drained"
3240         n_drained += 1
3241       else:
3242         ntype = "regular"
3243       if verbose:
3244         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3245
3246       msg = all_nvinfo[node].fail_msg
3247       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3248                msg)
3249       if msg:
3250         nimg.rpc_fail = True
3251         continue
3252
3253       nresult = all_nvinfo[node].payload
3254
3255       nimg.call_ok = self._VerifyNode(node_i, nresult)
3256       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3257       self._VerifyNodeNetwork(node_i, nresult)
3258       self._VerifyNodeUserScripts(node_i, nresult)
3259       self._VerifyOob(node_i, nresult)
3260
3261       if nimg.vm_capable:
3262         self._VerifyNodeLVM(node_i, nresult, vg_name)
3263         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3264                              all_drbd_map)
3265
3266         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3267         self._UpdateNodeInstances(node_i, nresult, nimg)
3268         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3269         self._UpdateNodeOS(node_i, nresult, nimg)
3270
3271         if not nimg.os_fail:
3272           if refos_img is None:
3273             refos_img = nimg
3274           self._VerifyNodeOS(node_i, nimg, refos_img)
3275         self._VerifyNodeBridges(node_i, nresult, bridges)
3276
3277         # Check whether all running instancies are primary for the node. (This
3278         # can no longer be done from _VerifyInstance below, since some of the
3279         # wrong instances could be from other node groups.)
3280         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3281
3282         for inst in non_primary_inst:
3283           test = inst in self.all_inst_info
3284           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3285                    "instance should not run on node %s", node_i.name)
3286           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3287                    "node is running unknown instance %s", inst)
3288
3289     for node, result in extra_lv_nvinfo.items():
3290       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3291                               node_image[node], vg_name)
3292
3293     feedback_fn("* Verifying instance status")
3294     for instance in self.my_inst_names:
3295       if verbose:
3296         feedback_fn("* Verifying instance %s" % instance)
3297       inst_config = self.my_inst_info[instance]
3298       self._VerifyInstance(instance, inst_config, node_image,
3299                            instdisk[instance])
3300       inst_nodes_offline = []
3301
3302       pnode = inst_config.primary_node
3303       pnode_img = node_image[pnode]
3304       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3305                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3306                " primary node failed", instance)
3307
3308       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3309                pnode_img.offline,
3310                constants.CV_EINSTANCEBADNODE, instance,
3311                "instance is marked as running and lives on offline node %s",
3312                inst_config.primary_node)
3313
3314       # If the instance is non-redundant we cannot survive losing its primary
3315       # node, so we are not N+1 compliant. On the other hand we have no disk
3316       # templates with more than one secondary so that situation is not well
3317       # supported either.
3318       # FIXME: does not support file-backed instances
3319       if not inst_config.secondary_nodes:
3320         i_non_redundant.append(instance)
3321
3322       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3323                constants.CV_EINSTANCELAYOUT,
3324                instance, "instance has multiple secondary nodes: %s",
3325                utils.CommaJoin(inst_config.secondary_nodes),
3326                code=self.ETYPE_WARNING)
3327
3328       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3329         pnode = inst_config.primary_node
3330         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3331         instance_groups = {}
3332
3333         for node in instance_nodes:
3334           instance_groups.setdefault(self.all_node_info[node].group,
3335                                      []).append(node)
3336
3337         pretty_list = [
3338           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3339           # Sort so that we always list the primary node first.
3340           for group, nodes in sorted(instance_groups.items(),
3341                                      key=lambda (_, nodes): pnode in nodes,
3342                                      reverse=True)]
3343
3344         self._ErrorIf(len(instance_groups) > 1,
3345                       constants.CV_EINSTANCESPLITGROUPS,
3346                       instance, "instance has primary and secondary nodes in"
3347                       " different groups: %s", utils.CommaJoin(pretty_list),
3348                       code=self.ETYPE_WARNING)
3349
3350       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3351         i_non_a_balanced.append(instance)
3352
3353       for snode in inst_config.secondary_nodes:
3354         s_img = node_image[snode]
3355         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3356                  snode, "instance %s, connection to secondary node failed",
3357                  instance)
3358
3359         if s_img.offline:
3360           inst_nodes_offline.append(snode)
3361
3362       # warn that the instance lives on offline nodes
3363       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3364                "instance has offline secondary node(s) %s",
3365                utils.CommaJoin(inst_nodes_offline))
3366       # ... or ghost/non-vm_capable nodes
3367       for node in inst_config.all_nodes:
3368         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3369                  instance, "instance lives on ghost node %s", node)
3370         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3371                  instance, "instance lives on non-vm_capable node %s", node)
3372
3373     feedback_fn("* Verifying orphan volumes")
3374     reserved = utils.FieldSet(*cluster.reserved_lvs)
3375
3376     # We will get spurious "unknown volume" warnings if any node of this group
3377     # is secondary for an instance whose primary is in another group. To avoid
3378     # them, we find these instances and add their volumes to node_vol_should.
3379     for inst in self.all_inst_info.values():
3380       for secondary in inst.secondary_nodes:
3381         if (secondary in self.my_node_info
3382             and inst.name not in self.my_inst_info):
3383           inst.MapLVsByNode(node_vol_should)
3384           break
3385
3386     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3387
3388     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3389       feedback_fn("* Verifying N+1 Memory redundancy")
3390       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3391
3392     feedback_fn("* Other Notes")
3393     if i_non_redundant:
3394       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3395                   % len(i_non_redundant))
3396
3397     if i_non_a_balanced:
3398       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3399                   % len(i_non_a_balanced))
3400
3401     if i_offline:
3402       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3403
3404     if n_offline:
3405       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3406
3407     if n_drained:
3408       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3409
3410     return not self.bad
3411
3412   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3413     """Analyze the post-hooks' result
3414
3415     This method analyses the hook result, handles it, and sends some
3416     nicely-formatted feedback back to the user.
3417
3418     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3419         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3420     @param hooks_results: the results of the multi-node hooks rpc call
3421     @param feedback_fn: function used send feedback back to the caller
3422     @param lu_result: previous Exec result
3423     @return: the new Exec result, based on the previous result
3424         and hook results
3425
3426     """
3427     # We only really run POST phase hooks, only for non-empty groups,
3428     # and are only interested in their results
3429     if not self.my_node_names:
3430       # empty node group
3431       pass
3432     elif phase == constants.HOOKS_PHASE_POST:
3433       # Used to change hooks' output to proper indentation
3434       feedback_fn("* Hooks Results")
3435       assert hooks_results, "invalid result from hooks"
3436
3437       for node_name in hooks_results:
3438         res = hooks_results[node_name]
3439         msg = res.fail_msg
3440         test = msg and not res.offline
3441         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3442                       "Communication failure in hooks execution: %s", msg)
3443         if res.offline or msg:
3444           # No need to investigate payload if node is offline or gave
3445           # an error.
3446           continue
3447         for script, hkr, output in res.payload:
3448           test = hkr == constants.HKR_FAIL
3449           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3450                         "Script %s failed, output:", script)
3451           if test:
3452             output = self._HOOKS_INDENT_RE.sub("      ", output)
3453             feedback_fn("%s" % output)
3454             lu_result = False
3455
3456     return lu_result
3457
3458
3459 class LUClusterVerifyDisks(NoHooksLU):
3460   """Verifies the cluster disks status.
3461
3462   """
3463   REQ_BGL = False
3464
3465   def ExpandNames(self):
3466     self.share_locks = _ShareAll()
3467     self.needed_locks = {
3468       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3469       }
3470
3471   def Exec(self, feedback_fn):
3472     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3473
3474     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3475     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3476                            for group in group_names])
3477
3478
3479 class LUGroupVerifyDisks(NoHooksLU):
3480   """Verifies the status of all disks in a node group.
3481
3482   """
3483   REQ_BGL = False
3484
3485   def ExpandNames(self):
3486     # Raises errors.OpPrereqError on its own if group can't be found
3487     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3488
3489     self.share_locks = _ShareAll()
3490     self.needed_locks = {
3491       locking.LEVEL_INSTANCE: [],
3492       locking.LEVEL_NODEGROUP: [],
3493       locking.LEVEL_NODE: [],
3494       }
3495
3496   def DeclareLocks(self, level):
3497     if level == locking.LEVEL_INSTANCE:
3498       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3499
3500       # Lock instances optimistically, needs verification once node and group
3501       # locks have been acquired
3502       self.needed_locks[locking.LEVEL_INSTANCE] = \
3503         self.cfg.GetNodeGroupInstances(self.group_uuid)
3504
3505     elif level == locking.LEVEL_NODEGROUP:
3506       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3507
3508       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3509         set([self.group_uuid] +
3510             # Lock all groups used by instances optimistically; this requires
3511             # going via the node before it's locked, requiring verification
3512             # later on
3513             [group_uuid
3514              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3515              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3516
3517     elif level == locking.LEVEL_NODE:
3518       # This will only lock the nodes in the group to be verified which contain
3519       # actual instances
3520       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3521       self._LockInstancesNodes()
3522
3523       # Lock all nodes in group to be verified
3524       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3525       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3526       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3527
3528   def CheckPrereq(self):
3529     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3530     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3531     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3532
3533     assert self.group_uuid in owned_groups
3534
3535     # Check if locked instances are still correct
3536     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3537
3538     # Get instance information
3539     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3540
3541     # Check if node groups for locked instances are still correct
3542     _CheckInstancesNodeGroups(self.cfg, self.instances,
3543                               owned_groups, owned_nodes, self.group_uuid)
3544
3545   def Exec(self, feedback_fn):
3546     """Verify integrity of cluster disks.
3547
3548     @rtype: tuple of three items
3549     @return: a tuple of (dict of node-to-node_error, list of instances
3550         which need activate-disks, dict of instance: (node, volume) for
3551         missing volumes
3552
3553     """
3554     res_nodes = {}
3555     res_instances = set()
3556     res_missing = {}
3557
3558     nv_dict = _MapInstanceDisksToNodes(
3559       [inst for inst in self.instances.values()
3560        if inst.admin_state == constants.ADMINST_UP])
3561
3562     if nv_dict:
3563       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3564                              set(self.cfg.GetVmCapableNodeList()))
3565
3566       node_lvs = self.rpc.call_lv_list(nodes, [])
3567
3568       for (node, node_res) in node_lvs.items():
3569         if node_res.offline:
3570           continue
3571
3572         msg = node_res.fail_msg
3573         if msg:
3574           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3575           res_nodes[node] = msg
3576           continue
3577
3578         for lv_name, (_, _, lv_online) in node_res.payload.items():
3579           inst = nv_dict.pop((node, lv_name), None)
3580           if not (lv_online or inst is None):
3581             res_instances.add(inst)
3582
3583       # any leftover items in nv_dict are missing LVs, let's arrange the data
3584       # better
3585       for key, inst in nv_dict.iteritems():
3586         res_missing.setdefault(inst, []).append(list(key))
3587
3588     return (res_nodes, list(res_instances), res_missing)
3589
3590
3591 class LUClusterRepairDiskSizes(NoHooksLU):
3592   """Verifies the cluster disks sizes.
3593
3594   """
3595   REQ_BGL = False
3596
3597   def ExpandNames(self):
3598     if self.op.instances:
3599       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3600       self.needed_locks = {
3601         locking.LEVEL_NODE_RES: [],
3602         locking.LEVEL_INSTANCE: self.wanted_names,
3603         }
3604       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3605     else:
3606       self.wanted_names = None
3607       self.needed_locks = {
3608         locking.LEVEL_NODE_RES: locking.ALL_SET,
3609         locking.LEVEL_INSTANCE: locking.ALL_SET,
3610         }
3611     self.share_locks = {
3612       locking.LEVEL_NODE_RES: 1,
3613       locking.LEVEL_INSTANCE: 0,
3614       }
3615
3616   def DeclareLocks(self, level):
3617     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3618       self._LockInstancesNodes(primary_only=True, level=level)
3619
3620   def CheckPrereq(self):
3621     """Check prerequisites.
3622
3623     This only checks the optional instance list against the existing names.
3624
3625     """
3626     if self.wanted_names is None:
3627       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3628
3629     self.wanted_instances = \
3630         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3631
3632   def _EnsureChildSizes(self, disk):
3633     """Ensure children of the disk have the needed disk size.
3634
3635     This is valid mainly for DRBD8 and fixes an issue where the
3636     children have smaller disk size.
3637
3638     @param disk: an L{ganeti.objects.Disk} object
3639
3640     """
3641     if disk.dev_type == constants.LD_DRBD8:
3642       assert disk.children, "Empty children for DRBD8?"
3643       fchild = disk.children[0]
3644       mismatch = fchild.size < disk.size
3645       if mismatch:
3646         self.LogInfo("Child disk has size %d, parent %d, fixing",
3647                      fchild.size, disk.size)
3648         fchild.size = disk.size
3649
3650       # and we recurse on this child only, not on the metadev
3651       return self._EnsureChildSizes(fchild) or mismatch
3652     else:
3653       return False
3654
3655   def Exec(self, feedback_fn):
3656     """Verify the size of cluster disks.
3657
3658     """
3659     # TODO: check child disks too
3660     # TODO: check differences in size between primary/secondary nodes
3661     per_node_disks = {}
3662     for instance in self.wanted_instances:
3663       pnode = instance.primary_node
3664       if pnode not in per_node_disks:
3665         per_node_disks[pnode] = []
3666       for idx, disk in enumerate(instance.disks):
3667         per_node_disks[pnode].append((instance, idx, disk))
3668
3669     assert not (frozenset(per_node_disks.keys()) -
3670                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3671       "Not owning correct locks"
3672     assert not self.owned_locks(locking.LEVEL_NODE)
3673
3674     changed = []
3675     for node, dskl in per_node_disks.items():
3676       newl = [v[2].Copy() for v in dskl]
3677       for dsk in newl:
3678         self.cfg.SetDiskID(dsk, node)
3679       result = self.rpc.call_blockdev_getsize(node, newl)
3680       if result.fail_msg:
3681         self.LogWarning("Failure in blockdev_getsize call to node"
3682                         " %s, ignoring", node)
3683         continue
3684       if len(result.payload) != len(dskl):
3685         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3686                         " result.payload=%s", node, len(dskl), result.payload)
3687         self.LogWarning("Invalid result from node %s, ignoring node results",
3688                         node)
3689         continue
3690       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3691         if size is None:
3692           self.LogWarning("Disk %d of instance %s did not return size"
3693                           " information, ignoring", idx, instance.name)
3694           continue
3695         if not isinstance(size, (int, long)):
3696           self.LogWarning("Disk %d of instance %s did not return valid"
3697                           " size information, ignoring", idx, instance.name)
3698           continue
3699         size = size >> 20
3700         if size != disk.size:
3701           self.LogInfo("Disk %d of instance %s has mismatched size,"
3702                        " correcting: recorded %d, actual %d", idx,
3703                        instance.name, disk.size, size)
3704           disk.size = size
3705           self.cfg.Update(instance, feedback_fn)
3706           changed.append((instance.name, idx, size))
3707         if self._EnsureChildSizes(disk):
3708           self.cfg.Update(instance, feedback_fn)
3709           changed.append((instance.name, idx, disk.size))
3710     return changed
3711
3712
3713 class LUClusterRename(LogicalUnit):
3714   """Rename the cluster.
3715
3716   """
3717   HPATH = "cluster-rename"
3718   HTYPE = constants.HTYPE_CLUSTER
3719
3720   def BuildHooksEnv(self):
3721     """Build hooks env.
3722
3723     """
3724     return {
3725       "OP_TARGET": self.cfg.GetClusterName(),
3726       "NEW_NAME": self.op.name,
3727       }
3728
3729   def BuildHooksNodes(self):
3730     """Build hooks nodes.
3731
3732     """
3733     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3734
3735   def CheckPrereq(self):
3736     """Verify that the passed name is a valid one.
3737
3738     """
3739     hostname = netutils.GetHostname(name=self.op.name,
3740                                     family=self.cfg.GetPrimaryIPFamily())
3741
3742     new_name = hostname.name
3743     self.ip = new_ip = hostname.ip
3744     old_name = self.cfg.GetClusterName()
3745     old_ip = self.cfg.GetMasterIP()
3746     if new_name == old_name and new_ip == old_ip:
3747       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3748                                  " cluster has changed",
3749                                  errors.ECODE_INVAL)
3750     if new_ip != old_ip:
3751       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3752         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3753                                    " reachable on the network" %
3754                                    new_ip, errors.ECODE_NOTUNIQUE)
3755
3756     self.op.name = new_name
3757
3758   def Exec(self, feedback_fn):
3759     """Rename the cluster.
3760
3761     """
3762     clustername = self.op.name
3763     new_ip = self.ip
3764
3765     # shutdown the master IP
3766     master_params = self.cfg.GetMasterNetworkParameters()
3767     ems = self.cfg.GetUseExternalMipScript()
3768     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3769                                                      master_params, ems)
3770     result.Raise("Could not disable the master role")
3771
3772     try:
3773       cluster = self.cfg.GetClusterInfo()
3774       cluster.cluster_name = clustername
3775       cluster.master_ip = new_ip
3776       self.cfg.Update(cluster, feedback_fn)
3777
3778       # update the known hosts file
3779       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3780       node_list = self.cfg.GetOnlineNodeList()
3781       try:
3782         node_list.remove(master_params.name)
3783       except ValueError:
3784         pass
3785       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3786     finally:
3787       master_params.ip = new_ip
3788       result = self.rpc.call_node_activate_master_ip(master_params.name,
3789                                                      master_params, ems)
3790       msg = result.fail_msg
3791       if msg:
3792         self.LogWarning("Could not re-enable the master role on"
3793                         " the master, please restart manually: %s", msg)
3794
3795     return clustername
3796
3797
3798 def _ValidateNetmask(cfg, netmask):
3799   """Checks if a netmask is valid.
3800
3801   @type cfg: L{config.ConfigWriter}
3802   @param cfg: The cluster configuration
3803   @type netmask: int
3804   @param netmask: the netmask to be verified
3805   @raise errors.OpPrereqError: if the validation fails
3806
3807   """
3808   ip_family = cfg.GetPrimaryIPFamily()
3809   try:
3810     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3811   except errors.ProgrammerError:
3812     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3813                                ip_family, errors.ECODE_INVAL)
3814   if not ipcls.ValidateNetmask(netmask):
3815     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3816                                 (netmask), errors.ECODE_INVAL)
3817
3818
3819 class LUClusterSetParams(LogicalUnit):
3820   """Change the parameters of the cluster.
3821
3822   """
3823   HPATH = "cluster-modify"
3824   HTYPE = constants.HTYPE_CLUSTER
3825   REQ_BGL = False
3826
3827   def CheckArguments(self):
3828     """Check parameters
3829
3830     """
3831     if self.op.uid_pool:
3832       uidpool.CheckUidPool(self.op.uid_pool)
3833
3834     if self.op.add_uids:
3835       uidpool.CheckUidPool(self.op.add_uids)
3836
3837     if self.op.remove_uids:
3838       uidpool.CheckUidPool(self.op.remove_uids)
3839
3840     if self.op.master_netmask is not None:
3841       _ValidateNetmask(self.cfg, self.op.master_netmask)
3842
3843     if self.op.diskparams:
3844       for dt_params in self.op.diskparams.values():
3845         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3846       try:
3847         utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3848       except errors.OpPrereqError, err:
3849         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3850                                    errors.ECODE_INVAL)
3851
3852   def ExpandNames(self):
3853     # FIXME: in the future maybe other cluster params won't require checking on
3854     # all nodes to be modified.
3855     self.needed_locks = {
3856       locking.LEVEL_NODE: locking.ALL_SET,
3857       locking.LEVEL_INSTANCE: locking.ALL_SET,
3858       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3859     }
3860     self.share_locks = {
3861         locking.LEVEL_NODE: 1,
3862         locking.LEVEL_INSTANCE: 1,
3863         locking.LEVEL_NODEGROUP: 1,
3864     }
3865
3866   def BuildHooksEnv(self):
3867     """Build hooks env.
3868
3869     """
3870     return {
3871       "OP_TARGET": self.cfg.GetClusterName(),
3872       "NEW_VG_NAME": self.op.vg_name,
3873       }
3874
3875   def BuildHooksNodes(self):
3876     """Build hooks nodes.
3877
3878     """
3879     mn = self.cfg.GetMasterNode()
3880     return ([mn], [mn])
3881
3882   def CheckPrereq(self):
3883     """Check prerequisites.
3884
3885     This checks whether the given params don't conflict and
3886     if the given volume group is valid.
3887
3888     """
3889     if self.op.vg_name is not None and not self.op.vg_name:
3890       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3891         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3892                                    " instances exist", errors.ECODE_INVAL)
3893
3894     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3895       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3896         raise errors.OpPrereqError("Cannot disable drbd helper while"
3897                                    " drbd-based instances exist",
3898                                    errors.ECODE_INVAL)
3899
3900     node_list = self.owned_locks(locking.LEVEL_NODE)
3901
3902     # if vg_name not None, checks given volume group on all nodes
3903     if self.op.vg_name:
3904       vglist = self.rpc.call_vg_list(node_list)
3905       for node in node_list:
3906         msg = vglist[node].fail_msg
3907         if msg:
3908           # ignoring down node
3909           self.LogWarning("Error while gathering data on node %s"
3910                           " (ignoring node): %s", node, msg)
3911           continue
3912         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3913                                               self.op.vg_name,
3914                                               constants.MIN_VG_SIZE)
3915         if vgstatus:
3916           raise errors.OpPrereqError("Error on node '%s': %s" %
3917                                      (node, vgstatus), errors.ECODE_ENVIRON)
3918
3919     if self.op.drbd_helper:
3920       # checks given drbd helper on all nodes
3921       helpers = self.rpc.call_drbd_helper(node_list)
3922       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3923         if ninfo.offline:
3924           self.LogInfo("Not checking drbd helper on offline node %s", node)
3925           continue
3926         msg = helpers[node].fail_msg
3927         if msg:
3928           raise errors.OpPrereqError("Error checking drbd helper on node"
3929                                      " '%s': %s" % (node, msg),
3930                                      errors.ECODE_ENVIRON)
3931         node_helper = helpers[node].payload
3932         if node_helper != self.op.drbd_helper:
3933           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3934                                      (node, node_helper), errors.ECODE_ENVIRON)
3935
3936     self.cluster = cluster = self.cfg.GetClusterInfo()
3937     # validate params changes
3938     if self.op.beparams:
3939       objects.UpgradeBeParams(self.op.beparams)
3940       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3941       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3942
3943     if self.op.ndparams:
3944       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3945       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3946
3947       # TODO: we need a more general way to handle resetting
3948       # cluster-level parameters to default values
3949       if self.new_ndparams["oob_program"] == "":
3950         self.new_ndparams["oob_program"] = \
3951             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3952
3953     if self.op.hv_state:
3954       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3955                                             self.cluster.hv_state_static)
3956       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3957                                for hv, values in new_hv_state.items())
3958
3959     if self.op.disk_state:
3960       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3961                                                 self.cluster.disk_state_static)
3962       self.new_disk_state = \
3963         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3964                             for name, values in svalues.items()))
3965              for storage, svalues in new_disk_state.items())
3966
3967     if self.op.ipolicy:
3968       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3969                                             group_policy=False)
3970
3971       all_instances = self.cfg.GetAllInstancesInfo().values()
3972       violations = set()
3973       for group in self.cfg.GetAllNodeGroupsInfo().values():
3974         instances = frozenset([inst for inst in all_instances
3975                                if compat.any(node in group.members
3976                                              for node in inst.all_nodes)])
3977         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3978         ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
3979         new = _ComputeNewInstanceViolations(ipol,
3980                                             new_ipolicy, instances)
3981         if new:
3982           violations.update(new)
3983
3984       if violations:
3985         self.LogWarning("After the ipolicy change the following instances"
3986                         " violate them: %s",
3987                         utils.CommaJoin(utils.NiceSort(violations)))
3988
3989     if self.op.nicparams:
3990       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3991       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3992       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3993       nic_errors = []
3994
3995       # check all instances for consistency
3996       for instance in self.cfg.GetAllInstancesInfo().values():
3997         for nic_idx, nic in enumerate(instance.nics):
3998           params_copy = copy.deepcopy(nic.nicparams)
3999           params_filled = objects.FillDict(self.new_nicparams, params_copy)
4000
4001           # check parameter syntax
4002           try:
4003             objects.NIC.CheckParameterSyntax(params_filled)
4004           except errors.ConfigurationError, err:
4005             nic_errors.append("Instance %s, nic/%d: %s" %
4006                               (instance.name, nic_idx, err))
4007
4008           # if we're moving instances to routed, check that they have an ip
4009           target_mode = params_filled[constants.NIC_MODE]
4010           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4011             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4012                               " address" % (instance.name, nic_idx))
4013       if nic_errors:
4014         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4015                                    "\n".join(nic_errors), errors.ECODE_INVAL)
4016
4017     # hypervisor list/parameters
4018     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4019     if self.op.hvparams:
4020       for hv_name, hv_dict in self.op.hvparams.items():
4021         if hv_name not in self.new_hvparams:
4022           self.new_hvparams[hv_name] = hv_dict
4023         else:
4024           self.new_hvparams[hv_name].update(hv_dict)
4025
4026     # disk template parameters
4027     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4028     if self.op.diskparams:
4029       for dt_name, dt_params in self.op.diskparams.items():
4030         if dt_name not in self.op.diskparams:
4031           self.new_diskparams[dt_name] = dt_params
4032         else:
4033           self.new_diskparams[dt_name].update(dt_params)
4034
4035     # os hypervisor parameters
4036     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4037     if self.op.os_hvp:
4038       for os_name, hvs in self.op.os_hvp.items():
4039         if os_name not in self.new_os_hvp:
4040           self.new_os_hvp[os_name] = hvs
4041         else:
4042           for hv_name, hv_dict in hvs.items():
4043             if hv_name not in self.new_os_hvp[os_name]:
4044               self.new_os_hvp[os_name][hv_name] = hv_dict
4045             else:
4046               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4047
4048     # os parameters
4049     self.new_osp = objects.FillDict(cluster.osparams, {})
4050     if self.op.osparams:
4051       for os_name, osp in self.op.osparams.items():
4052         if os_name not in self.new_osp:
4053           self.new_osp[os_name] = {}
4054
4055         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4056                                                   use_none=True)
4057
4058         if not self.new_osp[os_name]:
4059           # we removed all parameters
4060           del self.new_osp[os_name]
4061         else:
4062           # check the parameter validity (remote check)
4063           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4064                          os_name, self.new_osp[os_name])
4065
4066     # changes to the hypervisor list
4067     if self.op.enabled_hypervisors is not None:
4068       self.hv_list = self.op.enabled_hypervisors
4069       for hv in self.hv_list:
4070         # if the hypervisor doesn't already exist in the cluster
4071         # hvparams, we initialize it to empty, and then (in both
4072         # cases) we make sure to fill the defaults, as we might not
4073         # have a complete defaults list if the hypervisor wasn't
4074         # enabled before
4075         if hv not in new_hvp:
4076           new_hvp[hv] = {}
4077         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4078         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4079     else:
4080       self.hv_list = cluster.enabled_hypervisors
4081
4082     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4083       # either the enabled list has changed, or the parameters have, validate
4084       for hv_name, hv_params in self.new_hvparams.items():
4085         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4086             (self.op.enabled_hypervisors and
4087              hv_name in self.op.enabled_hypervisors)):
4088           # either this is a new hypervisor, or its parameters have changed
4089           hv_class = hypervisor.GetHypervisor(hv_name)
4090           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4091           hv_class.CheckParameterSyntax(hv_params)
4092           _CheckHVParams(self, node_list, hv_name, hv_params)
4093
4094     if self.op.os_hvp:
4095       # no need to check any newly-enabled hypervisors, since the
4096       # defaults have already been checked in the above code-block
4097       for os_name, os_hvp in self.new_os_hvp.items():
4098         for hv_name, hv_params in os_hvp.items():
4099           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4100           # we need to fill in the new os_hvp on top of the actual hv_p
4101           cluster_defaults = self.new_hvparams.get(hv_name, {})
4102           new_osp = objects.FillDict(cluster_defaults, hv_params)
4103           hv_class = hypervisor.GetHypervisor(hv_name)
4104           hv_class.CheckParameterSyntax(new_osp)
4105           _CheckHVParams(self, node_list, hv_name, new_osp)
4106
4107     if self.op.default_iallocator:
4108       alloc_script = utils.FindFile(self.op.default_iallocator,
4109                                     constants.IALLOCATOR_SEARCH_PATH,
4110                                     os.path.isfile)
4111       if alloc_script is None:
4112         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4113                                    " specified" % self.op.default_iallocator,
4114                                    errors.ECODE_INVAL)
4115
4116   def Exec(self, feedback_fn):
4117     """Change the parameters of the cluster.
4118
4119     """
4120     if self.op.vg_name is not None:
4121       new_volume = self.op.vg_name
4122       if not new_volume:
4123         new_volume = None
4124       if new_volume != self.cfg.GetVGName():
4125         self.cfg.SetVGName(new_volume)
4126       else:
4127         feedback_fn("Cluster LVM configuration already in desired"
4128                     " state, not changing")
4129     if self.op.drbd_helper is not None:
4130       new_helper = self.op.drbd_helper
4131       if not new_helper:
4132         new_helper = None
4133       if new_helper != self.cfg.GetDRBDHelper():
4134         self.cfg.SetDRBDHelper(new_helper)
4135       else:
4136         feedback_fn("Cluster DRBD helper already in desired state,"
4137                     " not changing")
4138     if self.op.hvparams:
4139       self.cluster.hvparams = self.new_hvparams
4140     if self.op.os_hvp:
4141       self.cluster.os_hvp = self.new_os_hvp
4142     if self.op.enabled_hypervisors is not None:
4143       self.cluster.hvparams = self.new_hvparams
4144       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4145     if self.op.beparams:
4146       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4147     if self.op.nicparams:
4148       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4149     if self.op.ipolicy:
4150       self.cluster.ipolicy = self.new_ipolicy
4151     if self.op.osparams:
4152       self.cluster.osparams = self.new_osp
4153     if self.op.ndparams:
4154       self.cluster.ndparams = self.new_ndparams
4155     if self.op.diskparams:
4156       self.cluster.diskparams = self.new_diskparams
4157     if self.op.hv_state:
4158       self.cluster.hv_state_static = self.new_hv_state
4159     if self.op.disk_state:
4160       self.cluster.disk_state_static = self.new_disk_state
4161
4162     if self.op.candidate_pool_size is not None:
4163       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4164       # we need to update the pool size here, otherwise the save will fail
4165       _AdjustCandidatePool(self, [])
4166
4167     if self.op.maintain_node_health is not None:
4168       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4169         feedback_fn("Note: CONFD was disabled at build time, node health"
4170                     " maintenance is not useful (still enabling it)")
4171       self.cluster.maintain_node_health = self.op.maintain_node_health
4172
4173     if self.op.prealloc_wipe_disks is not None:
4174       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4175
4176     if self.op.add_uids is not None:
4177       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4178
4179     if self.op.remove_uids is not None:
4180       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4181
4182     if self.op.uid_pool is not None:
4183       self.cluster.uid_pool = self.op.uid_pool
4184
4185     if self.op.default_iallocator is not None:
4186       self.cluster.default_iallocator = self.op.default_iallocator
4187
4188     if self.op.reserved_lvs is not None:
4189       self.cluster.reserved_lvs = self.op.reserved_lvs
4190
4191     if self.op.use_external_mip_script is not None:
4192       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4193
4194     def helper_os(aname, mods, desc):
4195       desc += " OS list"
4196       lst = getattr(self.cluster, aname)
4197       for key, val in mods:
4198         if key == constants.DDM_ADD:
4199           if val in lst:
4200             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4201           else:
4202             lst.append(val)
4203         elif key == constants.DDM_REMOVE:
4204           if val in lst:
4205             lst.remove(val)
4206           else:
4207             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4208         else:
4209           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4210
4211     if self.op.hidden_os:
4212       helper_os("hidden_os", self.op.hidden_os, "hidden")
4213
4214     if self.op.blacklisted_os:
4215       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4216
4217     if self.op.master_netdev:
4218       master_params = self.cfg.GetMasterNetworkParameters()
4219       ems = self.cfg.GetUseExternalMipScript()
4220       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4221                   self.cluster.master_netdev)
4222       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4223                                                        master_params, ems)
4224       result.Raise("Could not disable the master ip")
4225       feedback_fn("Changing master_netdev from %s to %s" %
4226                   (master_params.netdev, self.op.master_netdev))
4227       self.cluster.master_netdev = self.op.master_netdev
4228
4229     if self.op.master_netmask:
4230       master_params = self.cfg.GetMasterNetworkParameters()
4231       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4232       result = self.rpc.call_node_change_master_netmask(master_params.name,
4233                                                         master_params.netmask,
4234                                                         self.op.master_netmask,
4235                                                         master_params.ip,
4236                                                         master_params.netdev)
4237       if result.fail_msg:
4238         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4239         feedback_fn(msg)
4240
4241       self.cluster.master_netmask = self.op.master_netmask
4242
4243     self.cfg.Update(self.cluster, feedback_fn)
4244
4245     if self.op.master_netdev:
4246       master_params = self.cfg.GetMasterNetworkParameters()
4247       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4248                   self.op.master_netdev)
4249       ems = self.cfg.GetUseExternalMipScript()
4250       result = self.rpc.call_node_activate_master_ip(master_params.name,
4251                                                      master_params, ems)
4252       if result.fail_msg:
4253         self.LogWarning("Could not re-enable the master ip on"
4254                         " the master, please restart manually: %s",
4255                         result.fail_msg)
4256
4257
4258 def _UploadHelper(lu, nodes, fname):
4259   """Helper for uploading a file and showing warnings.
4260
4261   """
4262   if os.path.exists(fname):
4263     result = lu.rpc.call_upload_file(nodes, fname)
4264     for to_node, to_result in result.items():
4265       msg = to_result.fail_msg
4266       if msg:
4267         msg = ("Copy of file %s to node %s failed: %s" %
4268                (fname, to_node, msg))
4269         lu.proc.LogWarning(msg)
4270
4271
4272 def _ComputeAncillaryFiles(cluster, redist):
4273   """Compute files external to Ganeti which need to be consistent.
4274
4275   @type redist: boolean
4276   @param redist: Whether to include files which need to be redistributed
4277
4278   """
4279   # Compute files for all nodes
4280   files_all = set([
4281     constants.SSH_KNOWN_HOSTS_FILE,
4282     constants.CONFD_HMAC_KEY,
4283     constants.CLUSTER_DOMAIN_SECRET_FILE,
4284     constants.SPICE_CERT_FILE,
4285     constants.SPICE_CACERT_FILE,
4286     constants.RAPI_USERS_FILE,
4287     ])
4288
4289   if not redist:
4290     files_all.update(constants.ALL_CERT_FILES)
4291     files_all.update(ssconf.SimpleStore().GetFileList())
4292   else:
4293     # we need to ship at least the RAPI certificate
4294     files_all.add(constants.RAPI_CERT_FILE)
4295
4296   if cluster.modify_etc_hosts:
4297     files_all.add(constants.ETC_HOSTS)
4298
4299   if cluster.use_external_mip_script:
4300     files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4301
4302   # Files which are optional, these must:
4303   # - be present in one other category as well
4304   # - either exist or not exist on all nodes of that category (mc, vm all)
4305   files_opt = set([
4306     constants.RAPI_USERS_FILE,
4307     ])
4308
4309   # Files which should only be on master candidates
4310   files_mc = set()
4311
4312   if not redist:
4313     files_mc.add(constants.CLUSTER_CONF_FILE)
4314
4315   # Files which should only be on VM-capable nodes
4316   files_vm = set(
4317     filename
4318     for hv_name in cluster.enabled_hypervisors
4319     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4320
4321   files_opt |= set(
4322     filename
4323     for hv_name in cluster.enabled_hypervisors
4324     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4325
4326   # Filenames in each category must be unique
4327   all_files_set = files_all | files_mc | files_vm
4328   assert (len(all_files_set) ==
4329           sum(map(len, [files_all, files_mc, files_vm]))), \
4330          "Found file listed in more than one file list"
4331
4332   # Optional files must be present in one other category
4333   assert all_files_set.issuperset(files_opt), \
4334          "Optional file not in a different required list"
4335
4336   return (files_all, files_opt, files_mc, files_vm)
4337
4338
4339 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4340   """Distribute additional files which are part of the cluster configuration.
4341
4342   ConfigWriter takes care of distributing the config and ssconf files, but
4343   there are more files which should be distributed to all nodes. This function
4344   makes sure those are copied.
4345
4346   @param lu: calling logical unit
4347   @param additional_nodes: list of nodes not in the config to distribute to
4348   @type additional_vm: boolean
4349   @param additional_vm: whether the additional nodes are vm-capable or not
4350
4351   """
4352   # Gather target nodes
4353   cluster = lu.cfg.GetClusterInfo()
4354   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4355
4356   online_nodes = lu.cfg.GetOnlineNodeList()
4357   online_set = frozenset(online_nodes)
4358   vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4359
4360   if additional_nodes is not None:
4361     online_nodes.extend(additional_nodes)
4362     if additional_vm:
4363       vm_nodes.extend(additional_nodes)
4364
4365   # Never distribute to master node
4366   for nodelist in [online_nodes, vm_nodes]:
4367     if master_info.name in nodelist:
4368       nodelist.remove(master_info.name)
4369
4370   # Gather file lists
4371   (files_all, _, files_mc, files_vm) = \
4372     _ComputeAncillaryFiles(cluster, True)
4373
4374   # Never re-distribute configuration file from here
4375   assert not (constants.CLUSTER_CONF_FILE in files_all or
4376               constants.CLUSTER_CONF_FILE in files_vm)
4377   assert not files_mc, "Master candidates not handled in this function"
4378
4379   filemap = [
4380     (online_nodes, files_all),
4381     (vm_nodes, files_vm),
4382     ]
4383
4384   # Upload the files
4385   for (node_list, files) in filemap:
4386     for fname in files:
4387       _UploadHelper(lu, node_list, fname)
4388
4389
4390 class LUClusterRedistConf(NoHooksLU):
4391   """Force the redistribution of cluster configuration.
4392
4393   This is a very simple LU.
4394
4395   """
4396   REQ_BGL = False
4397
4398   def ExpandNames(self):
4399     self.needed_locks = {
4400       locking.LEVEL_NODE: locking.ALL_SET,
4401     }
4402     self.share_locks[locking.LEVEL_NODE] = 1
4403
4404   def Exec(self, feedback_fn):
4405     """Redistribute the configuration.
4406
4407     """
4408     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4409     _RedistributeAncillaryFiles(self)
4410
4411
4412 class LUClusterActivateMasterIp(NoHooksLU):
4413   """Activate the master IP on the master node.
4414
4415   """
4416   def Exec(self, feedback_fn):
4417     """Activate the master IP.
4418
4419     """
4420     master_params = self.cfg.GetMasterNetworkParameters()
4421     ems = self.cfg.GetUseExternalMipScript()
4422     result = self.rpc.call_node_activate_master_ip(master_params.name,
4423                                                    master_params, ems)
4424     result.Raise("Could not activate the master IP")
4425
4426
4427 class LUClusterDeactivateMasterIp(NoHooksLU):
4428   """Deactivate the master IP on the master node.
4429
4430   """
4431   def Exec(self, feedback_fn):
4432     """Deactivate the master IP.
4433
4434     """
4435     master_params = self.cfg.GetMasterNetworkParameters()
4436     ems = self.cfg.GetUseExternalMipScript()
4437     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4438                                                      master_params, ems)
4439     result.Raise("Could not deactivate the master IP")
4440
4441
4442 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4443   """Sleep and poll for an instance's disk to sync.
4444
4445   """
4446   if not instance.disks or disks is not None and not disks:
4447     return True
4448
4449   disks = _ExpandCheckDisks(instance, disks)
4450
4451   if not oneshot:
4452     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4453
4454   node = instance.primary_node
4455
4456   for dev in disks:
4457     lu.cfg.SetDiskID(dev, node)
4458
4459   # TODO: Convert to utils.Retry
4460
4461   retries = 0
4462   degr_retries = 10 # in seconds, as we sleep 1 second each time
4463   while True:
4464     max_time = 0
4465     done = True
4466     cumul_degraded = False
4467     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4468     msg = rstats.fail_msg
4469     if msg:
4470       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4471       retries += 1
4472       if retries >= 10:
4473         raise errors.RemoteError("Can't contact node %s for mirror data,"
4474                                  " aborting." % node)
4475       time.sleep(6)
4476       continue
4477     rstats = rstats.payload
4478     retries = 0
4479     for i, mstat in enumerate(rstats):
4480       if mstat is None:
4481         lu.LogWarning("Can't compute data for node %s/%s",
4482                            node, disks[i].iv_name)
4483         continue
4484
4485       cumul_degraded = (cumul_degraded or
4486                         (mstat.is_degraded and mstat.sync_percent is None))
4487       if mstat.sync_percent is not None:
4488         done = False
4489         if mstat.estimated_time is not None:
4490           rem_time = ("%s remaining (estimated)" %
4491                       utils.FormatSeconds(mstat.estimated_time))
4492           max_time = mstat.estimated_time
4493         else:
4494           rem_time = "no time estimate"
4495         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4496                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4497
4498     # if we're done but degraded, let's do a few small retries, to
4499     # make sure we see a stable and not transient situation; therefore
4500     # we force restart of the loop
4501     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4502       logging.info("Degraded disks found, %d retries left", degr_retries)
4503       degr_retries -= 1
4504       time.sleep(1)
4505       continue
4506
4507     if done or oneshot:
4508       break
4509
4510     time.sleep(min(60, max_time))
4511
4512   if done:
4513     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4514   return not cumul_degraded
4515
4516
4517 def _BlockdevFind(lu, node, dev, instance):
4518   """Wrapper around call_blockdev_find to annotate diskparams.
4519
4520   @param lu: A reference to the lu object
4521   @param node: The node to call out
4522   @param dev: The device to find
4523   @param instance: The instance object the device belongs to
4524   @returns The result of the rpc call
4525
4526   """
4527   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4528   return lu.rpc.call_blockdev_find(node, disk)
4529
4530
4531 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4532   """Wrapper around L{_CheckDiskConsistencyInner}.
4533
4534   """
4535   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4536   return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4537                                     ldisk=ldisk)
4538
4539
4540 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4541                                ldisk=False):
4542   """Check that mirrors are not degraded.
4543
4544   @attention: The device has to be annotated already.
4545
4546   The ldisk parameter, if True, will change the test from the
4547   is_degraded attribute (which represents overall non-ok status for
4548   the device(s)) to the ldisk (representing the local storage status).
4549
4550   """
4551   lu.cfg.SetDiskID(dev, node)
4552
4553   result = True
4554
4555   if on_primary or dev.AssembleOnSecondary():
4556     rstats = lu.rpc.call_blockdev_find(node, dev)
4557     msg = rstats.fail_msg
4558     if msg:
4559       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4560       result = False
4561     elif not rstats.payload:
4562       lu.LogWarning("Can't find disk on node %s", node)
4563       result = False
4564     else:
4565       if ldisk:
4566         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4567       else:
4568         result = result and not rstats.payload.is_degraded
4569
4570   if dev.children:
4571     for child in dev.children:
4572       result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4573                                                      on_primary)
4574
4575   return result
4576
4577
4578 class LUOobCommand(NoHooksLU):
4579   """Logical unit for OOB handling.
4580
4581   """
4582   REQ_BGL = False
4583   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4584
4585   def ExpandNames(self):
4586     """Gather locks we need.
4587
4588     """
4589     if self.op.node_names:
4590       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4591       lock_names = self.op.node_names
4592     else:
4593       lock_names = locking.ALL_SET
4594
4595     self.needed_locks = {
4596       locking.LEVEL_NODE: lock_names,
4597       }
4598
4599   def CheckPrereq(self):
4600     """Check prerequisites.
4601
4602     This checks:
4603      - the node exists in the configuration
4604      - OOB is supported
4605
4606     Any errors are signaled by raising errors.OpPrereqError.
4607
4608     """
4609     self.nodes = []
4610     self.master_node = self.cfg.GetMasterNode()
4611
4612     assert self.op.power_delay >= 0.0
4613
4614     if self.op.node_names:
4615       if (self.op.command in self._SKIP_MASTER and
4616           self.master_node in self.op.node_names):
4617         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4618         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4619
4620         if master_oob_handler:
4621           additional_text = ("run '%s %s %s' if you want to operate on the"
4622                              " master regardless") % (master_oob_handler,
4623                                                       self.op.command,
4624                                                       self.master_node)
4625         else:
4626           additional_text = "it does not support out-of-band operations"
4627
4628         raise errors.OpPrereqError(("Operating on the master node %s is not"
4629                                     " allowed for %s; %s") %
4630                                    (self.master_node, self.op.command,
4631                                     additional_text), errors.ECODE_INVAL)
4632     else:
4633       self.op.node_names = self.cfg.GetNodeList()
4634       if self.op.command in self._SKIP_MASTER:
4635         self.op.node_names.remove(self.master_node)
4636
4637     if self.op.command in self._SKIP_MASTER:
4638       assert self.master_node not in self.op.node_names
4639
4640     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4641       if node is None:
4642         raise errors.OpPrereqError("Node %s not found" % node_name,
4643                                    errors.ECODE_NOENT)
4644       else:
4645         self.nodes.append(node)
4646
4647       if (not self.op.ignore_status and
4648           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4649         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4650                                     " not marked offline") % node_name,
4651                                    errors.ECODE_STATE)
4652
4653   def Exec(self, feedback_fn):
4654     """Execute OOB and return result if we expect any.
4655
4656     """
4657     master_node = self.master_node
4658     ret = []
4659
4660     for idx, node in enumerate(utils.NiceSort(self.nodes,
4661                                               key=lambda node: node.name)):
4662       node_entry = [(constants.RS_NORMAL, node.name)]
4663       ret.append(node_entry)
4664
4665       oob_program = _SupportsOob(self.cfg, node)
4666
4667       if not oob_program:
4668         node_entry.append((constants.RS_UNAVAIL, None))
4669         continue
4670
4671       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4672                    self.op.command, oob_program, node.name)
4673       result = self.rpc.call_run_oob(master_node, oob_program,
4674                                      self.op.command, node.name,
4675                                      self.op.timeout)
4676
4677       if result.fail_msg:
4678         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4679                         node.name, result.fail_msg)
4680         node_entry.append((constants.RS_NODATA, None))
4681       else:
4682         try:
4683           self._CheckPayload(result)
4684         except errors.OpExecError, err:
4685           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4686                           node.name, err)
4687           node_entry.append((constants.RS_NODATA, None))
4688         else:
4689           if self.op.command == constants.OOB_HEALTH:
4690             # For health we should log important events
4691             for item, status in result.payload:
4692               if status in [constants.OOB_STATUS_WARNING,
4693                             constants.OOB_STATUS_CRITICAL]:
4694                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4695                                 item, node.name, status)
4696
4697           if self.op.command == constants.OOB_POWER_ON:
4698             node.powered = True
4699           elif self.op.command == constants.OOB_POWER_OFF:
4700             node.powered = False
4701           elif self.op.command == constants.OOB_POWER_STATUS:
4702             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4703             if powered != node.powered:
4704               logging.warning(("Recorded power state (%s) of node '%s' does not"
4705                                " match actual power state (%s)"), node.powered,
4706                               node.name, powered)
4707
4708           # For configuration changing commands we should update the node
4709           if self.op.command in (constants.OOB_POWER_ON,
4710                                  constants.OOB_POWER_OFF):
4711             self.cfg.Update(node, feedback_fn)
4712
4713           node_entry.append((constants.RS_NORMAL, result.payload))
4714
4715           if (self.op.command == constants.OOB_POWER_ON and
4716               idx < len(self.nodes) - 1):
4717             time.sleep(self.op.power_delay)
4718
4719     return ret
4720
4721   def _CheckPayload(self, result):
4722     """Checks if the payload is valid.
4723
4724     @param result: RPC result
4725     @raises errors.OpExecError: If payload is not valid
4726
4727     """
4728     errs = []
4729     if self.op.command == constants.OOB_HEALTH:
4730       if not isinstance(result.payload, list):
4731         errs.append("command 'health' is expected to return a list but got %s" %
4732                     type(result.payload))
4733       else:
4734         for item, status in result.payload:
4735           if status not in constants.OOB_STATUSES:
4736             errs.append("health item '%s' has invalid status '%s'" %
4737                         (item, status))
4738
4739     if self.op.command == constants.OOB_POWER_STATUS:
4740       if not isinstance(result.payload, dict):
4741         errs.append("power-status is expected to return a dict but got %s" %
4742                     type(result.payload))
4743
4744     if self.op.command in [
4745       constants.OOB_POWER_ON,
4746       constants.OOB_POWER_OFF,
4747       constants.OOB_POWER_CYCLE,
4748       ]:
4749       if result.payload is not None:
4750         errs.append("%s is expected to not return payload but got '%s'" %
4751                     (self.op.command, result.payload))
4752
4753     if errs:
4754       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4755                                utils.CommaJoin(errs))
4756
4757
4758 class _OsQuery(_QueryBase):
4759   FIELDS = query.OS_FIELDS
4760
4761   def ExpandNames(self, lu):
4762     # Lock all nodes in shared mode
4763     # Temporary removal of locks, should be reverted later
4764     # TODO: reintroduce locks when they are lighter-weight
4765     lu.needed_locks = {}
4766     #self.share_locks[locking.LEVEL_NODE] = 1
4767     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4768
4769     # The following variables interact with _QueryBase._GetNames
4770     if self.names:
4771       self.wanted = self.names
4772     else:
4773       self.wanted = locking.ALL_SET
4774
4775     self.do_locking = self.use_locking
4776
4777   def DeclareLocks(self, lu, level):
4778     pass
4779
4780   @staticmethod
4781   def _DiagnoseByOS(rlist):
4782     """Remaps a per-node return list into an a per-os per-node dictionary
4783
4784     @param rlist: a map with node names as keys and OS objects as values
4785
4786     @rtype: dict
4787     @return: a dictionary with osnames as keys and as value another
4788         map, with nodes as keys and tuples of (path, status, diagnose,
4789         variants, parameters, api_versions) as values, eg::
4790
4791           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4792                                      (/srv/..., False, "invalid api")],
4793                            "node2": [(/srv/..., True, "", [], [])]}
4794           }
4795
4796     """
4797     all_os = {}
4798     # we build here the list of nodes that didn't fail the RPC (at RPC
4799     # level), so that nodes with a non-responding node daemon don't
4800     # make all OSes invalid
4801     good_nodes = [node_name for node_name in rlist
4802                   if not rlist[node_name].fail_msg]
4803     for node_name, nr in rlist.items():
4804       if nr.fail_msg or not nr.payload:
4805         continue
4806       for (name, path, status, diagnose, variants,
4807            params, api_versions) in nr.payload:
4808         if name not in all_os:
4809           # build a list of nodes for this os containing empty lists
4810           # for each node in node_list
4811           all_os[name] = {}
4812           for nname in good_nodes:
4813             all_os[name][nname] = []
4814         # convert params from [name, help] to (name, help)
4815         params = [tuple(v) for v in params]
4816         all_os[name][node_name].append((path, status, diagnose,
4817                                         variants, params, api_versions))
4818     return all_os
4819
4820   def _GetQueryData(self, lu):
4821     """Computes the list of nodes and their attributes.
4822
4823     """
4824     # Locking is not used
4825     assert not (compat.any(lu.glm.is_owned(level)
4826                            for level in locking.LEVELS
4827                            if level != locking.LEVEL_CLUSTER) or
4828                 self.do_locking or self.use_locking)
4829
4830     valid_nodes = [node.name
4831                    for node in lu.cfg.GetAllNodesInfo().values()
4832                    if not node.offline and node.vm_capable]
4833     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4834     cluster = lu.cfg.GetClusterInfo()
4835
4836     data = {}
4837
4838     for (os_name, os_data) in pol.items():
4839       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4840                           hidden=(os_name in cluster.hidden_os),
4841                           blacklisted=(os_name in cluster.blacklisted_os))
4842
4843       variants = set()
4844       parameters = set()
4845       api_versions = set()
4846
4847       for idx, osl in enumerate(os_data.values()):
4848         info.valid = bool(info.valid and osl and osl[0][1])
4849         if not info.valid:
4850           break
4851
4852         (node_variants, node_params, node_api) = osl[0][3:6]
4853         if idx == 0:
4854           # First entry
4855           variants.update(node_variants)
4856           parameters.update(node_params)
4857           api_versions.update(node_api)
4858         else:
4859           # Filter out inconsistent values
4860           variants.intersection_update(node_variants)
4861           parameters.intersection_update(node_params)
4862           api_versions.intersection_update(node_api)
4863
4864       info.variants = list(variants)
4865       info.parameters = list(parameters)
4866       info.api_versions = list(api_versions)
4867
4868       data[os_name] = info
4869
4870     # Prepare data in requested order
4871     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4872             if name in data]
4873
4874
4875 class LUOsDiagnose(NoHooksLU):
4876   """Logical unit for OS diagnose/query.
4877
4878   """
4879   REQ_BGL = False
4880
4881   @staticmethod
4882   def _BuildFilter(fields, names):
4883     """Builds a filter for querying OSes.
4884
4885     """
4886     name_filter = qlang.MakeSimpleFilter("name", names)
4887
4888     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4889     # respective field is not requested
4890     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4891                      for fname in ["hidden", "blacklisted"]
4892                      if fname not in fields]
4893     if "valid" not in fields:
4894       status_filter.append([qlang.OP_TRUE, "valid"])
4895
4896     if status_filter:
4897       status_filter.insert(0, qlang.OP_AND)
4898     else:
4899       status_filter = None
4900
4901     if name_filter and status_filter:
4902       return [qlang.OP_AND, name_filter, status_filter]
4903     elif name_filter:
4904       return name_filter
4905     else:
4906       return status_filter
4907
4908   def CheckArguments(self):
4909     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4910                        self.op.output_fields, False)
4911
4912   def ExpandNames(self):
4913     self.oq.ExpandNames(self)
4914
4915   def Exec(self, feedback_fn):
4916     return self.oq.OldStyleQuery(self)
4917
4918
4919 class LUNodeRemove(LogicalUnit):
4920   """Logical unit for removing a node.
4921
4922   """
4923   HPATH = "node-remove"
4924   HTYPE = constants.HTYPE_NODE
4925
4926   def BuildHooksEnv(self):
4927     """Build hooks env.
4928
4929     """
4930     return {
4931       "OP_TARGET": self.op.node_name,
4932       "NODE_NAME": self.op.node_name,
4933       }
4934
4935   def BuildHooksNodes(self):
4936     """Build hooks nodes.
4937
4938     This doesn't run on the target node in the pre phase as a failed
4939     node would then be impossible to remove.
4940
4941     """
4942     all_nodes = self.cfg.GetNodeList()
4943     try:
4944       all_nodes.remove(self.op.node_name)
4945     except ValueError:
4946       pass
4947     return (all_nodes, all_nodes)
4948
4949   def CheckPrereq(self):
4950     """Check prerequisites.
4951
4952     This checks:
4953      - the node exists in the configuration
4954      - it does not have primary or secondary instances
4955      - it's not the master
4956
4957     Any errors are signaled by raising errors.OpPrereqError.
4958
4959     """
4960     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4961     node = self.cfg.GetNodeInfo(self.op.node_name)
4962     assert node is not None
4963
4964     masternode = self.cfg.GetMasterNode()
4965     if node.name == masternode:
4966       raise errors.OpPrereqError("Node is the master node, failover to another"
4967                                  " node is required", errors.ECODE_INVAL)
4968
4969     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4970       if node.name in instance.all_nodes:
4971         raise errors.OpPrereqError("Instance %s is still running on the node,"
4972                                    " please remove first" % instance_name,
4973                                    errors.ECODE_INVAL)
4974     self.op.node_name = node.name
4975     self.node = node
4976
4977   def Exec(self, feedback_fn):
4978     """Removes the node from the cluster.
4979
4980     """
4981     node = self.node
4982     logging.info("Stopping the node daemon and removing configs from node %s",
4983                  node.name)
4984
4985     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4986
4987     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4988       "Not owning BGL"
4989
4990     # Promote nodes to master candidate as needed
4991     _AdjustCandidatePool(self, exceptions=[node.name])
4992     self.context.RemoveNode(node.name)
4993
4994     # Run post hooks on the node before it's removed
4995     _RunPostHook(self, node.name)
4996
4997     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4998     msg = result.fail_msg
4999     if msg:
5000       self.LogWarning("Errors encountered on the remote node while leaving"
5001                       " the cluster: %s", msg)
5002
5003     # Remove node from our /etc/hosts
5004     if self.cfg.GetClusterInfo().modify_etc_hosts:
5005       master_node = self.cfg.GetMasterNode()
5006       result = self.rpc.call_etc_hosts_modify(master_node,
5007                                               constants.ETC_HOSTS_REMOVE,
5008                                               node.name, None)
5009       result.Raise("Can't update hosts file with new host data")
5010       _RedistributeAncillaryFiles(self)
5011
5012
5013 class _NodeQuery(_QueryBase):
5014   FIELDS = query.NODE_FIELDS
5015
5016   def ExpandNames(self, lu):
5017     lu.needed_locks = {}
5018     lu.share_locks = _ShareAll()
5019
5020     if self.names:
5021       self.wanted = _GetWantedNodes(lu, self.names)
5022     else:
5023       self.wanted = locking.ALL_SET
5024
5025     self.do_locking = (self.use_locking and
5026                        query.NQ_LIVE in self.requested_data)
5027
5028     if self.do_locking:
5029       # If any non-static field is requested we need to lock the nodes
5030       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5031
5032   def DeclareLocks(self, lu, level):
5033     pass
5034
5035   def _GetQueryData(self, lu):
5036     """Computes the list of nodes and their attributes.
5037
5038     """
5039     all_info = lu.cfg.GetAllNodesInfo()
5040
5041     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5042
5043     # Gather data as requested
5044     if query.NQ_LIVE in self.requested_data:
5045       # filter out non-vm_capable nodes
5046       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5047
5048       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5049                                         [lu.cfg.GetHypervisorType()])
5050       live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5051                        for (name, nresult) in node_data.items()
5052                        if not nresult.fail_msg and nresult.payload)
5053     else:
5054       live_data = None
5055
5056     if query.NQ_INST in self.requested_data:
5057       node_to_primary = dict([(name, set()) for name in nodenames])
5058       node_to_secondary = dict([(name, set()) for name in nodenames])
5059
5060       inst_data = lu.cfg.GetAllInstancesInfo()
5061
5062       for inst in inst_data.values():
5063         if inst.primary_node in node_to_primary:
5064           node_to_primary[inst.primary_node].add(inst.name)
5065         for secnode in inst.secondary_nodes:
5066           if secnode in node_to_secondary:
5067             node_to_secondary[secnode].add(inst.name)
5068     else:
5069       node_to_primary = None
5070       node_to_secondary = None
5071
5072     if query.NQ_OOB in self.requested_data:
5073       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5074                          for name, node in all_info.iteritems())
5075     else:
5076       oob_support = None
5077
5078     if query.NQ_GROUP in self.requested_data:
5079       groups = lu.cfg.GetAllNodeGroupsInfo()
5080     else:
5081       groups = {}
5082
5083     return query.NodeQueryData([all_info[name] for name in nodenames],
5084                                live_data, lu.cfg.GetMasterNode(),
5085                                node_to_primary, node_to_secondary, groups,
5086                                oob_support, lu.cfg.GetClusterInfo())
5087
5088
5089 class LUNodeQuery(NoHooksLU):
5090   """Logical unit for querying nodes.
5091
5092   """
5093   # pylint: disable=W0142
5094   REQ_BGL = False
5095
5096   def CheckArguments(self):
5097     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5098                          self.op.output_fields, self.op.use_locking)
5099
5100   def ExpandNames(self):
5101     self.nq.ExpandNames(self)
5102
5103   def DeclareLocks(self, level):
5104     self.nq.DeclareLocks(self, level)
5105
5106   def Exec(self, feedback_fn):
5107     return self.nq.OldStyleQuery(self)
5108
5109
5110 class LUNodeQueryvols(NoHooksLU):
5111   """Logical unit for getting volumes on node(s).
5112
5113   """
5114   REQ_BGL = False
5115   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5116   _FIELDS_STATIC = utils.FieldSet("node")
5117
5118   def CheckArguments(self):
5119     _CheckOutputFields(static=self._FIELDS_STATIC,
5120                        dynamic=self._FIELDS_DYNAMIC,
5121                        selected=self.op.output_fields)
5122
5123   def ExpandNames(self):
5124     self.share_locks = _ShareAll()
5125     self.needed_locks = {}
5126
5127     if not self.op.nodes:
5128       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5129     else:
5130       self.needed_locks[locking.LEVEL_NODE] = \
5131         _GetWantedNodes(self, self.op.nodes)
5132
5133   def Exec(self, feedback_fn):
5134     """Computes the list of nodes and their attributes.
5135
5136     """
5137     nodenames = self.owned_locks(locking.LEVEL_NODE)
5138     volumes = self.rpc.call_node_volumes(nodenames)
5139
5140     ilist = self.cfg.GetAllInstancesInfo()
5141     vol2inst = _MapInstanceDisksToNodes(ilist.values())
5142
5143     output = []
5144     for node in nodenames:
5145       nresult = volumes[node]
5146       if nresult.offline:
5147         continue
5148       msg = nresult.fail_msg
5149       if msg:
5150         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5151         continue
5152
5153       node_vols = sorted(nresult.payload,
5154                          key=operator.itemgetter("dev"))
5155
5156       for vol in node_vols:
5157         node_output = []
5158         for field in self.op.output_fields:
5159           if field == "node":
5160             val = node
5161           elif field == "phys":
5162             val = vol["dev"]
5163           elif field == "vg":
5164             val = vol["vg"]
5165           elif field == "name":
5166             val = vol["name"]
5167           elif field == "size":
5168             val = int(float(vol["size"]))
5169           elif field == "instance":
5170             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5171           else:
5172             raise errors.ParameterError(field)
5173           node_output.append(str(val))
5174
5175         output.append(node_output)
5176
5177     return output
5178
5179
5180 class LUNodeQueryStorage(NoHooksLU):
5181   """Logical unit for getting information on storage units on node(s).
5182
5183   """
5184   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5185   REQ_BGL = False
5186
5187   def CheckArguments(self):
5188     _CheckOutputFields(static=self._FIELDS_STATIC,
5189                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5190                        selected=self.op.output_fields)
5191
5192   def ExpandNames(self):
5193     self.share_locks = _ShareAll()
5194     self.needed_locks = {}
5195
5196     if self.op.nodes:
5197       self.needed_locks[locking.LEVEL_NODE] = \
5198         _GetWantedNodes(self, self.op.nodes)
5199     else:
5200       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5201
5202   def Exec(self, feedback_fn):
5203     """Computes the list of nodes and their attributes.
5204
5205     """
5206     self.nodes = self.owned_locks(locking.LEVEL_NODE)
5207
5208     # Always get name to sort by
5209     if constants.SF_NAME in self.op.output_fields:
5210       fields = self.op.output_fields[:]
5211     else:
5212       fields = [constants.SF_NAME] + self.op.output_fields
5213
5214     # Never ask for node or type as it's only known to the LU
5215     for extra in [constants.SF_NODE, constants.SF_TYPE]:
5216       while extra in fields:
5217         fields.remove(extra)
5218
5219     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5220     name_idx = field_idx[constants.SF_NAME]
5221
5222     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5223     data = self.rpc.call_storage_list(self.nodes,
5224                                       self.op.storage_type, st_args,
5225                                       self.op.name, fields)
5226
5227     result = []
5228
5229     for node in utils.NiceSort(self.nodes):
5230       nresult = data[node]
5231       if nresult.offline:
5232         continue
5233
5234       msg = nresult.fail_msg
5235       if msg:
5236         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5237         continue
5238
5239       rows = dict([(row[name_idx], row) for row in nresult.payload])
5240
5241       for name in utils.NiceSort(rows.keys()):
5242         row = rows[name]
5243
5244         out = []
5245
5246         for field in self.op.output_fields:
5247           if field == constants.SF_NODE:
5248             val = node
5249           elif field == constants.SF_TYPE:
5250             val = self.op.storage_type
5251           elif field in field_idx:
5252             val = row[field_idx[field]]
5253           else:
5254             raise errors.ParameterError(field)
5255
5256           out.append(val)
5257
5258         result.append(out)
5259
5260     return result
5261
5262
5263 class _InstanceQuery(_QueryBase):
5264   FIELDS = query.INSTANCE_FIELDS
5265
5266   def ExpandNames(self, lu):
5267     lu.needed_locks = {}
5268     lu.share_locks = _ShareAll()
5269
5270     if self.names:
5271       self.wanted = _GetWantedInstances(lu, self.names)
5272     else:
5273       self.wanted = locking.ALL_SET
5274
5275     self.do_locking = (self.use_locking and
5276                        query.IQ_LIVE in self.requested_data)
5277     if self.do_locking:
5278       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5279       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5280       lu.needed_locks[locking.LEVEL_NODE] = []
5281       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5282
5283     self.do_grouplocks = (self.do_locking and
5284                           query.IQ_NODES in self.requested_data)
5285
5286   def DeclareLocks(self, lu, level):
5287     if self.do_locking:
5288       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5289         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5290
5291         # Lock all groups used by instances optimistically; this requires going
5292         # via the node before it's locked, requiring verification later on
5293         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5294           set(group_uuid
5295               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5296               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5297       elif level == locking.LEVEL_NODE:
5298         lu._LockInstancesNodes() # pylint: disable=W0212
5299
5300   @staticmethod
5301   def _CheckGroupLocks(lu):
5302     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5303     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5304
5305     # Check if node groups for locked instances are still correct
5306     for instance_name in owned_instances:
5307       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5308
5309   def _GetQueryData(self, lu):
5310     """Computes the list of instances and their attributes.
5311
5312     """
5313     if self.do_grouplocks:
5314       self._CheckGroupLocks(lu)
5315
5316     cluster = lu.cfg.GetClusterInfo()
5317     all_info = lu.cfg.GetAllInstancesInfo()
5318
5319     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5320
5321     instance_list = [all_info[name] for name in instance_names]
5322     nodes = frozenset(itertools.chain(*(inst.all_nodes
5323                                         for inst in instance_list)))
5324     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5325     bad_nodes = []
5326     offline_nodes = []
5327     wrongnode_inst = set()
5328
5329     # Gather data as requested
5330     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5331       live_data = {}
5332       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5333       for name in nodes:
5334         result = node_data[name]
5335         if result.offline:
5336           # offline nodes will be in both lists
5337           assert result.fail_msg
5338           offline_nodes.append(name)
5339         if result.fail_msg:
5340           bad_nodes.append(name)
5341         elif result.payload:
5342           for inst in result.payload:
5343             if inst in all_info:
5344               if all_info[inst].primary_node == name:
5345                 live_data.update(result.payload)
5346               else:
5347                 wrongnode_inst.add(inst)
5348             else:
5349               # orphan instance; we don't list it here as we don't
5350               # handle this case yet in the output of instance listing
5351               logging.warning("Orphan instance '%s' found on node %s",
5352                               inst, name)
5353         # else no instance is alive
5354     else:
5355       live_data = {}
5356
5357     if query.IQ_DISKUSAGE in self.requested_data:
5358       gmi = ganeti.masterd.instance
5359       disk_usage = dict((inst.name,
5360                          gmi.ComputeDiskSize(inst.disk_template,
5361                                              [{constants.IDISK_SIZE: disk.size}
5362                                               for disk in inst.disks]))
5363                         for inst in instance_list)
5364     else:
5365       disk_usage = None
5366
5367     if query.IQ_CONSOLE in self.requested_data:
5368       consinfo = {}
5369       for inst in instance_list:
5370         if inst.name in live_data:
5371           # Instance is running
5372           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5373         else:
5374           consinfo[inst.name] = None
5375       assert set(consinfo.keys()) == set(instance_names)
5376     else:
5377       consinfo = None
5378
5379     if query.IQ_NODES in self.requested_data:
5380       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5381                                             instance_list)))
5382       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5383       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5384                     for uuid in set(map(operator.attrgetter("group"),
5385                                         nodes.values())))
5386     else:
5387       nodes = None
5388       groups = None
5389
5390     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5391                                    disk_usage, offline_nodes, bad_nodes,
5392                                    live_data, wrongnode_inst, consinfo,
5393                                    nodes, groups)
5394
5395
5396 class LUQuery(NoHooksLU):
5397   """Query for resources/items of a certain kind.
5398
5399   """
5400   # pylint: disable=W0142
5401   REQ_BGL = False
5402
5403   def CheckArguments(self):
5404     qcls = _GetQueryImplementation(self.op.what)
5405
5406     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5407
5408   def ExpandNames(self):
5409     self.impl.ExpandNames(self)
5410
5411   def DeclareLocks(self, level):
5412     self.impl.DeclareLocks(self, level)
5413
5414   def Exec(self, feedback_fn):
5415     return self.impl.NewStyleQuery(self)
5416
5417
5418 class LUQueryFields(NoHooksLU):
5419   """Query for resources/items of a certain kind.
5420
5421   """
5422   # pylint: disable=W0142
5423   REQ_BGL = False
5424
5425   def CheckArguments(self):
5426     self.qcls = _GetQueryImplementation(self.op.what)
5427
5428   def ExpandNames(self):
5429     self.needed_locks = {}
5430
5431   def Exec(self, feedback_fn):
5432     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5433
5434
5435 class LUNodeModifyStorage(NoHooksLU):
5436   """Logical unit for modifying a storage volume on a node.
5437
5438   """
5439   REQ_BGL = False
5440
5441   def CheckArguments(self):
5442     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5443
5444     storage_type = self.op.storage_type
5445
5446     try:
5447       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5448     except KeyError:
5449       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5450                                  " modified" % storage_type,
5451                                  errors.ECODE_INVAL)
5452
5453     diff = set(self.op.changes.keys()) - modifiable
5454     if diff:
5455       raise errors.OpPrereqError("The following fields can not be modified for"
5456                                  " storage units of type '%s': %r" %
5457                                  (storage_type, list(diff)),
5458                                  errors.ECODE_INVAL)
5459
5460   def ExpandNames(self):
5461     self.needed_locks = {
5462       locking.LEVEL_NODE: self.op.node_name,
5463       }
5464
5465   def Exec(self, feedback_fn):
5466     """Computes the list of nodes and their attributes.
5467
5468     """
5469     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5470     result = self.rpc.call_storage_modify(self.op.node_name,
5471                                           self.op.storage_type, st_args,
5472                                           self.op.name, self.op.changes)
5473     result.Raise("Failed to modify storage unit '%s' on %s" %
5474                  (self.op.name, self.op.node_name))
5475
5476
5477 class LUNodeAdd(LogicalUnit):
5478   """Logical unit for adding node to the cluster.
5479
5480   """
5481   HPATH = "node-add"
5482   HTYPE = constants.HTYPE_NODE
5483   _NFLAGS = ["master_capable", "vm_capable"]
5484
5485   def CheckArguments(self):
5486     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5487     # validate/normalize the node name
5488     self.hostname = netutils.GetHostname(name=self.op.node_name,
5489                                          family=self.primary_ip_family)
5490     self.op.node_name = self.hostname.name
5491
5492     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5493       raise errors.OpPrereqError("Cannot readd the master node",
5494                                  errors.ECODE_STATE)
5495
5496     if self.op.readd and self.op.group:
5497       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5498                                  " being readded", errors.ECODE_INVAL)
5499
5500   def BuildHooksEnv(self):
5501     """Build hooks env.
5502
5503     This will run on all nodes before, and on all nodes + the new node after.
5504
5505     """
5506     return {
5507       "OP_TARGET": self.op.node_name,
5508       "NODE_NAME": self.op.node_name,
5509       "NODE_PIP": self.op.primary_ip,
5510       "NODE_SIP": self.op.secondary_ip,
5511       "MASTER_CAPABLE": str(self.op.master_capable),
5512       "VM_CAPABLE": str(self.op.vm_capable),
5513       }
5514
5515   def BuildHooksNodes(self):
5516     """Build hooks nodes.
5517
5518     """
5519     # Exclude added node
5520     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5521     post_nodes = pre_nodes + [self.op.node_name, ]
5522
5523     return (pre_nodes, post_nodes)
5524
5525   def CheckPrereq(self):
5526     """Check prerequisites.
5527
5528     This checks:
5529      - the new node is not already in the config
5530      - it is resolvable
5531      - its parameters (single/dual homed) matches the cluster
5532
5533     Any errors are signaled by raising errors.OpPrereqError.
5534
5535     """
5536     cfg = self.cfg
5537     hostname = self.hostname
5538     node = hostname.name
5539     primary_ip = self.op.primary_ip = hostname.ip
5540     if self.op.secondary_ip is None:
5541       if self.primary_ip_family == netutils.IP6Address.family:
5542         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5543                                    " IPv4 address must be given as secondary",
5544                                    errors.ECODE_INVAL)
5545       self.op.secondary_ip = primary_ip
5546
5547     secondary_ip = self.op.secondary_ip
5548     if not netutils.IP4Address.IsValid(secondary_ip):
5549       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5550                                  " address" % secondary_ip, errors.ECODE_INVAL)
5551
5552     node_list = cfg.GetNodeList()
5553     if not self.op.readd and node in node_list:
5554       raise errors.OpPrereqError("Node %s is already in the configuration" %
5555                                  node, errors.ECODE_EXISTS)
5556     elif self.op.readd and node not in node_list:
5557       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5558                                  errors.ECODE_NOENT)
5559
5560     self.changed_primary_ip = False
5561
5562     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5563       if self.op.readd and node == existing_node_name:
5564         if existing_node.secondary_ip != secondary_ip:
5565           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5566                                      " address configuration as before",
5567                                      errors.ECODE_INVAL)
5568         if existing_node.primary_ip != primary_ip:
5569           self.changed_primary_ip = True
5570
5571         continue
5572
5573       if (existing_node.primary_ip == primary_ip or
5574           existing_node.secondary_ip == primary_ip or
5575           existing_node.primary_ip == secondary_ip or
5576           existing_node.secondary_ip == secondary_ip):
5577         raise errors.OpPrereqError("New node ip address(es) conflict with"
5578                                    " existing node %s" % existing_node.name,
5579                                    errors.ECODE_NOTUNIQUE)
5580
5581     # After this 'if' block, None is no longer a valid value for the
5582     # _capable op attributes
5583     if self.op.readd:
5584       old_node = self.cfg.GetNodeInfo(node)
5585       assert old_node is not None, "Can't retrieve locked node %s" % node
5586       for attr in self._NFLAGS:
5587         if getattr(self.op, attr) is None:
5588           setattr(self.op, attr, getattr(old_node, attr))
5589     else:
5590       for attr in self._NFLAGS:
5591         if getattr(self.op, attr) is None:
5592           setattr(self.op, attr, True)
5593
5594     if self.op.readd and not self.op.vm_capable:
5595       pri, sec = cfg.GetNodeInstances(node)
5596       if pri or sec:
5597         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5598                                    " flag set to false, but it already holds"
5599                                    " instances" % node,
5600                                    errors.ECODE_STATE)
5601
5602     # check that the type of the node (single versus dual homed) is the
5603     # same as for the master
5604     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5605     master_singlehomed = myself.secondary_ip == myself.primary_ip
5606     newbie_singlehomed = secondary_ip == primary_ip
5607     if master_singlehomed != newbie_singlehomed:
5608       if master_singlehomed:
5609         raise errors.OpPrereqError("The master has no secondary ip but the"
5610                                    " new node has one",
5611                                    errors.ECODE_INVAL)
5612       else:
5613         raise errors.OpPrereqError("The master has a secondary ip but the"
5614                                    " new node doesn't have one",
5615                                    errors.ECODE_INVAL)
5616
5617     # checks reachability
5618     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5619       raise errors.OpPrereqError("Node not reachable by ping",
5620                                  errors.ECODE_ENVIRON)
5621
5622     if not newbie_singlehomed:
5623       # check reachability from my secondary ip to newbie's secondary ip
5624       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5625                               source=myself.secondary_ip):
5626         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5627                                    " based ping to node daemon port",
5628                                    errors.ECODE_ENVIRON)
5629
5630     if self.op.readd:
5631       exceptions = [node]
5632     else:
5633       exceptions = []
5634
5635     if self.op.master_capable:
5636       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5637     else:
5638       self.master_candidate = False
5639
5640     if self.op.readd:
5641       self.new_node = old_node
5642     else:
5643       node_group = cfg.LookupNodeGroup(self.op.group)
5644       self.new_node = objects.Node(name=node,
5645                                    primary_ip=primary_ip,
5646                                    secondary_ip=secondary_ip,
5647                                    master_candidate=self.master_candidate,
5648                                    offline=False, drained=False,
5649                                    group=node_group)
5650
5651     if self.op.ndparams:
5652       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5653
5654     if self.op.hv_state:
5655       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5656
5657     if self.op.disk_state:
5658       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5659
5660     # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5661     #       it a property on the base class.
5662     result = rpc.DnsOnlyRunner().call_version([node])[node]
5663     result.Raise("Can't get version information from node %s" % node)
5664     if constants.PROTOCOL_VERSION == result.payload:
5665       logging.info("Communication to node %s fine, sw version %s match",
5666                    node, result.payload)
5667     else:
5668       raise errors.OpPrereqError("Version mismatch master version %s,"
5669                                  " node version %s" %
5670                                  (constants.PROTOCOL_VERSION, result.payload),
5671                                  errors.ECODE_ENVIRON)
5672
5673   def Exec(self, feedback_fn):
5674     """Adds the new node to the cluster.
5675
5676     """
5677     new_node = self.new_node
5678     node = new_node.name
5679
5680     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5681       "Not owning BGL"
5682
5683     # We adding a new node so we assume it's powered
5684     new_node.powered = True
5685
5686     # for re-adds, reset the offline/drained/master-candidate flags;
5687     # we need to reset here, otherwise offline would prevent RPC calls
5688     # later in the procedure; this also means that if the re-add
5689     # fails, we are left with a non-offlined, broken node
5690     if self.op.readd:
5691       new_node.drained = new_node.offline = False # pylint: disable=W0201
5692       self.LogInfo("Readding a node, the offline/drained flags were reset")
5693       # if we demote the node, we do cleanup later in the procedure
5694       new_node.master_candidate = self.master_candidate
5695       if self.changed_primary_ip:
5696         new_node.primary_ip = self.op.primary_ip
5697
5698     # copy the master/vm_capable flags
5699     for attr in self._NFLAGS:
5700       setattr(new_node, attr, getattr(self.op, attr))
5701
5702     # notify the user about any possible mc promotion
5703     if new_node.master_candidate:
5704       self.LogInfo("Node will be a master candidate")
5705
5706     if self.op.ndparams:
5707       new_node.ndparams = self.op.ndparams
5708     else:
5709       new_node.ndparams = {}
5710
5711     if self.op.hv_state:
5712       new_node.hv_state_static = self.new_hv_state
5713
5714     if self.op.disk_state:
5715       new_node.disk_state_static = self.new_disk_state
5716
5717     # Add node to our /etc/hosts, and add key to known_hosts
5718     if self.cfg.GetClusterInfo().modify_etc_hosts:
5719       master_node = self.cfg.GetMasterNode()
5720       result = self.rpc.call_etc_hosts_modify(master_node,
5721                                               constants.ETC_HOSTS_ADD,
5722                                               self.hostname.name,
5723                                               self.hostname.ip)
5724       result.Raise("Can't update hosts file with new host data")
5725
5726     if new_node.secondary_ip != new_node.primary_ip:
5727       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5728                                False)
5729
5730     node_verify_list = [self.cfg.GetMasterNode()]
5731     node_verify_param = {
5732       constants.NV_NODELIST: ([node], {}),
5733       # TODO: do a node-net-test as well?
5734     }
5735
5736     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5737                                        self.cfg.GetClusterName())
5738     for verifier in node_verify_list:
5739       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5740       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5741       if nl_payload:
5742         for failed in nl_payload:
5743           feedback_fn("ssh/hostname verification failed"
5744                       " (checking from %s): %s" %
5745                       (verifier, nl_payload[failed]))
5746         raise errors.OpExecError("ssh/hostname verification failed")
5747
5748     if self.op.readd:
5749       _RedistributeAncillaryFiles(self)
5750       self.context.ReaddNode(new_node)
5751       # make sure we redistribute the config
5752       self.cfg.Update(new_node, feedback_fn)
5753       # and make sure the new node will not have old files around
5754       if not new_node.master_candidate:
5755         result = self.rpc.call_node_demote_from_mc(new_node.name)
5756         msg = result.fail_msg
5757         if msg:
5758           self.LogWarning("Node failed to demote itself from master"
5759                           " candidate status: %s" % msg)
5760     else:
5761       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5762                                   additional_vm=self.op.vm_capable)
5763       self.context.AddNode(new_node, self.proc.GetECId())
5764
5765
5766 class LUNodeSetParams(LogicalUnit):
5767   """Modifies the parameters of a node.
5768
5769   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5770       to the node role (as _ROLE_*)
5771   @cvar _R2F: a dictionary from node role to tuples of flags
5772   @cvar _FLAGS: a list of attribute names corresponding to the flags
5773
5774   """
5775   HPATH = "node-modify"
5776   HTYPE = constants.HTYPE_NODE
5777   REQ_BGL = False
5778   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5779   _F2R = {
5780     (True, False, False): _ROLE_CANDIDATE,
5781     (False, True, False): _ROLE_DRAINED,
5782     (False, False, True): _ROLE_OFFLINE,
5783     (False, False, False): _ROLE_REGULAR,
5784     }
5785   _R2F = dict((v, k) for k, v in _F2R.items())
5786   _FLAGS = ["master_candidate", "drained", "offline"]
5787
5788   def CheckArguments(self):
5789     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5790     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5791                 self.op.master_capable, self.op.vm_capable,
5792                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5793                 self.op.disk_state]
5794     if all_mods.count(None) == len(all_mods):
5795       raise errors.OpPrereqError("Please pass at least one modification",
5796                                  errors.ECODE_INVAL)
5797     if all_mods.count(True) > 1:
5798       raise errors.OpPrereqError("Can't set the node into more than one"
5799                                  " state at the same time",
5800                                  errors.ECODE_INVAL)
5801
5802     # Boolean value that tells us whether we might be demoting from MC
5803     self.might_demote = (self.op.master_candidate is False or
5804                          self.op.offline is True or
5805                          self.op.drained is True or
5806                          self.op.master_capable is False)
5807
5808     if self.op.secondary_ip:
5809       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5810         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5811                                    " address" % self.op.secondary_ip,
5812                                    errors.ECODE_INVAL)
5813
5814     self.lock_all = self.op.auto_promote and self.might_demote
5815     self.lock_instances = self.op.secondary_ip is not None
5816
5817   def _InstanceFilter(self, instance):
5818     """Filter for getting affected instances.
5819
5820     """
5821     return (instance.disk_template in constants.DTS_INT_MIRROR and
5822             self.op.node_name in instance.all_nodes)
5823
5824   def ExpandNames(self):
5825     if self.lock_all:
5826       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5827     else:
5828       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5829
5830     # Since modifying a node can have severe effects on currently running
5831     # operations the resource lock is at least acquired in shared mode
5832     self.needed_locks[locking.LEVEL_NODE_RES] = \
5833       self.needed_locks[locking.LEVEL_NODE]
5834
5835     # Get node resource and instance locks in shared mode; they are not used
5836     # for anything but read-only access
5837     self.share_locks[locking.LEVEL_NODE_RES] = 1
5838     self.share_locks[locking.LEVEL_INSTANCE] = 1
5839
5840     if self.lock_instances:
5841       self.needed_locks[locking.LEVEL_INSTANCE] = \
5842         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5843
5844   def BuildHooksEnv(self):
5845     """Build hooks env.
5846
5847     This runs on the master node.
5848
5849     """
5850     return {
5851       "OP_TARGET": self.op.node_name,
5852       "MASTER_CANDIDATE": str(self.op.master_candidate),
5853       "OFFLINE": str(self.op.offline),
5854       "DRAINED": str(self.op.drained),
5855       "MASTER_CAPABLE": str(self.op.master_capable),
5856       "VM_CAPABLE": str(self.op.vm_capable),
5857       }
5858
5859   def BuildHooksNodes(self):
5860     """Build hooks nodes.
5861
5862     """
5863     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5864     return (nl, nl)
5865
5866   def CheckPrereq(self):
5867     """Check prerequisites.
5868
5869     This only checks the instance list against the existing names.
5870
5871     """
5872     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5873
5874     if self.lock_instances:
5875       affected_instances = \
5876         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5877
5878       # Verify instance locks
5879       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5880       wanted_instances = frozenset(affected_instances.keys())
5881       if wanted_instances - owned_instances:
5882         raise errors.OpPrereqError("Instances affected by changing node %s's"
5883                                    " secondary IP address have changed since"
5884                                    " locks were acquired, wanted '%s', have"
5885                                    " '%s'; retry the operation" %
5886                                    (self.op.node_name,
5887                                     utils.CommaJoin(wanted_instances),
5888                                     utils.CommaJoin(owned_instances)),
5889                                    errors.ECODE_STATE)
5890     else:
5891       affected_instances = None
5892
5893     if (self.op.master_candidate is not None or
5894         self.op.drained is not None or
5895         self.op.offline is not None):
5896       # we can't change the master's node flags
5897       if self.op.node_name == self.cfg.GetMasterNode():
5898         raise errors.OpPrereqError("The master role can be changed"
5899                                    " only via master-failover",
5900                                    errors.ECODE_INVAL)
5901
5902     if self.op.master_candidate and not node.master_capable:
5903       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5904                                  " it a master candidate" % node.name,
5905                                  errors.ECODE_STATE)
5906
5907     if self.op.vm_capable is False:
5908       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5909       if ipri or isec:
5910         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5911                                    " the vm_capable flag" % node.name,
5912                                    errors.ECODE_STATE)
5913
5914     if node.master_candidate and self.might_demote and not self.lock_all:
5915       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5916       # check if after removing the current node, we're missing master
5917       # candidates
5918       (mc_remaining, mc_should, _) = \
5919           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5920       if mc_remaining < mc_should:
5921         raise errors.OpPrereqError("Not enough master candidates, please"
5922                                    " pass auto promote option to allow"
5923                                    " promotion (--auto-promote or RAPI"
5924                                    " auto_promote=True)", errors.ECODE_STATE)
5925
5926     self.old_flags = old_flags = (node.master_candidate,
5927                                   node.drained, node.offline)
5928     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5929     self.old_role = old_role = self._F2R[old_flags]
5930
5931     # Check for ineffective changes
5932     for attr in self._FLAGS:
5933       if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5934         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5935         setattr(self.op, attr, None)
5936
5937     # Past this point, any flag change to False means a transition
5938     # away from the respective state, as only real changes are kept
5939
5940     # TODO: We might query the real power state if it supports OOB
5941     if _SupportsOob(self.cfg, node):
5942       if self.op.offline is False and not (node.powered or
5943                                            self.op.powered is True):
5944         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5945                                     " offline status can be reset") %
5946                                    self.op.node_name, errors.ECODE_STATE)
5947     elif self.op.powered is not None:
5948       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5949                                   " as it does not support out-of-band"
5950                                   " handling") % self.op.node_name,
5951                                  errors.ECODE_STATE)
5952
5953     # If we're being deofflined/drained, we'll MC ourself if needed
5954     if (self.op.drained is False or self.op.offline is False or
5955         (self.op.master_capable and not node.master_capable)):
5956       if _DecideSelfPromotion(self):
5957         self.op.master_candidate = True
5958         self.LogInfo("Auto-promoting node to master candidate")
5959
5960     # If we're no longer master capable, we'll demote ourselves from MC
5961     if self.op.master_capable is False and node.master_candidate:
5962       self.LogInfo("Demoting from master candidate")
5963       self.op.master_candidate = False
5964
5965     # Compute new role
5966     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5967     if self.op.master_candidate:
5968       new_role = self._ROLE_CANDIDATE
5969     elif self.op.drained:
5970       new_role = self._ROLE_DRAINED
5971     elif self.op.offline:
5972       new_role = self._ROLE_OFFLINE
5973     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5974       # False is still in new flags, which means we're un-setting (the
5975       # only) True flag
5976       new_role = self._ROLE_REGULAR
5977     else: # no new flags, nothing, keep old role
5978       new_role = old_role
5979
5980     self.new_role = new_role
5981
5982     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5983       # Trying to transition out of offline status
5984       result = self.rpc.call_version([node.name])[node.name]
5985       if result.fail_msg:
5986         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5987                                    " to report its version: %s" %
5988                                    (node.name, result.fail_msg),
5989                                    errors.ECODE_STATE)
5990       else:
5991         self.LogWarning("Transitioning node from offline to online state"
5992                         " without using re-add. Please make sure the node"
5993                         " is healthy!")
5994
5995     # When changing the secondary ip, verify if this is a single-homed to
5996     # multi-homed transition or vice versa, and apply the relevant
5997     # restrictions.
5998     if self.op.secondary_ip:
5999       # Ok even without locking, because this can't be changed by any LU
6000       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6001       master_singlehomed = master.secondary_ip == master.primary_ip
6002       if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6003         if self.op.force and node.name == master.name:
6004           self.LogWarning("Transitioning from single-homed to multi-homed"
6005                           " cluster. All nodes will require a secondary ip.")
6006         else:
6007           raise errors.OpPrereqError("Changing the secondary ip on a"
6008                                      " single-homed cluster requires the"
6009                                      " --force option to be passed, and the"
6010                                      " target node to be the master",
6011                                      errors.ECODE_INVAL)
6012       elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6013         if self.op.force and node.name == master.name:
6014           self.LogWarning("Transitioning from multi-homed to single-homed"
6015                           " cluster. Secondary IPs will have to be removed.")
6016         else:
6017           raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6018                                      " same as the primary IP on a multi-homed"
6019                                      " cluster, unless the --force option is"
6020                                      " passed, and the target node is the"
6021                                      " master", errors.ECODE_INVAL)
6022
6023       assert not (frozenset(affected_instances) -
6024                   self.owned_locks(locking.LEVEL_INSTANCE))
6025
6026       if node.offline:
6027         if affected_instances:
6028           msg = ("Cannot change secondary IP address: offline node has"
6029                  " instances (%s) configured to use it" %
6030                  utils.CommaJoin(affected_instances.keys()))
6031           raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6032       else:
6033         # On online nodes, check that no instances are running, and that
6034         # the node has the new ip and we can reach it.
6035         for instance in affected_instances.values():
6036           _CheckInstanceState(self, instance, INSTANCE_DOWN,
6037                               msg="cannot change secondary ip")
6038
6039         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6040         if master.name != node.name:
6041           # check reachability from master secondary ip to new secondary ip
6042           if not netutils.TcpPing(self.op.secondary_ip,
6043                                   constants.DEFAULT_NODED_PORT,
6044                                   source=master.secondary_ip):
6045             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6046                                        " based ping to node daemon port",
6047                                        errors.ECODE_ENVIRON)
6048
6049     if self.op.ndparams:
6050       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6051       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6052       self.new_ndparams = new_ndparams
6053
6054     if self.op.hv_state:
6055       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6056                                                  self.node.hv_state_static)
6057
6058     if self.op.disk_state:
6059       self.new_disk_state = \
6060         _MergeAndVerifyDiskState(self.op.disk_state,
6061                                  self.node.disk_state_static)
6062
6063   def Exec(self, feedback_fn):
6064     """Modifies a node.
6065
6066     """
6067     node = self.node
6068     old_role = self.old_role
6069     new_role = self.new_role
6070
6071     result = []
6072
6073     if self.op.ndparams:
6074       node.ndparams = self.new_ndparams
6075
6076     if self.op.powered is not None:
6077       node.powered = self.op.powered
6078
6079     if self.op.hv_state:
6080       node.hv_state_static = self.new_hv_state
6081
6082     if self.op.disk_state:
6083       node.disk_state_static = self.new_disk_state
6084
6085     for attr in ["master_capable", "vm_capable"]:
6086       val = getattr(self.op, attr)
6087       if val is not None:
6088         setattr(node, attr, val)
6089         result.append((attr, str(val)))
6090
6091     if new_role != old_role:
6092       # Tell the node to demote itself, if no longer MC and not offline
6093       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6094         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6095         if msg:
6096           self.LogWarning("Node failed to demote itself: %s", msg)
6097
6098       new_flags = self._R2F[new_role]
6099       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6100         if of != nf:
6101           result.append((desc, str(nf)))
6102       (node.master_candidate, node.drained, node.offline) = new_flags
6103
6104       # we locked all nodes, we adjust the CP before updating this node
6105       if self.lock_all:
6106         _AdjustCandidatePool(self, [node.name])
6107
6108     if self.op.secondary_ip:
6109       node.secondary_ip = self.op.secondary_ip
6110       result.append(("secondary_ip", self.op.secondary_ip))
6111
6112     # this will trigger configuration file update, if needed
6113     self.cfg.Update(node, feedback_fn)
6114
6115     # this will trigger job queue propagation or cleanup if the mc
6116     # flag changed
6117     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6118       self.context.ReaddNode(node)
6119
6120     return result
6121
6122
6123 class LUNodePowercycle(NoHooksLU):
6124   """Powercycles a node.
6125
6126   """
6127   REQ_BGL = False
6128
6129   def CheckArguments(self):
6130     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6131     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6132       raise errors.OpPrereqError("The node is the master and the force"
6133                                  " parameter was not set",
6134                                  errors.ECODE_INVAL)
6135
6136   def ExpandNames(self):
6137     """Locking for PowercycleNode.
6138
6139     This is a last-resort option and shouldn't block on other
6140     jobs. Therefore, we grab no locks.
6141
6142     """
6143     self.needed_locks = {}
6144
6145   def Exec(self, feedback_fn):
6146     """Reboots a node.
6147
6148     """
6149     result = self.rpc.call_node_powercycle(self.op.node_name,
6150                                            self.cfg.GetHypervisorType())
6151     result.Raise("Failed to schedule the reboot")
6152     return result.payload
6153
6154
6155 class LUClusterQuery(NoHooksLU):
6156   """Query cluster configuration.
6157
6158   """
6159   REQ_BGL = False
6160
6161   def ExpandNames(self):
6162     self.needed_locks = {}
6163
6164   def Exec(self, feedback_fn):
6165     """Return cluster config.
6166
6167     """
6168     cluster = self.cfg.GetClusterInfo()
6169     os_hvp = {}
6170
6171     # Filter just for enabled hypervisors
6172     for os_name, hv_dict in cluster.os_hvp.items():
6173       os_hvp[os_name] = {}
6174       for hv_name, hv_params in hv_dict.items():
6175         if hv_name in cluster.enabled_hypervisors:
6176           os_hvp[os_name][hv_name] = hv_params
6177
6178     # Convert ip_family to ip_version
6179     primary_ip_version = constants.IP4_VERSION
6180     if cluster.primary_ip_family == netutils.IP6Address.family:
6181       primary_ip_version = constants.IP6_VERSION
6182
6183     result = {
6184       "software_version": constants.RELEASE_VERSION,
6185       "protocol_version": constants.PROTOCOL_VERSION,
6186       "config_version": constants.CONFIG_VERSION,
6187       "os_api_version": max(constants.OS_API_VERSIONS),
6188       "export_version": constants.EXPORT_VERSION,
6189       "architecture": runtime.GetArchInfo(),
6190       "name": cluster.cluster_name,
6191       "master": cluster.master_node,
6192       "default_hypervisor": cluster.primary_hypervisor,
6193       "enabled_hypervisors": cluster.enabled_hypervisors,
6194       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6195                         for hypervisor_name in cluster.enabled_hypervisors]),
6196       "os_hvp": os_hvp,
6197       "beparams": cluster.beparams,
6198       "osparams": cluster.osparams,
6199       "ipolicy": cluster.ipolicy,
6200       "nicparams": cluster.nicparams,
6201       "ndparams": cluster.ndparams,
6202       "diskparams": cluster.diskparams,
6203       "candidate_pool_size": cluster.candidate_pool_size,
6204       "master_netdev": cluster.master_netdev,
6205       "master_netmask": cluster.master_netmask,
6206       "use_external_mip_script": cluster.use_external_mip_script,
6207       "volume_group_name": cluster.volume_group_name,
6208       "drbd_usermode_helper": cluster.drbd_usermode_helper,
6209       "file_storage_dir": cluster.file_storage_dir,
6210       "shared_file_storage_dir": cluster.shared_file_storage_dir,
6211       "maintain_node_health": cluster.maintain_node_health,
6212       "ctime": cluster.ctime,
6213       "mtime": cluster.mtime,
6214       "uuid": cluster.uuid,
6215       "tags": list(cluster.GetTags()),
6216       "uid_pool": cluster.uid_pool,
6217       "default_iallocator": cluster.default_iallocator,
6218       "reserved_lvs": cluster.reserved_lvs,
6219       "primary_ip_version": primary_ip_version,
6220       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6221       "hidden_os": cluster.hidden_os,
6222       "blacklisted_os": cluster.blacklisted_os,
6223       }
6224
6225     return result
6226
6227
6228 class LUClusterConfigQuery(NoHooksLU):
6229   """Return configuration values.
6230
6231   """
6232   REQ_BGL = False
6233
6234   def CheckArguments(self):
6235     self.cq = _ClusterQuery(None, self.op.output_fields, False)
6236
6237   def ExpandNames(self):
6238     self.cq.ExpandNames(self)
6239
6240   def DeclareLocks(self, level):
6241     self.cq.DeclareLocks(self, level)
6242
6243   def Exec(self, feedback_fn):
6244     result = self.cq.OldStyleQuery(self)
6245
6246     assert len(result) == 1
6247
6248     return result[0]
6249
6250
6251 class _ClusterQuery(_QueryBase):
6252   FIELDS = query.CLUSTER_FIELDS
6253
6254   #: Do not sort (there is only one item)
6255   SORT_FIELD = None
6256
6257   def ExpandNames(self, lu):
6258     lu.needed_locks = {}
6259
6260     # The following variables interact with _QueryBase._GetNames
6261     self.wanted = locking.ALL_SET
6262     self.do_locking = self.use_locking
6263
6264     if self.do_locking:
6265       raise errors.OpPrereqError("Can not use locking for cluster queries",
6266                                  errors.ECODE_INVAL)
6267
6268   def DeclareLocks(self, lu, level):
6269     pass
6270
6271   def _GetQueryData(self, lu):
6272     """Computes the list of nodes and their attributes.
6273
6274     """
6275     # Locking is not used
6276     assert not (compat.any(lu.glm.is_owned(level)
6277                            for level in locking.LEVELS
6278                            if level != locking.LEVEL_CLUSTER) or
6279                 self.do_locking or self.use_locking)
6280
6281     if query.CQ_CONFIG in self.requested_data:
6282       cluster = lu.cfg.GetClusterInfo()
6283     else:
6284       cluster = NotImplemented
6285
6286     if query.CQ_QUEUE_DRAINED in self.requested_data:
6287       drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6288     else:
6289       drain_flag = NotImplemented
6290
6291     if query.CQ_WATCHER_PAUSE in self.requested_data:
6292       watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6293     else:
6294       watcher_pause = NotImplemented
6295
6296     return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6297
6298
6299 class LUInstanceActivateDisks(NoHooksLU):
6300   """Bring up an instance's disks.
6301
6302   """
6303   REQ_BGL = False
6304
6305   def ExpandNames(self):
6306     self._ExpandAndLockInstance()
6307     self.needed_locks[locking.LEVEL_NODE] = []
6308     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6309
6310   def DeclareLocks(self, level):
6311     if level == locking.LEVEL_NODE:
6312       self._LockInstancesNodes()
6313
6314   def CheckPrereq(self):
6315     """Check prerequisites.
6316
6317     This checks that the instance is in the cluster.
6318
6319     """
6320     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6321     assert self.instance is not None, \
6322       "Cannot retrieve locked instance %s" % self.op.instance_name
6323     _CheckNodeOnline(self, self.instance.primary_node)
6324
6325   def Exec(self, feedback_fn):
6326     """Activate the disks.
6327
6328     """
6329     disks_ok, disks_info = \
6330               _AssembleInstanceDisks(self, self.instance,
6331                                      ignore_size=self.op.ignore_size)
6332     if not disks_ok:
6333       raise errors.OpExecError("Cannot activate block devices")
6334
6335     if self.op.wait_for_sync:
6336       if not _WaitForSync(self, self.instance):
6337         raise errors.OpExecError("Some disks of the instance are degraded!")
6338
6339     return disks_info
6340
6341
6342 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6343                            ignore_size=False):
6344   """Prepare the block devices for an instance.
6345
6346   This sets up the block devices on all nodes.
6347
6348   @type lu: L{LogicalUnit}
6349   @param lu: the logical unit on whose behalf we execute
6350   @type instance: L{objects.Instance}
6351   @param instance: the instance for whose disks we assemble
6352   @type disks: list of L{objects.Disk} or None
6353   @param disks: which disks to assemble (or all, if None)
6354   @type ignore_secondaries: boolean
6355   @param ignore_secondaries: if true, errors on secondary nodes
6356       won't result in an error return from the function
6357   @type ignore_size: boolean
6358   @param ignore_size: if true, the current known size of the disk
6359       will not be used during the disk activation, useful for cases
6360       when the size is wrong
6361   @return: False if the operation failed, otherwise a list of
6362       (host, instance_visible_name, node_visible_name)
6363       with the mapping from node devices to instance devices
6364
6365   """
6366   device_info = []
6367   disks_ok = True
6368   iname = instance.name
6369   disks = _ExpandCheckDisks(instance, disks)
6370
6371   # With the two passes mechanism we try to reduce the window of
6372   # opportunity for the race condition of switching DRBD to primary
6373   # before handshaking occured, but we do not eliminate it
6374
6375   # The proper fix would be to wait (with some limits) until the
6376   # connection has been made and drbd transitions from WFConnection
6377   # into any other network-connected state (Connected, SyncTarget,
6378   # SyncSource, etc.)
6379
6380   # 1st pass, assemble on all nodes in secondary mode
6381   for idx, inst_disk in enumerate(disks):
6382     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6383       if ignore_size:
6384         node_disk = node_disk.Copy()
6385         node_disk.UnsetSize()
6386       lu.cfg.SetDiskID(node_disk, node)
6387       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6388                                              False, idx)
6389       msg = result.fail_msg
6390       if msg:
6391         is_offline_secondary = (node in instance.secondary_nodes and
6392                                 result.offline)
6393         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6394                            " (is_primary=False, pass=1): %s",
6395                            inst_disk.iv_name, node, msg)
6396         if not (ignore_secondaries or is_offline_secondary):
6397           disks_ok = False
6398
6399   # FIXME: race condition on drbd migration to primary
6400
6401   # 2nd pass, do only the primary node
6402   for idx, inst_disk in enumerate(disks):
6403     dev_path = None
6404
6405     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6406       if node != instance.primary_node:
6407         continue
6408       if ignore_size:
6409         node_disk = node_disk.Copy()
6410         node_disk.UnsetSize()
6411       lu.cfg.SetDiskID(node_disk, node)
6412       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6413                                              True, idx)
6414       msg = result.fail_msg
6415       if msg:
6416         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6417                            " (is_primary=True, pass=2): %s",
6418                            inst_disk.iv_name, node, msg)
6419         disks_ok = False
6420       else:
6421         dev_path = result.payload
6422
6423     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6424
6425   # leave the disks configured for the primary node
6426   # this is a workaround that would be fixed better by
6427   # improving the logical/physical id handling
6428   for disk in disks:
6429     lu.cfg.SetDiskID(disk, instance.primary_node)
6430
6431   return disks_ok, device_info
6432
6433
6434 def _StartInstanceDisks(lu, instance, force):
6435   """Start the disks of an instance.
6436
6437   """
6438   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6439                                            ignore_secondaries=force)
6440   if not disks_ok:
6441     _ShutdownInstanceDisks(lu, instance)
6442     if force is not None and not force:
6443       lu.proc.LogWarning("", hint="If the message above refers to a"
6444                          " secondary node,"
6445                          " you can retry the operation using '--force'.")
6446     raise errors.OpExecError("Disk consistency error")
6447
6448
6449 class LUInstanceDeactivateDisks(NoHooksLU):
6450   """Shutdown an instance's disks.
6451
6452   """
6453   REQ_BGL = False
6454
6455   def ExpandNames(self):
6456     self._ExpandAndLockInstance()
6457     self.needed_locks[locking.LEVEL_NODE] = []
6458     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6459
6460   def DeclareLocks(self, level):
6461     if level == locking.LEVEL_NODE:
6462       self._LockInstancesNodes()
6463
6464   def CheckPrereq(self):
6465     """Check prerequisites.
6466
6467     This checks that the instance is in the cluster.
6468
6469     """
6470     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6471     assert self.instance is not None, \
6472       "Cannot retrieve locked instance %s" % self.op.instance_name
6473
6474   def Exec(self, feedback_fn):
6475     """Deactivate the disks
6476
6477     """
6478     instance = self.instance
6479     if self.op.force:
6480       _ShutdownInstanceDisks(self, instance)
6481     else:
6482       _SafeShutdownInstanceDisks(self, instance)
6483
6484
6485 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6486   """Shutdown block devices of an instance.
6487
6488   This function checks if an instance is running, before calling
6489   _ShutdownInstanceDisks.
6490
6491   """
6492   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6493   _ShutdownInstanceDisks(lu, instance, disks=disks)
6494
6495
6496 def _ExpandCheckDisks(instance, disks):
6497   """Return the instance disks selected by the disks list
6498
6499   @type disks: list of L{objects.Disk} or None
6500   @param disks: selected disks
6501   @rtype: list of L{objects.Disk}
6502   @return: selected instance disks to act on
6503
6504   """
6505   if disks is None:
6506     return instance.disks
6507   else:
6508     if not set(disks).issubset(instance.disks):
6509       raise errors.ProgrammerError("Can only act on disks belonging to the"
6510                                    " target instance")
6511     return disks
6512
6513
6514 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6515   """Shutdown block devices of an instance.
6516
6517   This does the shutdown on all nodes of the instance.
6518
6519   If the ignore_primary is false, errors on the primary node are
6520   ignored.
6521
6522   """
6523   all_result = True
6524   disks = _ExpandCheckDisks(instance, disks)
6525
6526   for disk in disks:
6527     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6528       lu.cfg.SetDiskID(top_disk, node)
6529       result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6530       msg = result.fail_msg
6531       if msg:
6532         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6533                       disk.iv_name, node, msg)
6534         if ((node == instance.primary_node and not ignore_primary) or
6535             (node != instance.primary_node and not result.offline)):
6536           all_result = False
6537   return all_result
6538
6539
6540 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6541   """Checks if a node has enough free memory.
6542
6543   This function check if a given node has the needed amount of free
6544   memory. In case the node has less memory or we cannot get the
6545   information from the node, this function raise an OpPrereqError
6546   exception.
6547
6548   @type lu: C{LogicalUnit}
6549   @param lu: a logical unit from which we get configuration data
6550   @type node: C{str}
6551   @param node: the node to check
6552   @type reason: C{str}
6553   @param reason: string to use in the error message
6554   @type requested: C{int}
6555   @param requested: the amount of memory in MiB to check for
6556   @type hypervisor_name: C{str}
6557   @param hypervisor_name: the hypervisor to ask for memory stats
6558   @rtype: integer
6559   @return: node current free memory
6560   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6561       we cannot check the node
6562
6563   """
6564   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6565   nodeinfo[node].Raise("Can't get data from node %s" % node,
6566                        prereq=True, ecode=errors.ECODE_ENVIRON)
6567   (_, _, (hv_info, )) = nodeinfo[node].payload
6568
6569   free_mem = hv_info.get("memory_free", None)
6570   if not isinstance(free_mem, int):
6571     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6572                                " was '%s'" % (node, free_mem),
6573                                errors.ECODE_ENVIRON)
6574   if requested > free_mem:
6575     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6576                                " needed %s MiB, available %s MiB" %
6577                                (node, reason, requested, free_mem),
6578                                errors.ECODE_NORES)
6579   return free_mem
6580
6581
6582 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6583   """Checks if nodes have enough free disk space in the all VGs.
6584
6585   This function check if all given nodes have the needed amount of
6586   free disk. In case any node has less disk or we cannot get the
6587   information from the node, this function raise an OpPrereqError
6588   exception.
6589
6590   @type lu: C{LogicalUnit}
6591   @param lu: a logical unit from which we get configuration data
6592   @type nodenames: C{list}
6593   @param nodenames: the list of node names to check
6594   @type req_sizes: C{dict}
6595   @param req_sizes: the hash of vg and corresponding amount of disk in
6596       MiB to check for
6597   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6598       or we cannot check the node
6599
6600   """
6601   for vg, req_size in req_sizes.items():
6602     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6603
6604
6605 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6606   """Checks if nodes have enough free disk space in the specified VG.
6607
6608   This function check if all given nodes have the needed amount of
6609   free disk. In case any node has less disk or we cannot get the
6610   information from the node, this function raise an OpPrereqError
6611   exception.
6612
6613   @type lu: C{LogicalUnit}
6614   @param lu: a logical unit from which we get configuration data
6615   @type nodenames: C{list}
6616   @param nodenames: the list of node names to check
6617   @type vg: C{str}
6618   @param vg: the volume group to check
6619   @type requested: C{int}
6620   @param requested: the amount of disk in MiB to check for
6621   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6622       or we cannot check the node
6623
6624   """
6625   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6626   for node in nodenames:
6627     info = nodeinfo[node]
6628     info.Raise("Cannot get current information from node %s" % node,
6629                prereq=True, ecode=errors.ECODE_ENVIRON)
6630     (_, (vg_info, ), _) = info.payload
6631     vg_free = vg_info.get("vg_free", None)
6632     if not isinstance(vg_free, int):
6633       raise errors.OpPrereqError("Can't compute free disk space on node"
6634                                  " %s for vg %s, result was '%s'" %
6635                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6636     if requested > vg_free:
6637       raise errors.OpPrereqError("Not enough disk space on target node %s"
6638                                  " vg %s: required %d MiB, available %d MiB" %
6639                                  (node, vg, requested, vg_free),
6640                                  errors.ECODE_NORES)
6641
6642
6643 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6644   """Checks if nodes have enough physical CPUs
6645
6646   This function checks if all given nodes have the needed number of
6647   physical CPUs. In case any node has less CPUs or we cannot get the
6648   information from the node, this function raises an OpPrereqError
6649   exception.
6650
6651   @type lu: C{LogicalUnit}
6652   @param lu: a logical unit from which we get configuration data
6653   @type nodenames: C{list}
6654   @param nodenames: the list of node names to check
6655   @type requested: C{int}
6656   @param requested: the minimum acceptable number of physical CPUs
6657   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6658       or we cannot check the node
6659
6660   """
6661   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6662   for node in nodenames:
6663     info = nodeinfo[node]
6664     info.Raise("Cannot get current information from node %s" % node,
6665                prereq=True, ecode=errors.ECODE_ENVIRON)
6666     (_, _, (hv_info, )) = info.payload
6667     num_cpus = hv_info.get("cpu_total", None)
6668     if not isinstance(num_cpus, int):
6669       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6670                                  " on node %s, result was '%s'" %
6671                                  (node, num_cpus), errors.ECODE_ENVIRON)
6672     if requested > num_cpus:
6673       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6674                                  "required" % (node, num_cpus, requested),
6675                                  errors.ECODE_NORES)
6676
6677
6678 class LUInstanceStartup(LogicalUnit):
6679   """Starts an instance.
6680
6681   """
6682   HPATH = "instance-start"
6683   HTYPE = constants.HTYPE_INSTANCE
6684   REQ_BGL = False
6685
6686   def CheckArguments(self):
6687     # extra beparams
6688     if self.op.beparams:
6689       # fill the beparams dict
6690       objects.UpgradeBeParams(self.op.beparams)
6691       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6692
6693   def ExpandNames(self):
6694     self._ExpandAndLockInstance()
6695     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6696
6697   def DeclareLocks(self, level):
6698     if level == locking.LEVEL_NODE_RES:
6699       self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6700
6701   def BuildHooksEnv(self):
6702     """Build hooks env.
6703
6704     This runs on master, primary and secondary nodes of the instance.
6705
6706     """
6707     env = {
6708       "FORCE": self.op.force,
6709       }
6710
6711     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6712
6713     return env
6714
6715   def BuildHooksNodes(self):
6716     """Build hooks nodes.
6717
6718     """
6719     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6720     return (nl, nl)
6721
6722   def CheckPrereq(self):
6723     """Check prerequisites.
6724
6725     This checks that the instance is in the cluster.
6726
6727     """
6728     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6729     assert self.instance is not None, \
6730       "Cannot retrieve locked instance %s" % self.op.instance_name
6731
6732     # extra hvparams
6733     if self.op.hvparams:
6734       # check hypervisor parameter syntax (locally)
6735       cluster = self.cfg.GetClusterInfo()
6736       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6737       filled_hvp = cluster.FillHV(instance)
6738       filled_hvp.update(self.op.hvparams)
6739       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6740       hv_type.CheckParameterSyntax(filled_hvp)
6741       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6742
6743     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6744
6745     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6746
6747     if self.primary_offline and self.op.ignore_offline_nodes:
6748       self.proc.LogWarning("Ignoring offline primary node")
6749
6750       if self.op.hvparams or self.op.beparams:
6751         self.proc.LogWarning("Overridden parameters are ignored")
6752     else:
6753       _CheckNodeOnline(self, instance.primary_node)
6754
6755       bep = self.cfg.GetClusterInfo().FillBE(instance)
6756       bep.update(self.op.beparams)
6757
6758       # check bridges existence
6759       _CheckInstanceBridgesExist(self, instance)
6760
6761       remote_info = self.rpc.call_instance_info(instance.primary_node,
6762                                                 instance.name,
6763                                                 instance.hypervisor)
6764       remote_info.Raise("Error checking node %s" % instance.primary_node,
6765                         prereq=True, ecode=errors.ECODE_ENVIRON)
6766       if not remote_info.payload: # not running already
6767         _CheckNodeFreeMemory(self, instance.primary_node,
6768                              "starting instance %s" % instance.name,
6769                              bep[constants.BE_MINMEM], instance.hypervisor)
6770
6771   def Exec(self, feedback_fn):
6772     """Start the instance.
6773
6774     """
6775     instance = self.instance
6776     force = self.op.force
6777
6778     if not self.op.no_remember:
6779       self.cfg.MarkInstanceUp(instance.name)
6780
6781     if self.primary_offline:
6782       assert self.op.ignore_offline_nodes
6783       self.proc.LogInfo("Primary node offline, marked instance as started")
6784     else:
6785       node_current = instance.primary_node
6786
6787       _StartInstanceDisks(self, instance, force)
6788
6789       result = \
6790         self.rpc.call_instance_start(node_current,
6791                                      (instance, self.op.hvparams,
6792                                       self.op.beparams),
6793                                      self.op.startup_paused)
6794       msg = result.fail_msg
6795       if msg:
6796         _ShutdownInstanceDisks(self, instance)
6797         raise errors.OpExecError("Could not start instance: %s" % msg)
6798
6799
6800 class LUInstanceReboot(LogicalUnit):
6801   """Reboot an instance.
6802
6803   """
6804   HPATH = "instance-reboot"
6805   HTYPE = constants.HTYPE_INSTANCE
6806   REQ_BGL = False
6807
6808   def ExpandNames(self):
6809     self._ExpandAndLockInstance()
6810
6811   def BuildHooksEnv(self):
6812     """Build hooks env.
6813
6814     This runs on master, primary and secondary nodes of the instance.
6815
6816     """
6817     env = {
6818       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6819       "REBOOT_TYPE": self.op.reboot_type,
6820       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6821       }
6822
6823     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6824
6825     return env
6826
6827   def BuildHooksNodes(self):
6828     """Build hooks nodes.
6829
6830     """
6831     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6832     return (nl, nl)
6833
6834   def CheckPrereq(self):
6835     """Check prerequisites.
6836
6837     This checks that the instance is in the cluster.
6838
6839     """
6840     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6841     assert self.instance is not None, \
6842       "Cannot retrieve locked instance %s" % self.op.instance_name
6843     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6844     _CheckNodeOnline(self, instance.primary_node)
6845
6846     # check bridges existence
6847     _CheckInstanceBridgesExist(self, instance)
6848
6849   def Exec(self, feedback_fn):
6850     """Reboot the instance.
6851
6852     """
6853     instance = self.instance
6854     ignore_secondaries = self.op.ignore_secondaries
6855     reboot_type = self.op.reboot_type
6856
6857     remote_info = self.rpc.call_instance_info(instance.primary_node,
6858                                               instance.name,
6859                                               instance.hypervisor)
6860     remote_info.Raise("Error checking node %s" % instance.primary_node)
6861     instance_running = bool(remote_info.payload)
6862
6863     node_current = instance.primary_node
6864
6865     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6866                                             constants.INSTANCE_REBOOT_HARD]:
6867       for disk in instance.disks:
6868         self.cfg.SetDiskID(disk, node_current)
6869       result = self.rpc.call_instance_reboot(node_current, instance,
6870                                              reboot_type,
6871                                              self.op.shutdown_timeout)
6872       result.Raise("Could not reboot instance")
6873     else:
6874       if instance_running:
6875         result = self.rpc.call_instance_shutdown(node_current, instance,
6876                                                  self.op.shutdown_timeout)
6877         result.Raise("Could not shutdown instance for full reboot")
6878         _ShutdownInstanceDisks(self, instance)
6879       else:
6880         self.LogInfo("Instance %s was already stopped, starting now",
6881                      instance.name)
6882       _StartInstanceDisks(self, instance, ignore_secondaries)
6883       result = self.rpc.call_instance_start(node_current,
6884                                             (instance, None, None), False)
6885       msg = result.fail_msg
6886       if msg:
6887         _ShutdownInstanceDisks(self, instance)
6888         raise errors.OpExecError("Could not start instance for"
6889                                  " full reboot: %s" % msg)
6890
6891     self.cfg.MarkInstanceUp(instance.name)
6892
6893
6894 class LUInstanceShutdown(LogicalUnit):
6895   """Shutdown an instance.
6896
6897   """
6898   HPATH = "instance-stop"
6899   HTYPE = constants.HTYPE_INSTANCE
6900   REQ_BGL = False
6901
6902   def ExpandNames(self):
6903     self._ExpandAndLockInstance()
6904
6905   def BuildHooksEnv(self):
6906     """Build hooks env.
6907
6908     This runs on master, primary and secondary nodes of the instance.
6909
6910     """
6911     env = _BuildInstanceHookEnvByObject(self, self.instance)
6912     env["TIMEOUT"] = self.op.timeout
6913     return env
6914
6915   def BuildHooksNodes(self):
6916     """Build hooks nodes.
6917
6918     """
6919     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6920     return (nl, nl)
6921
6922   def CheckPrereq(self):
6923     """Check prerequisites.
6924
6925     This checks that the instance is in the cluster.
6926
6927     """
6928     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6929     assert self.instance is not None, \
6930       "Cannot retrieve locked instance %s" % self.op.instance_name
6931
6932     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6933
6934     self.primary_offline = \
6935       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6936
6937     if self.primary_offline and self.op.ignore_offline_nodes:
6938       self.proc.LogWarning("Ignoring offline primary node")
6939     else:
6940       _CheckNodeOnline(self, self.instance.primary_node)
6941
6942   def Exec(self, feedback_fn):
6943     """Shutdown the instance.
6944
6945     """
6946     instance = self.instance
6947     node_current = instance.primary_node
6948     timeout = self.op.timeout
6949
6950     if not self.op.no_remember:
6951       self.cfg.MarkInstanceDown(instance.name)
6952
6953     if self.primary_offline:
6954       assert self.op.ignore_offline_nodes
6955       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6956     else:
6957       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6958       msg = result.fail_msg
6959       if msg:
6960         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6961
6962       _ShutdownInstanceDisks(self, instance)
6963
6964
6965 class LUInstanceReinstall(LogicalUnit):
6966   """Reinstall an instance.
6967
6968   """
6969   HPATH = "instance-reinstall"
6970   HTYPE = constants.HTYPE_INSTANCE
6971   REQ_BGL = False
6972
6973   def ExpandNames(self):
6974     self._ExpandAndLockInstance()
6975
6976   def BuildHooksEnv(self):
6977     """Build hooks env.
6978
6979     This runs on master, primary and secondary nodes of the instance.
6980
6981     """
6982     return _BuildInstanceHookEnvByObject(self, self.instance)
6983
6984   def BuildHooksNodes(self):
6985     """Build hooks nodes.
6986
6987     """
6988     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6989     return (nl, nl)
6990
6991   def CheckPrereq(self):
6992     """Check prerequisites.
6993
6994     This checks that the instance is in the cluster and is not running.
6995
6996     """
6997     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6998     assert instance is not None, \
6999       "Cannot retrieve locked instance %s" % self.op.instance_name
7000     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7001                      " offline, cannot reinstall")
7002
7003     if instance.disk_template == constants.DT_DISKLESS:
7004       raise errors.OpPrereqError("Instance '%s' has no disks" %
7005                                  self.op.instance_name,
7006                                  errors.ECODE_INVAL)
7007     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7008
7009     if self.op.os_type is not None:
7010       # OS verification
7011       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7012       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7013       instance_os = self.op.os_type
7014     else:
7015       instance_os = instance.os
7016
7017     nodelist = list(instance.all_nodes)
7018
7019     if self.op.osparams:
7020       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7021       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7022       self.os_inst = i_osdict # the new dict (without defaults)
7023     else:
7024       self.os_inst = None
7025
7026     self.instance = instance
7027
7028   def Exec(self, feedback_fn):
7029     """Reinstall the instance.
7030
7031     """
7032     inst = self.instance
7033
7034     if self.op.os_type is not None:
7035       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7036       inst.os = self.op.os_type
7037       # Write to configuration
7038       self.cfg.Update(inst, feedback_fn)
7039
7040     _StartInstanceDisks(self, inst, None)
7041     try:
7042       feedback_fn("Running the instance OS create scripts...")
7043       # FIXME: pass debug option from opcode to backend
7044       result = self.rpc.call_instance_os_add(inst.primary_node,
7045                                              (inst, self.os_inst), True,
7046                                              self.op.debug_level)
7047       result.Raise("Could not install OS for instance %s on node %s" %
7048                    (inst.name, inst.primary_node))
7049     finally:
7050       _ShutdownInstanceDisks(self, inst)
7051
7052
7053 class LUInstanceRecreateDisks(LogicalUnit):
7054   """Recreate an instance's missing disks.
7055
7056   """
7057   HPATH = "instance-recreate-disks"
7058   HTYPE = constants.HTYPE_INSTANCE
7059   REQ_BGL = False
7060
7061   _MODIFYABLE = frozenset([
7062     constants.IDISK_SIZE,
7063     constants.IDISK_MODE,
7064     ])
7065
7066   # New or changed disk parameters may have different semantics
7067   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7068     constants.IDISK_ADOPT,
7069
7070     # TODO: Implement support changing VG while recreating
7071     constants.IDISK_VG,
7072     constants.IDISK_METAVG,
7073     ]))
7074
7075   def _RunAllocator(self):
7076     """Run the allocator based on input opcode.
7077
7078     """
7079     be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7080
7081     # FIXME
7082     # The allocator should actually run in "relocate" mode, but current
7083     # allocators don't support relocating all the nodes of an instance at
7084     # the same time. As a workaround we use "allocate" mode, but this is
7085     # suboptimal for two reasons:
7086     # - The instance name passed to the allocator is present in the list of
7087     #   existing instances, so there could be a conflict within the
7088     #   internal structures of the allocator. This doesn't happen with the
7089     #   current allocators, but it's a liability.
7090     # - The allocator counts the resources used by the instance twice: once
7091     #   because the instance exists already, and once because it tries to
7092     #   allocate a new instance.
7093     # The allocator could choose some of the nodes on which the instance is
7094     # running, but that's not a problem. If the instance nodes are broken,
7095     # they should be already be marked as drained or offline, and hence
7096     # skipped by the allocator. If instance disks have been lost for other
7097     # reasons, then recreating the disks on the same nodes should be fine.
7098     disk_template = self.instance.disk_template
7099     spindle_use = be_full[constants.BE_SPINDLE_USE]
7100     req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7101                                         disk_template=disk_template,
7102                                         tags=list(self.instance.GetTags()),
7103                                         os=self.instance.os,
7104                                         nics=[{}],
7105                                         vcpus=be_full[constants.BE_VCPUS],
7106                                         memory=be_full[constants.BE_MAXMEM],
7107                                         spindle_use=spindle_use,
7108                                         disks=[{constants.IDISK_SIZE: d.size,
7109                                                 constants.IDISK_MODE: d.mode}
7110                                                 for d in self.instance.disks],
7111                                         hypervisor=self.instance.hypervisor)
7112     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7113
7114     ial.Run(self.op.iallocator)
7115
7116     assert req.RequiredNodes() == len(self.instance.all_nodes)
7117
7118     if not ial.success:
7119       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7120                                  " %s" % (self.op.iallocator, ial.info),
7121                                  errors.ECODE_NORES)
7122
7123     self.op.nodes = ial.result
7124     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7125                  self.op.instance_name, self.op.iallocator,
7126                  utils.CommaJoin(ial.result))
7127
7128   def CheckArguments(self):
7129     if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7130       # Normalize and convert deprecated list of disk indices
7131       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7132
7133     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7134     if duplicates:
7135       raise errors.OpPrereqError("Some disks have been specified more than"
7136                                  " once: %s" % utils.CommaJoin(duplicates),
7137                                  errors.ECODE_INVAL)
7138
7139     if self.op.iallocator and self.op.nodes:
7140       raise errors.OpPrereqError("Give either the iallocator or the new"
7141                                  " nodes, not both", errors.ECODE_INVAL)
7142
7143     for (idx, params) in self.op.disks:
7144       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7145       unsupported = frozenset(params.keys()) - self._MODIFYABLE
7146       if unsupported:
7147         raise errors.OpPrereqError("Parameters for disk %s try to change"
7148                                    " unmodifyable parameter(s): %s" %
7149                                    (idx, utils.CommaJoin(unsupported)),
7150                                    errors.ECODE_INVAL)
7151
7152   def ExpandNames(self):
7153     self._ExpandAndLockInstance()
7154     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7155     if self.op.nodes:
7156       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7157       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7158     else:
7159       self.needed_locks[locking.LEVEL_NODE] = []
7160       if self.op.iallocator:
7161         # iallocator will select a new node in the same group
7162         self.needed_locks[locking.LEVEL_NODEGROUP] = []
7163     self.needed_locks[locking.LEVEL_NODE_RES] = []
7164
7165   def DeclareLocks(self, level):
7166     if level == locking.LEVEL_NODEGROUP:
7167       assert self.op.iallocator is not None
7168       assert not self.op.nodes
7169       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7170       self.share_locks[locking.LEVEL_NODEGROUP] = 1
7171       # Lock the primary group used by the instance optimistically; this
7172       # requires going via the node before it's locked, requiring
7173       # verification later on
7174       self.needed_locks[locking.LEVEL_NODEGROUP] = \
7175         self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7176
7177     elif level == locking.LEVEL_NODE:
7178       # If an allocator is used, then we lock all the nodes in the current
7179       # instance group, as we don't know yet which ones will be selected;
7180       # if we replace the nodes without using an allocator, locks are
7181       # already declared in ExpandNames; otherwise, we need to lock all the
7182       # instance nodes for disk re-creation
7183       if self.op.iallocator:
7184         assert not self.op.nodes
7185         assert not self.needed_locks[locking.LEVEL_NODE]
7186         assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7187
7188         # Lock member nodes of the group of the primary node
7189         for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7190           self.needed_locks[locking.LEVEL_NODE].extend(
7191             self.cfg.GetNodeGroup(group_uuid).members)
7192       elif not self.op.nodes:
7193         self._LockInstancesNodes(primary_only=False)
7194     elif level == locking.LEVEL_NODE_RES:
7195       # Copy node locks
7196       self.needed_locks[locking.LEVEL_NODE_RES] = \
7197         self.needed_locks[locking.LEVEL_NODE][:]
7198
7199   def BuildHooksEnv(self):
7200     """Build hooks env.
7201
7202     This runs on master, primary and secondary nodes of the instance.
7203
7204     """
7205     return _BuildInstanceHookEnvByObject(self, self.instance)
7206
7207   def BuildHooksNodes(self):
7208     """Build hooks nodes.
7209
7210     """
7211     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7212     return (nl, nl)
7213
7214   def CheckPrereq(self):
7215     """Check prerequisites.
7216
7217     This checks that the instance is in the cluster and is not running.
7218
7219     """
7220     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7221     assert instance is not None, \
7222       "Cannot retrieve locked instance %s" % self.op.instance_name
7223     if self.op.nodes:
7224       if len(self.op.nodes) != len(instance.all_nodes):
7225         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7226                                    " %d replacement nodes were specified" %
7227                                    (instance.name, len(instance.all_nodes),
7228                                     len(self.op.nodes)),
7229                                    errors.ECODE_INVAL)
7230       assert instance.disk_template != constants.DT_DRBD8 or \
7231           len(self.op.nodes) == 2
7232       assert instance.disk_template != constants.DT_PLAIN or \
7233           len(self.op.nodes) == 1
7234       primary_node = self.op.nodes[0]
7235     else:
7236       primary_node = instance.primary_node
7237     if not self.op.iallocator:
7238       _CheckNodeOnline(self, primary_node)
7239
7240     if instance.disk_template == constants.DT_DISKLESS:
7241       raise errors.OpPrereqError("Instance '%s' has no disks" %
7242                                  self.op.instance_name, errors.ECODE_INVAL)
7243
7244     # Verify if node group locks are still correct
7245     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7246     if owned_groups:
7247       # Node group locks are acquired only for the primary node (and only
7248       # when the allocator is used)
7249       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7250                                primary_only=True)
7251
7252     # if we replace nodes *and* the old primary is offline, we don't
7253     # check the instance state
7254     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7255     if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7256       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7257                           msg="cannot recreate disks")
7258
7259     if self.op.disks:
7260       self.disks = dict(self.op.disks)
7261     else:
7262       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7263
7264     maxidx = max(self.disks.keys())
7265     if maxidx >= len(instance.disks):
7266       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7267                                  errors.ECODE_INVAL)
7268
7269     if ((self.op.nodes or self.op.iallocator) and
7270         sorted(self.disks.keys()) != range(len(instance.disks))):
7271       raise errors.OpPrereqError("Can't recreate disks partially and"
7272                                  " change the nodes at the same time",
7273                                  errors.ECODE_INVAL)
7274
7275     self.instance = instance
7276
7277     if self.op.iallocator:
7278       self._RunAllocator()
7279
7280     # Release unneeded node and node resource locks
7281     _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7282     _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7283
7284   def Exec(self, feedback_fn):
7285     """Recreate the disks.
7286
7287     """
7288     instance = self.instance
7289
7290     assert (self.owned_locks(locking.LEVEL_NODE) ==
7291             self.owned_locks(locking.LEVEL_NODE_RES))
7292
7293     to_skip = []
7294     mods = [] # keeps track of needed changes
7295
7296     for idx, disk in enumerate(instance.disks):
7297       try:
7298         changes = self.disks[idx]
7299       except KeyError:
7300         # Disk should not be recreated
7301         to_skip.append(idx)
7302         continue
7303
7304       # update secondaries for disks, if needed
7305       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7306         # need to update the nodes and minors
7307         assert len(self.op.nodes) == 2
7308         assert len(disk.logical_id) == 6 # otherwise disk internals
7309                                          # have changed
7310         (_, _, old_port, _, _, old_secret) = disk.logical_id
7311         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7312         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7313                   new_minors[0], new_minors[1], old_secret)
7314         assert len(disk.logical_id) == len(new_id)
7315       else:
7316         new_id = None
7317
7318       mods.append((idx, new_id, changes))
7319
7320     # now that we have passed all asserts above, we can apply the mods
7321     # in a single run (to avoid partial changes)
7322     for idx, new_id, changes in mods:
7323       disk = instance.disks[idx]
7324       if new_id is not None:
7325         assert disk.dev_type == constants.LD_DRBD8
7326         disk.logical_id = new_id
7327       if changes:
7328         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7329                     mode=changes.get(constants.IDISK_MODE, None))
7330
7331     # change primary node, if needed
7332     if self.op.nodes:
7333       instance.primary_node = self.op.nodes[0]
7334       self.LogWarning("Changing the instance's nodes, you will have to"
7335                       " remove any disks left on the older nodes manually")
7336
7337     if self.op.nodes:
7338       self.cfg.Update(instance, feedback_fn)
7339
7340     _CreateDisks(self, instance, to_skip=to_skip)
7341
7342
7343 class LUInstanceRename(LogicalUnit):
7344   """Rename an instance.
7345
7346   """
7347   HPATH = "instance-rename"
7348   HTYPE = constants.HTYPE_INSTANCE
7349
7350   def CheckArguments(self):
7351     """Check arguments.
7352
7353     """
7354     if self.op.ip_check and not self.op.name_check:
7355       # TODO: make the ip check more flexible and not depend on the name check
7356       raise errors.OpPrereqError("IP address check requires a name check",
7357                                  errors.ECODE_INVAL)
7358
7359   def BuildHooksEnv(self):
7360     """Build hooks env.
7361
7362     This runs on master, primary and secondary nodes of the instance.
7363
7364     """
7365     env = _BuildInstanceHookEnvByObject(self, self.instance)
7366     env["INSTANCE_NEW_NAME"] = self.op.new_name
7367     return env
7368
7369   def BuildHooksNodes(self):
7370     """Build hooks nodes.
7371
7372     """
7373     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7374     return (nl, nl)
7375
7376   def CheckPrereq(self):
7377     """Check prerequisites.
7378
7379     This checks that the instance is in the cluster and is not running.
7380
7381     """
7382     self.op.instance_name = _ExpandInstanceName(self.cfg,
7383                                                 self.op.instance_name)
7384     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7385     assert instance is not None
7386     _CheckNodeOnline(self, instance.primary_node)
7387     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7388                         msg="cannot rename")
7389     self.instance = instance
7390
7391     new_name = self.op.new_name
7392     if self.op.name_check:
7393       hostname = netutils.GetHostname(name=new_name)
7394       if hostname.name != new_name:
7395         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7396                      hostname.name)
7397       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7398         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7399                                     " same as given hostname '%s'") %
7400                                     (hostname.name, self.op.new_name),
7401                                     errors.ECODE_INVAL)
7402       new_name = self.op.new_name = hostname.name
7403       if (self.op.ip_check and
7404           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7405         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7406                                    (hostname.ip, new_name),
7407                                    errors.ECODE_NOTUNIQUE)
7408
7409     instance_list = self.cfg.GetInstanceList()
7410     if new_name in instance_list and new_name != instance.name:
7411       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7412                                  new_name, errors.ECODE_EXISTS)
7413
7414   def Exec(self, feedback_fn):
7415     """Rename the instance.
7416
7417     """
7418     inst = self.instance
7419     old_name = inst.name
7420
7421     rename_file_storage = False
7422     if (inst.disk_template in constants.DTS_FILEBASED and
7423         self.op.new_name != inst.name):
7424       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7425       rename_file_storage = True
7426
7427     self.cfg.RenameInstance(inst.name, self.op.new_name)
7428     # Change the instance lock. This is definitely safe while we hold the BGL.
7429     # Otherwise the new lock would have to be added in acquired mode.
7430     assert self.REQ_BGL
7431     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7432     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7433
7434     # re-read the instance from the configuration after rename
7435     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7436
7437     if rename_file_storage:
7438       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7439       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7440                                                      old_file_storage_dir,
7441                                                      new_file_storage_dir)
7442       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7443                    " (but the instance has been renamed in Ganeti)" %
7444                    (inst.primary_node, old_file_storage_dir,
7445                     new_file_storage_dir))
7446
7447     _StartInstanceDisks(self, inst, None)
7448     try:
7449       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7450                                                  old_name, self.op.debug_level)
7451       msg = result.fail_msg
7452       if msg:
7453         msg = ("Could not run OS rename script for instance %s on node %s"
7454                " (but the instance has been renamed in Ganeti): %s" %
7455                (inst.name, inst.primary_node, msg))
7456         self.proc.LogWarning(msg)
7457     finally:
7458       _ShutdownInstanceDisks(self, inst)
7459
7460     return inst.name
7461
7462
7463 class LUInstanceRemove(LogicalUnit):
7464   """Remove an instance.
7465
7466   """
7467   HPATH = "instance-remove"
7468   HTYPE = constants.HTYPE_INSTANCE
7469   REQ_BGL = False
7470
7471   def ExpandNames(self):
7472     self._ExpandAndLockInstance()
7473     self.needed_locks[locking.LEVEL_NODE] = []
7474     self.needed_locks[locking.LEVEL_NODE_RES] = []
7475     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7476
7477   def DeclareLocks(self, level):
7478     if level == locking.LEVEL_NODE:
7479       self._LockInstancesNodes()
7480     elif level == locking.LEVEL_NODE_RES:
7481       # Copy node locks
7482       self.needed_locks[locking.LEVEL_NODE_RES] = \
7483         self.needed_locks[locking.LEVEL_NODE][:]
7484
7485   def BuildHooksEnv(self):
7486     """Build hooks env.
7487
7488     This runs on master, primary and secondary nodes of the instance.
7489
7490     """
7491     env = _BuildInstanceHookEnvByObject(self, self.instance)
7492     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7493     return env
7494
7495   def BuildHooksNodes(self):
7496     """Build hooks nodes.
7497
7498     """
7499     nl = [self.cfg.GetMasterNode()]
7500     nl_post = list(self.instance.all_nodes) + nl
7501     return (nl, nl_post)
7502
7503   def CheckPrereq(self):
7504     """Check prerequisites.
7505
7506     This checks that the instance is in the cluster.
7507
7508     """
7509     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7510     assert self.instance is not None, \
7511       "Cannot retrieve locked instance %s" % self.op.instance_name
7512
7513   def Exec(self, feedback_fn):
7514     """Remove the instance.
7515
7516     """
7517     instance = self.instance
7518     logging.info("Shutting down instance %s on node %s",
7519                  instance.name, instance.primary_node)
7520
7521     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7522                                              self.op.shutdown_timeout)
7523     msg = result.fail_msg
7524     if msg:
7525       if self.op.ignore_failures:
7526         feedback_fn("Warning: can't shutdown instance: %s" % msg)
7527       else:
7528         raise errors.OpExecError("Could not shutdown instance %s on"
7529                                  " node %s: %s" %
7530                                  (instance.name, instance.primary_node, msg))
7531
7532     assert (self.owned_locks(locking.LEVEL_NODE) ==
7533             self.owned_locks(locking.LEVEL_NODE_RES))
7534     assert not (set(instance.all_nodes) -
7535                 self.owned_locks(locking.LEVEL_NODE)), \
7536       "Not owning correct locks"
7537
7538     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7539
7540
7541 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7542   """Utility function to remove an instance.
7543
7544   """
7545   logging.info("Removing block devices for instance %s", instance.name)
7546
7547   if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7548     if not ignore_failures:
7549       raise errors.OpExecError("Can't remove instance's disks")
7550     feedback_fn("Warning: can't remove instance's disks")
7551
7552   logging.info("Removing instance %s out of cluster config", instance.name)
7553
7554   lu.cfg.RemoveInstance(instance.name)
7555
7556   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7557     "Instance lock removal conflict"
7558
7559   # Remove lock for the instance
7560   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7561
7562
7563 class LUInstanceQuery(NoHooksLU):
7564   """Logical unit for querying instances.
7565
7566   """
7567   # pylint: disable=W0142
7568   REQ_BGL = False
7569
7570   def CheckArguments(self):
7571     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7572                              self.op.output_fields, self.op.use_locking)
7573
7574   def ExpandNames(self):
7575     self.iq.ExpandNames(self)
7576
7577   def DeclareLocks(self, level):
7578     self.iq.DeclareLocks(self, level)
7579
7580   def Exec(self, feedback_fn):
7581     return self.iq.OldStyleQuery(self)
7582
7583
7584 class LUInstanceFailover(LogicalUnit):
7585   """Failover an instance.
7586
7587   """
7588   HPATH = "instance-failover"
7589   HTYPE = constants.HTYPE_INSTANCE
7590   REQ_BGL = False
7591
7592   def CheckArguments(self):
7593     """Check the arguments.
7594
7595     """
7596     self.iallocator = getattr(self.op, "iallocator", None)
7597     self.target_node = getattr(self.op, "target_node", None)
7598
7599   def ExpandNames(self):
7600     self._ExpandAndLockInstance()
7601
7602     if self.op.target_node is not None:
7603       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7604
7605     self.needed_locks[locking.LEVEL_NODE] = []
7606     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7607
7608     self.needed_locks[locking.LEVEL_NODE_RES] = []
7609     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7610
7611     ignore_consistency = self.op.ignore_consistency
7612     shutdown_timeout = self.op.shutdown_timeout
7613     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7614                                        cleanup=False,
7615                                        failover=True,
7616                                        ignore_consistency=ignore_consistency,
7617                                        shutdown_timeout=shutdown_timeout,
7618                                        ignore_ipolicy=self.op.ignore_ipolicy)
7619     self.tasklets = [self._migrater]
7620
7621   def DeclareLocks(self, level):
7622     if level == locking.LEVEL_NODE:
7623       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7624       if instance.disk_template in constants.DTS_EXT_MIRROR:
7625         if self.op.target_node is None:
7626           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7627         else:
7628           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7629                                                    self.op.target_node]
7630         del self.recalculate_locks[locking.LEVEL_NODE]
7631       else:
7632         self._LockInstancesNodes()
7633     elif level == locking.LEVEL_NODE_RES:
7634       # Copy node locks
7635       self.needed_locks[locking.LEVEL_NODE_RES] = \
7636         self.needed_locks[locking.LEVEL_NODE][:]
7637
7638   def BuildHooksEnv(self):
7639     """Build hooks env.
7640
7641     This runs on master, primary and secondary nodes of the instance.
7642
7643     """
7644     instance = self._migrater.instance
7645     source_node = instance.primary_node
7646     target_node = self.op.target_node
7647     env = {
7648       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7649       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7650       "OLD_PRIMARY": source_node,
7651       "NEW_PRIMARY": target_node,
7652       }
7653
7654     if instance.disk_template in constants.DTS_INT_MIRROR:
7655       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7656       env["NEW_SECONDARY"] = source_node
7657     else:
7658       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7659
7660     env.update(_BuildInstanceHookEnvByObject(self, instance))
7661
7662     return env
7663
7664   def BuildHooksNodes(self):
7665     """Build hooks nodes.
7666
7667     """
7668     instance = self._migrater.instance
7669     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7670     return (nl, nl + [instance.primary_node])
7671
7672
7673 class LUInstanceMigrate(LogicalUnit):
7674   """Migrate an instance.
7675
7676   This is migration without shutting down, compared to the failover,
7677   which is done with shutdown.
7678
7679   """
7680   HPATH = "instance-migrate"
7681   HTYPE = constants.HTYPE_INSTANCE
7682   REQ_BGL = False
7683
7684   def ExpandNames(self):
7685     self._ExpandAndLockInstance()
7686
7687     if self.op.target_node is not None:
7688       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7689
7690     self.needed_locks[locking.LEVEL_NODE] = []
7691     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7692
7693     self.needed_locks[locking.LEVEL_NODE] = []
7694     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7695
7696     self._migrater = \
7697       TLMigrateInstance(self, self.op.instance_name,
7698                         cleanup=self.op.cleanup,
7699                         failover=False,
7700                         fallback=self.op.allow_failover,
7701                         allow_runtime_changes=self.op.allow_runtime_changes,
7702                         ignore_ipolicy=self.op.ignore_ipolicy)
7703     self.tasklets = [self._migrater]
7704
7705   def DeclareLocks(self, level):
7706     if level == locking.LEVEL_NODE:
7707       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7708       if instance.disk_template in constants.DTS_EXT_MIRROR:
7709         if self.op.target_node is None:
7710           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7711         else:
7712           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7713                                                    self.op.target_node]
7714         del self.recalculate_locks[locking.LEVEL_NODE]
7715       else:
7716         self._LockInstancesNodes()
7717     elif level == locking.LEVEL_NODE_RES:
7718       # Copy node locks
7719       self.needed_locks[locking.LEVEL_NODE_RES] = \
7720         self.needed_locks[locking.LEVEL_NODE][:]
7721
7722   def BuildHooksEnv(self):
7723     """Build hooks env.
7724
7725     This runs on master, primary and secondary nodes of the instance.
7726
7727     """
7728     instance = self._migrater.instance
7729     source_node = instance.primary_node
7730     target_node = self.op.target_node
7731     env = _BuildInstanceHookEnvByObject(self, instance)
7732     env.update({
7733       "MIGRATE_LIVE": self._migrater.live,
7734       "MIGRATE_CLEANUP": self.op.cleanup,
7735       "OLD_PRIMARY": source_node,
7736       "NEW_PRIMARY": target_node,
7737       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7738       })
7739
7740     if instance.disk_template in constants.DTS_INT_MIRROR:
7741       env["OLD_SECONDARY"] = target_node
7742       env["NEW_SECONDARY"] = source_node
7743     else:
7744       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7745
7746     return env
7747
7748   def BuildHooksNodes(self):
7749     """Build hooks nodes.
7750
7751     """
7752     instance = self._migrater.instance
7753     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7754     return (nl, nl + [instance.primary_node])
7755
7756
7757 class LUInstanceMove(LogicalUnit):
7758   """Move an instance by data-copying.
7759
7760   """
7761   HPATH = "instance-move"
7762   HTYPE = constants.HTYPE_INSTANCE
7763   REQ_BGL = False
7764
7765   def ExpandNames(self):
7766     self._ExpandAndLockInstance()
7767     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7768     self.op.target_node = target_node
7769     self.needed_locks[locking.LEVEL_NODE] = [target_node]
7770     self.needed_locks[locking.LEVEL_NODE_RES] = []
7771     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7772
7773   def DeclareLocks(self, level):
7774     if level == locking.LEVEL_NODE:
7775       self._LockInstancesNodes(primary_only=True)
7776     elif level == locking.LEVEL_NODE_RES:
7777       # Copy node locks
7778       self.needed_locks[locking.LEVEL_NODE_RES] = \
7779         self.needed_locks[locking.LEVEL_NODE][:]
7780
7781   def BuildHooksEnv(self):
7782     """Build hooks env.
7783
7784     This runs on master, primary and secondary nodes of the instance.
7785
7786     """
7787     env = {
7788       "TARGET_NODE": self.op.target_node,
7789       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7790       }
7791     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7792     return env
7793
7794   def BuildHooksNodes(self):
7795     """Build hooks nodes.
7796
7797     """
7798     nl = [
7799       self.cfg.GetMasterNode(),
7800       self.instance.primary_node,
7801       self.op.target_node,
7802       ]
7803     return (nl, nl)
7804
7805   def CheckPrereq(self):
7806     """Check prerequisites.
7807
7808     This checks that the instance is in the cluster.
7809
7810     """
7811     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7812     assert self.instance is not None, \
7813       "Cannot retrieve locked instance %s" % self.op.instance_name
7814
7815     node = self.cfg.GetNodeInfo(self.op.target_node)
7816     assert node is not None, \
7817       "Cannot retrieve locked node %s" % self.op.target_node
7818
7819     self.target_node = target_node = node.name
7820
7821     if target_node == instance.primary_node:
7822       raise errors.OpPrereqError("Instance %s is already on the node %s" %
7823                                  (instance.name, target_node),
7824                                  errors.ECODE_STATE)
7825
7826     bep = self.cfg.GetClusterInfo().FillBE(instance)
7827
7828     for idx, dsk in enumerate(instance.disks):
7829       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7830         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7831                                    " cannot copy" % idx, errors.ECODE_STATE)
7832
7833     _CheckNodeOnline(self, target_node)
7834     _CheckNodeNotDrained(self, target_node)
7835     _CheckNodeVmCapable(self, target_node)
7836     cluster = self.cfg.GetClusterInfo()
7837     group_info = self.cfg.GetNodeGroup(node.group)
7838     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7839     _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7840                             ignore=self.op.ignore_ipolicy)
7841
7842     if instance.admin_state == constants.ADMINST_UP:
7843       # check memory requirements on the secondary node
7844       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7845                            instance.name, bep[constants.BE_MAXMEM],
7846                            instance.hypervisor)
7847     else:
7848       self.LogInfo("Not checking memory on the secondary node as"
7849                    " instance will not be started")
7850
7851     # check bridge existance
7852     _CheckInstanceBridgesExist(self, instance, node=target_node)
7853
7854   def Exec(self, feedback_fn):
7855     """Move an instance.
7856
7857     The move is done by shutting it down on its present node, copying
7858     the data over (slow) and starting it on the new node.
7859
7860     """
7861     instance = self.instance
7862
7863     source_node = instance.primary_node
7864     target_node = self.target_node
7865
7866     self.LogInfo("Shutting down instance %s on source node %s",
7867                  instance.name, source_node)
7868
7869     assert (self.owned_locks(locking.LEVEL_NODE) ==
7870             self.owned_locks(locking.LEVEL_NODE_RES))
7871
7872     result = self.rpc.call_instance_shutdown(source_node, instance,
7873                                              self.op.shutdown_timeout)
7874     msg = result.fail_msg
7875     if msg:
7876       if self.op.ignore_consistency:
7877         self.proc.LogWarning("Could not shutdown instance %s on node %s."
7878                              " Proceeding anyway. Please make sure node"
7879                              " %s is down. Error details: %s",
7880                              instance.name, source_node, source_node, msg)
7881       else:
7882         raise errors.OpExecError("Could not shutdown instance %s on"
7883                                  " node %s: %s" %
7884                                  (instance.name, source_node, msg))
7885
7886     # create the target disks
7887     try:
7888       _CreateDisks(self, instance, target_node=target_node)
7889     except errors.OpExecError:
7890       self.LogWarning("Device creation failed, reverting...")
7891       try:
7892         _RemoveDisks(self, instance, target_node=target_node)
7893       finally:
7894         self.cfg.ReleaseDRBDMinors(instance.name)
7895         raise
7896
7897     cluster_name = self.cfg.GetClusterInfo().cluster_name
7898
7899     errs = []
7900     # activate, get path, copy the data over
7901     for idx, disk in enumerate(instance.disks):
7902       self.LogInfo("Copying data for disk %d", idx)
7903       result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7904                                                instance.name, True, idx)
7905       if result.fail_msg:
7906         self.LogWarning("Can't assemble newly created disk %d: %s",
7907                         idx, result.fail_msg)
7908         errs.append(result.fail_msg)
7909         break
7910       dev_path = result.payload
7911       result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7912                                              target_node, dev_path,
7913                                              cluster_name)
7914       if result.fail_msg:
7915         self.LogWarning("Can't copy data over for disk %d: %s",
7916                         idx, result.fail_msg)
7917         errs.append(result.fail_msg)
7918         break
7919
7920     if errs:
7921       self.LogWarning("Some disks failed to copy, aborting")
7922       try:
7923         _RemoveDisks(self, instance, target_node=target_node)
7924       finally:
7925         self.cfg.ReleaseDRBDMinors(instance.name)
7926         raise errors.OpExecError("Errors during disk copy: %s" %
7927                                  (",".join(errs),))
7928
7929     instance.primary_node = target_node
7930     self.cfg.Update(instance, feedback_fn)
7931
7932     self.LogInfo("Removing the disks on the original node")
7933     _RemoveDisks(self, instance, target_node=source_node)
7934
7935     # Only start the instance if it's marked as up
7936     if instance.admin_state == constants.ADMINST_UP:
7937       self.LogInfo("Starting instance %s on node %s",
7938                    instance.name, target_node)
7939
7940       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7941                                            ignore_secondaries=True)
7942       if not disks_ok:
7943         _ShutdownInstanceDisks(self, instance)
7944         raise errors.OpExecError("Can't activate the instance's disks")
7945
7946       result = self.rpc.call_instance_start(target_node,
7947                                             (instance, None, None), False)
7948       msg = result.fail_msg
7949       if msg:
7950         _ShutdownInstanceDisks(self, instance)
7951         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7952                                  (instance.name, target_node, msg))
7953
7954
7955 class LUNodeMigrate(LogicalUnit):
7956   """Migrate all instances from a node.
7957
7958   """
7959   HPATH = "node-migrate"
7960   HTYPE = constants.HTYPE_NODE
7961   REQ_BGL = False
7962
7963   def CheckArguments(self):
7964     pass
7965
7966   def ExpandNames(self):
7967     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7968
7969     self.share_locks = _ShareAll()
7970     self.needed_locks = {
7971       locking.LEVEL_NODE: [self.op.node_name],
7972       }
7973
7974   def BuildHooksEnv(self):
7975     """Build hooks env.
7976
7977     This runs on the master, the primary and all the secondaries.
7978
7979     """
7980     return {
7981       "NODE_NAME": self.op.node_name,
7982       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7983       }
7984
7985   def BuildHooksNodes(self):
7986     """Build hooks nodes.
7987
7988     """
7989     nl = [self.cfg.GetMasterNode()]
7990     return (nl, nl)
7991
7992   def CheckPrereq(self):
7993     pass
7994
7995   def Exec(self, feedback_fn):
7996     # Prepare jobs for migration instances
7997     allow_runtime_changes = self.op.allow_runtime_changes
7998     jobs = [
7999       [opcodes.OpInstanceMigrate(instance_name=inst.name,
8000                                  mode=self.op.mode,
8001                                  live=self.op.live,
8002                                  iallocator=self.op.iallocator,
8003                                  target_node=self.op.target_node,
8004                                  allow_runtime_changes=allow_runtime_changes,
8005                                  ignore_ipolicy=self.op.ignore_ipolicy)]
8006       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8007       ]
8008
8009     # TODO: Run iallocator in this opcode and pass correct placement options to
8010     # OpInstanceMigrate. Since other jobs can modify the cluster between
8011     # running the iallocator and the actual migration, a good consistency model
8012     # will have to be found.
8013
8014     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8015             frozenset([self.op.node_name]))
8016
8017     return ResultWithJobs(jobs)
8018
8019
8020 class TLMigrateInstance(Tasklet):
8021   """Tasklet class for instance migration.
8022
8023   @type live: boolean
8024   @ivar live: whether the migration will be done live or non-live;
8025       this variable is initalized only after CheckPrereq has run
8026   @type cleanup: boolean
8027   @ivar cleanup: Wheater we cleanup from a failed migration
8028   @type iallocator: string
8029   @ivar iallocator: The iallocator used to determine target_node
8030   @type target_node: string
8031   @ivar target_node: If given, the target_node to reallocate the instance to
8032   @type failover: boolean
8033   @ivar failover: Whether operation results in failover or migration
8034   @type fallback: boolean
8035   @ivar fallback: Whether fallback to failover is allowed if migration not
8036                   possible
8037   @type ignore_consistency: boolean
8038   @ivar ignore_consistency: Wheter we should ignore consistency between source
8039                             and target node
8040   @type shutdown_timeout: int
8041   @ivar shutdown_timeout: In case of failover timeout of the shutdown
8042   @type ignore_ipolicy: bool
8043   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8044
8045   """
8046
8047   # Constants
8048   _MIGRATION_POLL_INTERVAL = 1      # seconds
8049   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8050
8051   def __init__(self, lu, instance_name, cleanup=False,
8052                failover=False, fallback=False,
8053                ignore_consistency=False,
8054                allow_runtime_changes=True,
8055                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8056                ignore_ipolicy=False):
8057     """Initializes this class.
8058
8059     """
8060     Tasklet.__init__(self, lu)
8061
8062     # Parameters
8063     self.instance_name = instance_name
8064     self.cleanup = cleanup
8065     self.live = False # will be overridden later
8066     self.failover = failover
8067     self.fallback = fallback
8068     self.ignore_consistency = ignore_consistency
8069     self.shutdown_timeout = shutdown_timeout
8070     self.ignore_ipolicy = ignore_ipolicy
8071     self.allow_runtime_changes = allow_runtime_changes
8072
8073   def CheckPrereq(self):
8074     """Check prerequisites.
8075
8076     This checks that the instance is in the cluster.
8077
8078     """
8079     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8080     instance = self.cfg.GetInstanceInfo(instance_name)
8081     assert instance is not None
8082     self.instance = instance
8083     cluster = self.cfg.GetClusterInfo()
8084
8085     if (not self.cleanup and
8086         not instance.admin_state == constants.ADMINST_UP and
8087         not self.failover and self.fallback):
8088       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8089                       " switching to failover")
8090       self.failover = True
8091
8092     if instance.disk_template not in constants.DTS_MIRRORED:
8093       if self.failover:
8094         text = "failovers"
8095       else:
8096         text = "migrations"
8097       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8098                                  " %s" % (instance.disk_template, text),
8099                                  errors.ECODE_STATE)
8100
8101     if instance.disk_template in constants.DTS_EXT_MIRROR:
8102       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8103
8104       if self.lu.op.iallocator:
8105         self._RunAllocator()
8106       else:
8107         # We set set self.target_node as it is required by
8108         # BuildHooksEnv
8109         self.target_node = self.lu.op.target_node
8110
8111       # Check that the target node is correct in terms of instance policy
8112       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8113       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8114       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8115                                                               group_info)
8116       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8117                               ignore=self.ignore_ipolicy)
8118
8119       # self.target_node is already populated, either directly or by the
8120       # iallocator run
8121       target_node = self.target_node
8122       if self.target_node == instance.primary_node:
8123         raise errors.OpPrereqError("Cannot migrate instance %s"
8124                                    " to its primary (%s)" %
8125                                    (instance.name, instance.primary_node),
8126                                    errors.ECODE_STATE)
8127
8128       if len(self.lu.tasklets) == 1:
8129         # It is safe to release locks only when we're the only tasklet
8130         # in the LU
8131         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8132                       keep=[instance.primary_node, self.target_node])
8133
8134     else:
8135       secondary_nodes = instance.secondary_nodes
8136       if not secondary_nodes:
8137         raise errors.ConfigurationError("No secondary node but using"
8138                                         " %s disk template" %
8139                                         instance.disk_template)
8140       target_node = secondary_nodes[0]
8141       if self.lu.op.iallocator or (self.lu.op.target_node and
8142                                    self.lu.op.target_node != target_node):
8143         if self.failover:
8144           text = "failed over"
8145         else:
8146           text = "migrated"
8147         raise errors.OpPrereqError("Instances with disk template %s cannot"
8148                                    " be %s to arbitrary nodes"
8149                                    " (neither an iallocator nor a target"
8150                                    " node can be passed)" %
8151                                    (instance.disk_template, text),
8152                                    errors.ECODE_INVAL)
8153       nodeinfo = self.cfg.GetNodeInfo(target_node)
8154       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8155       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8156                                                               group_info)
8157       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8158                               ignore=self.ignore_ipolicy)
8159
8160     i_be = cluster.FillBE(instance)
8161
8162     # check memory requirements on the secondary node
8163     if (not self.cleanup and
8164          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8165       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8166                                                "migrating instance %s" %
8167                                                instance.name,
8168                                                i_be[constants.BE_MINMEM],
8169                                                instance.hypervisor)
8170     else:
8171       self.lu.LogInfo("Not checking memory on the secondary node as"
8172                       " instance will not be started")
8173
8174     # check if failover must be forced instead of migration
8175     if (not self.cleanup and not self.failover and
8176         i_be[constants.BE_ALWAYS_FAILOVER]):
8177       self.lu.LogInfo("Instance configured to always failover; fallback"
8178                       " to failover")
8179       self.failover = True
8180
8181     # check bridge existance
8182     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8183
8184     if not self.cleanup:
8185       _CheckNodeNotDrained(self.lu, target_node)
8186       if not self.failover:
8187         result = self.rpc.call_instance_migratable(instance.primary_node,
8188                                                    instance)
8189         if result.fail_msg and self.fallback:
8190           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8191                           " failover")
8192           self.failover = True
8193         else:
8194           result.Raise("Can't migrate, please use failover",
8195                        prereq=True, ecode=errors.ECODE_STATE)
8196
8197     assert not (self.failover and self.cleanup)
8198
8199     if not self.failover:
8200       if self.lu.op.live is not None and self.lu.op.mode is not None:
8201         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8202                                    " parameters are accepted",
8203                                    errors.ECODE_INVAL)
8204       if self.lu.op.live is not None:
8205         if self.lu.op.live:
8206           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8207         else:
8208           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8209         # reset the 'live' parameter to None so that repeated
8210         # invocations of CheckPrereq do not raise an exception
8211         self.lu.op.live = None
8212       elif self.lu.op.mode is None:
8213         # read the default value from the hypervisor
8214         i_hv = cluster.FillHV(self.instance, skip_globals=False)
8215         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8216
8217       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8218     else:
8219       # Failover is never live
8220       self.live = False
8221
8222     if not (self.failover or self.cleanup):
8223       remote_info = self.rpc.call_instance_info(instance.primary_node,
8224                                                 instance.name,
8225                                                 instance.hypervisor)
8226       remote_info.Raise("Error checking instance on node %s" %
8227                         instance.primary_node)
8228       instance_running = bool(remote_info.payload)
8229       if instance_running:
8230         self.current_mem = int(remote_info.payload["memory"])
8231
8232   def _RunAllocator(self):
8233     """Run the allocator based on input opcode.
8234
8235     """
8236     # FIXME: add a self.ignore_ipolicy option
8237     req = iallocator.IAReqRelocate(name=self.instance_name,
8238                                    relocate_from=[self.instance.primary_node])
8239     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8240
8241     ial.Run(self.lu.op.iallocator)
8242
8243     if not ial.success:
8244       raise errors.OpPrereqError("Can't compute nodes using"
8245                                  " iallocator '%s': %s" %
8246                                  (self.lu.op.iallocator, ial.info),
8247                                  errors.ECODE_NORES)
8248     self.target_node = ial.result[0]
8249     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8250                     self.instance_name, self.lu.op.iallocator,
8251                     utils.CommaJoin(ial.result))
8252
8253   def _WaitUntilSync(self):
8254     """Poll with custom rpc for disk sync.
8255
8256     This uses our own step-based rpc call.
8257
8258     """
8259     self.feedback_fn("* wait until resync is done")
8260     all_done = False
8261     while not all_done:
8262       all_done = True
8263       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8264                                             self.nodes_ip,
8265                                             (self.instance.disks,
8266                                              self.instance))
8267       min_percent = 100
8268       for node, nres in result.items():
8269         nres.Raise("Cannot resync disks on node %s" % node)
8270         node_done, node_percent = nres.payload
8271         all_done = all_done and node_done
8272         if node_percent is not None:
8273           min_percent = min(min_percent, node_percent)
8274       if not all_done:
8275         if min_percent < 100:
8276           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8277         time.sleep(2)
8278
8279   def _EnsureSecondary(self, node):
8280     """Demote a node to secondary.
8281
8282     """
8283     self.feedback_fn("* switching node %s to secondary mode" % node)
8284
8285     for dev in self.instance.disks:
8286       self.cfg.SetDiskID(dev, node)
8287
8288     result = self.rpc.call_blockdev_close(node, self.instance.name,
8289                                           self.instance.disks)
8290     result.Raise("Cannot change disk to secondary on node %s" % node)
8291
8292   def _GoStandalone(self):
8293     """Disconnect from the network.
8294
8295     """
8296     self.feedback_fn("* changing into standalone mode")
8297     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8298                                                self.instance.disks)
8299     for node, nres in result.items():
8300       nres.Raise("Cannot disconnect disks node %s" % node)
8301
8302   def _GoReconnect(self, multimaster):
8303     """Reconnect to the network.
8304
8305     """
8306     if multimaster:
8307       msg = "dual-master"
8308     else:
8309       msg = "single-master"
8310     self.feedback_fn("* changing disks into %s mode" % msg)
8311     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8312                                            (self.instance.disks, self.instance),
8313                                            self.instance.name, multimaster)
8314     for node, nres in result.items():
8315       nres.Raise("Cannot change disks config on node %s" % node)
8316
8317   def _ExecCleanup(self):
8318     """Try to cleanup after a failed migration.
8319
8320     The cleanup is done by:
8321       - check that the instance is running only on one node
8322         (and update the config if needed)
8323       - change disks on its secondary node to secondary
8324       - wait until disks are fully synchronized
8325       - disconnect from the network
8326       - change disks into single-master mode
8327       - wait again until disks are fully synchronized
8328
8329     """
8330     instance = self.instance
8331     target_node = self.target_node
8332     source_node = self.source_node
8333
8334     # check running on only one node
8335     self.feedback_fn("* checking where the instance actually runs"
8336                      " (if this hangs, the hypervisor might be in"
8337                      " a bad state)")
8338     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8339     for node, result in ins_l.items():
8340       result.Raise("Can't contact node %s" % node)
8341
8342     runningon_source = instance.name in ins_l[source_node].payload
8343     runningon_target = instance.name in ins_l[target_node].payload
8344
8345     if runningon_source and runningon_target:
8346       raise errors.OpExecError("Instance seems to be running on two nodes,"
8347                                " or the hypervisor is confused; you will have"
8348                                " to ensure manually that it runs only on one"
8349                                " and restart this operation")
8350
8351     if not (runningon_source or runningon_target):
8352       raise errors.OpExecError("Instance does not seem to be running at all;"
8353                                " in this case it's safer to repair by"
8354                                " running 'gnt-instance stop' to ensure disk"
8355                                " shutdown, and then restarting it")
8356
8357     if runningon_target:
8358       # the migration has actually succeeded, we need to update the config
8359       self.feedback_fn("* instance running on secondary node (%s),"
8360                        " updating config" % target_node)
8361       instance.primary_node = target_node
8362       self.cfg.Update(instance, self.feedback_fn)
8363       demoted_node = source_node
8364     else:
8365       self.feedback_fn("* instance confirmed to be running on its"
8366                        " primary node (%s)" % source_node)
8367       demoted_node = target_node
8368
8369     if instance.disk_template in constants.DTS_INT_MIRROR:
8370       self._EnsureSecondary(demoted_node)
8371       try:
8372         self._WaitUntilSync()
8373       except errors.OpExecError:
8374         # we ignore here errors, since if the device is standalone, it
8375         # won't be able to sync
8376         pass
8377       self._GoStandalone()
8378       self._GoReconnect(False)
8379       self._WaitUntilSync()
8380
8381     self.feedback_fn("* done")
8382
8383   def _RevertDiskStatus(self):
8384     """Try to revert the disk status after a failed migration.
8385
8386     """
8387     target_node = self.target_node
8388     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8389       return
8390
8391     try:
8392       self._EnsureSecondary(target_node)
8393       self._GoStandalone()
8394       self._GoReconnect(False)
8395       self._WaitUntilSync()
8396     except errors.OpExecError, err:
8397       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8398                          " please try to recover the instance manually;"
8399                          " error '%s'" % str(err))
8400
8401   def _AbortMigration(self):
8402     """Call the hypervisor code to abort a started migration.
8403
8404     """
8405     instance = self.instance
8406     target_node = self.target_node
8407     source_node = self.source_node
8408     migration_info = self.migration_info
8409
8410     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8411                                                                  instance,
8412                                                                  migration_info,
8413                                                                  False)
8414     abort_msg = abort_result.fail_msg
8415     if abort_msg:
8416       logging.error("Aborting migration failed on target node %s: %s",
8417                     target_node, abort_msg)
8418       # Don't raise an exception here, as we stil have to try to revert the
8419       # disk status, even if this step failed.
8420
8421     abort_result = self.rpc.call_instance_finalize_migration_src(
8422       source_node, instance, False, self.live)
8423     abort_msg = abort_result.fail_msg
8424     if abort_msg:
8425       logging.error("Aborting migration failed on source node %s: %s",
8426                     source_node, abort_msg)
8427
8428   def _ExecMigration(self):
8429     """Migrate an instance.
8430
8431     The migrate is done by:
8432       - change the disks into dual-master mode
8433       - wait until disks are fully synchronized again
8434       - migrate the instance
8435       - change disks on the new secondary node (the old primary) to secondary
8436       - wait until disks are fully synchronized
8437       - change disks into single-master mode
8438
8439     """
8440     instance = self.instance
8441     target_node = self.target_node
8442     source_node = self.source_node
8443
8444     # Check for hypervisor version mismatch and warn the user.
8445     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8446                                        None, [self.instance.hypervisor])
8447     for ninfo in nodeinfo.values():
8448       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8449                   ninfo.node)
8450     (_, _, (src_info, )) = nodeinfo[source_node].payload
8451     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8452
8453     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8454         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8455       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8456       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8457       if src_version != dst_version:
8458         self.feedback_fn("* warning: hypervisor version mismatch between"
8459                          " source (%s) and target (%s) node" %
8460                          (src_version, dst_version))
8461
8462     self.feedback_fn("* checking disk consistency between source and target")
8463     for (idx, dev) in enumerate(instance.disks):
8464       if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8465         raise errors.OpExecError("Disk %s is degraded or not fully"
8466                                  " synchronized on target node,"
8467                                  " aborting migration" % idx)
8468
8469     if self.current_mem > self.tgt_free_mem:
8470       if not self.allow_runtime_changes:
8471         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8472                                  " free memory to fit instance %s on target"
8473                                  " node %s (have %dMB, need %dMB)" %
8474                                  (instance.name, target_node,
8475                                   self.tgt_free_mem, self.current_mem))
8476       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8477       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8478                                                      instance,
8479                                                      self.tgt_free_mem)
8480       rpcres.Raise("Cannot modify instance runtime memory")
8481
8482     # First get the migration information from the remote node
8483     result = self.rpc.call_migration_info(source_node, instance)
8484     msg = result.fail_msg
8485     if msg:
8486       log_err = ("Failed fetching source migration information from %s: %s" %
8487                  (source_node, msg))
8488       logging.error(log_err)
8489       raise errors.OpExecError(log_err)
8490
8491     self.migration_info = migration_info = result.payload
8492
8493     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8494       # Then switch the disks to master/master mode
8495       self._EnsureSecondary(target_node)
8496       self._GoStandalone()
8497       self._GoReconnect(True)
8498       self._WaitUntilSync()
8499
8500     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8501     result = self.rpc.call_accept_instance(target_node,
8502                                            instance,
8503                                            migration_info,
8504                                            self.nodes_ip[target_node])
8505
8506     msg = result.fail_msg
8507     if msg:
8508       logging.error("Instance pre-migration failed, trying to revert"
8509                     " disk status: %s", msg)
8510       self.feedback_fn("Pre-migration failed, aborting")
8511       self._AbortMigration()
8512       self._RevertDiskStatus()
8513       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8514                                (instance.name, msg))
8515
8516     self.feedback_fn("* migrating instance to %s" % target_node)
8517     result = self.rpc.call_instance_migrate(source_node, instance,
8518                                             self.nodes_ip[target_node],
8519                                             self.live)
8520     msg = result.fail_msg
8521     if msg:
8522       logging.error("Instance migration failed, trying to revert"
8523                     " disk status: %s", msg)
8524       self.feedback_fn("Migration failed, aborting")
8525       self._AbortMigration()
8526       self._RevertDiskStatus()
8527       raise errors.OpExecError("Could not migrate instance %s: %s" %
8528                                (instance.name, msg))
8529
8530     self.feedback_fn("* starting memory transfer")
8531     last_feedback = time.time()
8532     while True:
8533       result = self.rpc.call_instance_get_migration_status(source_node,
8534                                                            instance)
8535       msg = result.fail_msg
8536       ms = result.payload   # MigrationStatus instance
8537       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8538         logging.error("Instance migration failed, trying to revert"
8539                       " disk status: %s", msg)
8540         self.feedback_fn("Migration failed, aborting")
8541         self._AbortMigration()
8542         self._RevertDiskStatus()
8543         raise errors.OpExecError("Could not migrate instance %s: %s" %
8544                                  (instance.name, msg))
8545
8546       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8547         self.feedback_fn("* memory transfer complete")
8548         break
8549
8550       if (utils.TimeoutExpired(last_feedback,
8551                                self._MIGRATION_FEEDBACK_INTERVAL) and
8552           ms.transferred_ram is not None):
8553         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8554         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8555         last_feedback = time.time()
8556
8557       time.sleep(self._MIGRATION_POLL_INTERVAL)
8558
8559     result = self.rpc.call_instance_finalize_migration_src(source_node,
8560                                                            instance,
8561                                                            True,
8562                                                            self.live)
8563     msg = result.fail_msg
8564     if msg:
8565       logging.error("Instance migration succeeded, but finalization failed"
8566                     " on the source node: %s", msg)
8567       raise errors.OpExecError("Could not finalize instance migration: %s" %
8568                                msg)
8569
8570     instance.primary_node = target_node
8571
8572     # distribute new instance config to the other nodes
8573     self.cfg.Update(instance, self.feedback_fn)
8574
8575     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8576                                                            instance,
8577                                                            migration_info,
8578                                                            True)
8579     msg = result.fail_msg
8580     if msg:
8581       logging.error("Instance migration succeeded, but finalization failed"
8582                     " on the target node: %s", msg)
8583       raise errors.OpExecError("Could not finalize instance migration: %s" %
8584                                msg)
8585
8586     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8587       self._EnsureSecondary(source_node)
8588       self._WaitUntilSync()
8589       self._GoStandalone()
8590       self._GoReconnect(False)
8591       self._WaitUntilSync()
8592
8593     # If the instance's disk template is `rbd' and there was a successful
8594     # migration, unmap the device from the source node.
8595     if self.instance.disk_template == constants.DT_RBD:
8596       disks = _ExpandCheckDisks(instance, instance.disks)
8597       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8598       for disk in disks:
8599         result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8600         msg = result.fail_msg
8601         if msg:
8602           logging.error("Migration was successful, but couldn't unmap the"
8603                         " block device %s on source node %s: %s",
8604                         disk.iv_name, source_node, msg)
8605           logging.error("You need to unmap the device %s manually on %s",
8606                         disk.iv_name, source_node)
8607
8608     self.feedback_fn("* done")
8609
8610   def _ExecFailover(self):
8611     """Failover an instance.
8612
8613     The failover is done by shutting it down on its present node and
8614     starting it on the secondary.
8615
8616     """
8617     instance = self.instance
8618     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8619
8620     source_node = instance.primary_node
8621     target_node = self.target_node
8622
8623     if instance.admin_state == constants.ADMINST_UP:
8624       self.feedback_fn("* checking disk consistency between source and target")
8625       for (idx, dev) in enumerate(instance.disks):
8626         # for drbd, these are drbd over lvm
8627         if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8628                                      False):
8629           if primary_node.offline:
8630             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8631                              " target node %s" %
8632                              (primary_node.name, idx, target_node))
8633           elif not self.ignore_consistency:
8634             raise errors.OpExecError("Disk %s is degraded on target node,"
8635                                      " aborting failover" % idx)
8636     else:
8637       self.feedback_fn("* not checking disk consistency as instance is not"
8638                        " running")
8639
8640     self.feedback_fn("* shutting down instance on source node")
8641     logging.info("Shutting down instance %s on node %s",
8642                  instance.name, source_node)
8643
8644     result = self.rpc.call_instance_shutdown(source_node, instance,
8645                                              self.shutdown_timeout)
8646     msg = result.fail_msg
8647     if msg:
8648       if self.ignore_consistency or primary_node.offline:
8649         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8650                            " proceeding anyway; please make sure node"
8651                            " %s is down; error details: %s",
8652                            instance.name, source_node, source_node, msg)
8653       else:
8654         raise errors.OpExecError("Could not shutdown instance %s on"
8655                                  " node %s: %s" %
8656                                  (instance.name, source_node, msg))
8657
8658     self.feedback_fn("* deactivating the instance's disks on source node")
8659     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8660       raise errors.OpExecError("Can't shut down the instance's disks")
8661
8662     instance.primary_node = target_node
8663     # distribute new instance config to the other nodes
8664     self.cfg.Update(instance, self.feedback_fn)
8665
8666     # Only start the instance if it's marked as up
8667     if instance.admin_state == constants.ADMINST_UP:
8668       self.feedback_fn("* activating the instance's disks on target node %s" %
8669                        target_node)
8670       logging.info("Starting instance %s on node %s",
8671                    instance.name, target_node)
8672
8673       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8674                                            ignore_secondaries=True)
8675       if not disks_ok:
8676         _ShutdownInstanceDisks(self.lu, instance)
8677         raise errors.OpExecError("Can't activate the instance's disks")
8678
8679       self.feedback_fn("* starting the instance on the target node %s" %
8680                        target_node)
8681       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8682                                             False)
8683       msg = result.fail_msg
8684       if msg:
8685         _ShutdownInstanceDisks(self.lu, instance)
8686         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8687                                  (instance.name, target_node, msg))
8688
8689   def Exec(self, feedback_fn):
8690     """Perform the migration.
8691
8692     """
8693     self.feedback_fn = feedback_fn
8694     self.source_node = self.instance.primary_node
8695
8696     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8697     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8698       self.target_node = self.instance.secondary_nodes[0]
8699       # Otherwise self.target_node has been populated either
8700       # directly, or through an iallocator.
8701
8702     self.all_nodes = [self.source_node, self.target_node]
8703     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8704                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8705
8706     if self.failover:
8707       feedback_fn("Failover instance %s" % self.instance.name)
8708       self._ExecFailover()
8709     else:
8710       feedback_fn("Migrating instance %s" % self.instance.name)
8711
8712       if self.cleanup:
8713         return self._ExecCleanup()
8714       else:
8715         return self._ExecMigration()
8716
8717
8718 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8719                     force_open):
8720   """Wrapper around L{_CreateBlockDevInner}.
8721
8722   This method annotates the root device first.
8723
8724   """
8725   (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8726   return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8727                               force_open)
8728
8729
8730 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8731                          info, force_open):
8732   """Create a tree of block devices on a given node.
8733
8734   If this device type has to be created on secondaries, create it and
8735   all its children.
8736
8737   If not, just recurse to children keeping the same 'force' value.
8738
8739   @attention: The device has to be annotated already.
8740
8741   @param lu: the lu on whose behalf we execute
8742   @param node: the node on which to create the device
8743   @type instance: L{objects.Instance}
8744   @param instance: the instance which owns the device
8745   @type device: L{objects.Disk}
8746   @param device: the device to create
8747   @type force_create: boolean
8748   @param force_create: whether to force creation of this device; this
8749       will be change to True whenever we find a device which has
8750       CreateOnSecondary() attribute
8751   @param info: the extra 'metadata' we should attach to the device
8752       (this will be represented as a LVM tag)
8753   @type force_open: boolean
8754   @param force_open: this parameter will be passes to the
8755       L{backend.BlockdevCreate} function where it specifies
8756       whether we run on primary or not, and it affects both
8757       the child assembly and the device own Open() execution
8758
8759   """
8760   if device.CreateOnSecondary():
8761     force_create = True
8762
8763   if device.children:
8764     for child in device.children:
8765       _CreateBlockDevInner(lu, node, instance, child, force_create,
8766                            info, force_open)
8767
8768   if not force_create:
8769     return
8770
8771   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8772
8773
8774 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8775   """Create a single block device on a given node.
8776
8777   This will not recurse over children of the device, so they must be
8778   created in advance.
8779
8780   @param lu: the lu on whose behalf we execute
8781   @param node: the node on which to create the device
8782   @type instance: L{objects.Instance}
8783   @param instance: the instance which owns the device
8784   @type device: L{objects.Disk}
8785   @param device: the device to create
8786   @param info: the extra 'metadata' we should attach to the device
8787       (this will be represented as a LVM tag)
8788   @type force_open: boolean
8789   @param force_open: this parameter will be passes to the
8790       L{backend.BlockdevCreate} function where it specifies
8791       whether we run on primary or not, and it affects both
8792       the child assembly and the device own Open() execution
8793
8794   """
8795   lu.cfg.SetDiskID(device, node)
8796   result = lu.rpc.call_blockdev_create(node, device, device.size,
8797                                        instance.name, force_open, info)
8798   result.Raise("Can't create block device %s on"
8799                " node %s for instance %s" % (device, node, instance.name))
8800   if device.physical_id is None:
8801     device.physical_id = result.payload
8802
8803
8804 def _GenerateUniqueNames(lu, exts):
8805   """Generate a suitable LV name.
8806
8807   This will generate a logical volume name for the given instance.
8808
8809   """
8810   results = []
8811   for val in exts:
8812     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8813     results.append("%s%s" % (new_id, val))
8814   return results
8815
8816
8817 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8818                          iv_name, p_minor, s_minor):
8819   """Generate a drbd8 device complete with its children.
8820
8821   """
8822   assert len(vgnames) == len(names) == 2
8823   port = lu.cfg.AllocatePort()
8824   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8825
8826   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8827                           logical_id=(vgnames[0], names[0]),
8828                           params={})
8829   dev_meta = objects.Disk(dev_type=constants.LD_LV,
8830                           size=constants.DRBD_META_SIZE,
8831                           logical_id=(vgnames[1], names[1]),
8832                           params={})
8833   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8834                           logical_id=(primary, secondary, port,
8835                                       p_minor, s_minor,
8836                                       shared_secret),
8837                           children=[dev_data, dev_meta],
8838                           iv_name=iv_name, params={})
8839   return drbd_dev
8840
8841
8842 _DISK_TEMPLATE_NAME_PREFIX = {
8843   constants.DT_PLAIN: "",
8844   constants.DT_RBD: ".rbd",
8845   }
8846
8847
8848 _DISK_TEMPLATE_DEVICE_TYPE = {
8849   constants.DT_PLAIN: constants.LD_LV,
8850   constants.DT_FILE: constants.LD_FILE,
8851   constants.DT_SHARED_FILE: constants.LD_FILE,
8852   constants.DT_BLOCK: constants.LD_BLOCKDEV,
8853   constants.DT_RBD: constants.LD_RBD,
8854   }
8855
8856
8857 def _GenerateDiskTemplate(
8858   lu, template_name, instance_name, primary_node, secondary_nodes,
8859   disk_info, file_storage_dir, file_driver, base_index,
8860   feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8861   _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8862   """Generate the entire disk layout for a given template type.
8863
8864   """
8865   #TODO: compute space requirements
8866
8867   vgname = lu.cfg.GetVGName()
8868   disk_count = len(disk_info)
8869   disks = []
8870
8871   if template_name == constants.DT_DISKLESS:
8872     pass
8873   elif template_name == constants.DT_DRBD8:
8874     if len(secondary_nodes) != 1:
8875       raise errors.ProgrammerError("Wrong template configuration")
8876     remote_node = secondary_nodes[0]
8877     minors = lu.cfg.AllocateDRBDMinor(
8878       [primary_node, remote_node] * len(disk_info), instance_name)
8879
8880     (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8881                                                        full_disk_params)
8882     drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8883
8884     names = []
8885     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8886                                                for i in range(disk_count)]):
8887       names.append(lv_prefix + "_data")
8888       names.append(lv_prefix + "_meta")
8889     for idx, disk in enumerate(disk_info):
8890       disk_index = idx + base_index
8891       data_vg = disk.get(constants.IDISK_VG, vgname)
8892       meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8893       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8894                                       disk[constants.IDISK_SIZE],
8895                                       [data_vg, meta_vg],
8896                                       names[idx * 2:idx * 2 + 2],
8897                                       "disk/%d" % disk_index,
8898                                       minors[idx * 2], minors[idx * 2 + 1])
8899       disk_dev.mode = disk[constants.IDISK_MODE]
8900       disks.append(disk_dev)
8901   else:
8902     if secondary_nodes:
8903       raise errors.ProgrammerError("Wrong template configuration")
8904
8905     if template_name == constants.DT_FILE:
8906       _req_file_storage()
8907     elif template_name == constants.DT_SHARED_FILE:
8908       _req_shr_file_storage()
8909
8910     name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8911     if name_prefix is None:
8912       names = None
8913     else:
8914       names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8915                                         (name_prefix, base_index + i)
8916                                         for i in range(disk_count)])
8917
8918     if template_name == constants.DT_PLAIN:
8919       def logical_id_fn(idx, _, disk):
8920         vg = disk.get(constants.IDISK_VG, vgname)
8921         return (vg, names[idx])
8922     elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8923       logical_id_fn = \
8924         lambda _, disk_index, disk: (file_driver,
8925                                      "%s/disk%d" % (file_storage_dir,
8926                                                     disk_index))
8927     elif template_name == constants.DT_BLOCK:
8928       logical_id_fn = \
8929         lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8930                                        disk[constants.IDISK_ADOPT])
8931     elif template_name == constants.DT_RBD:
8932       logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8933     else:
8934       raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8935
8936     dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8937
8938     for idx, disk in enumerate(disk_info):
8939       disk_index = idx + base_index
8940       size = disk[constants.IDISK_SIZE]
8941       feedback_fn("* disk %s, size %s" %
8942                   (disk_index, utils.FormatUnit(size, "h")))
8943       disks.append(objects.Disk(dev_type=dev_type, size=size,
8944                                 logical_id=logical_id_fn(idx, disk_index, disk),
8945                                 iv_name="disk/%d" % disk_index,
8946                                 mode=disk[constants.IDISK_MODE],
8947                                 params={}))
8948
8949   return disks
8950
8951
8952 def _GetInstanceInfoText(instance):
8953   """Compute that text that should be added to the disk's metadata.
8954
8955   """
8956   return "originstname+%s" % instance.name
8957
8958
8959 def _CalcEta(time_taken, written, total_size):
8960   """Calculates the ETA based on size written and total size.
8961
8962   @param time_taken: The time taken so far
8963   @param written: amount written so far
8964   @param total_size: The total size of data to be written
8965   @return: The remaining time in seconds
8966
8967   """
8968   avg_time = time_taken / float(written)
8969   return (total_size - written) * avg_time
8970
8971
8972 def _WipeDisks(lu, instance):
8973   """Wipes instance disks.
8974
8975   @type lu: L{LogicalUnit}
8976   @param lu: the logical unit on whose behalf we execute
8977   @type instance: L{objects.Instance}
8978   @param instance: the instance whose disks we should create
8979   @return: the success of the wipe
8980
8981   """
8982   node = instance.primary_node
8983
8984   for device in instance.disks:
8985     lu.cfg.SetDiskID(device, node)
8986
8987   logging.info("Pause sync of instance %s disks", instance.name)
8988   result = lu.rpc.call_blockdev_pause_resume_sync(node,
8989                                                   (instance.disks, instance),
8990                                                   True)
8991   result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8992
8993   for idx, success in enumerate(result.payload):
8994     if not success:
8995       logging.warn("pause-sync of instance %s for disks %d failed",
8996                    instance.name, idx)
8997
8998   try:
8999     for idx, device in enumerate(instance.disks):
9000       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9001       # MAX_WIPE_CHUNK at max
9002       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
9003                             constants.MIN_WIPE_CHUNK_PERCENT)
9004       # we _must_ make this an int, otherwise rounding errors will
9005       # occur
9006       wipe_chunk_size = int(wipe_chunk_size)
9007
9008       lu.LogInfo("* Wiping disk %d", idx)
9009       logging.info("Wiping disk %d for instance %s, node %s using"
9010                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9011
9012       offset = 0
9013       size = device.size
9014       last_output = 0
9015       start_time = time.time()
9016
9017       while offset < size:
9018         wipe_size = min(wipe_chunk_size, size - offset)
9019         logging.debug("Wiping disk %d, offset %s, chunk %s",
9020                       idx, offset, wipe_size)
9021         result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9022                                            wipe_size)
9023         result.Raise("Could not wipe disk %d at offset %d for size %d" %
9024                      (idx, offset, wipe_size))
9025         now = time.time()
9026         offset += wipe_size
9027         if now - last_output >= 60:
9028           eta = _CalcEta(now - start_time, offset, size)
9029           lu.LogInfo(" - done: %.1f%% ETA: %s" %
9030                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
9031           last_output = now
9032   finally:
9033     logging.info("Resume sync of instance %s disks", instance.name)
9034
9035     result = lu.rpc.call_blockdev_pause_resume_sync(node,
9036                                                     (instance.disks, instance),
9037                                                     False)
9038
9039     if result.fail_msg:
9040       lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
9041                     " please have a look at the status and troubleshoot"
9042                     " the issue: %s", node, result.fail_msg)
9043     else:
9044       for idx, success in enumerate(result.payload):
9045         if not success:
9046           lu.LogWarning("Resume sync of disk %d failed, please have a"
9047                         " look at the status and troubleshoot the issue", idx)
9048           logging.warn("resume-sync of instance %s for disks %d failed",
9049                        instance.name, idx)
9050
9051
9052 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9053   """Create all disks for an instance.
9054
9055   This abstracts away some work from AddInstance.
9056
9057   @type lu: L{LogicalUnit}
9058   @param lu: the logical unit on whose behalf we execute
9059   @type instance: L{objects.Instance}
9060   @param instance: the instance whose disks we should create
9061   @type to_skip: list
9062   @param to_skip: list of indices to skip
9063   @type target_node: string
9064   @param target_node: if passed, overrides the target node for creation
9065   @rtype: boolean
9066   @return: the success of the creation
9067
9068   """
9069   info = _GetInstanceInfoText(instance)
9070   if target_node is None:
9071     pnode = instance.primary_node
9072     all_nodes = instance.all_nodes
9073   else:
9074     pnode = target_node
9075     all_nodes = [pnode]
9076
9077   if instance.disk_template in constants.DTS_FILEBASED:
9078     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9079     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9080
9081     result.Raise("Failed to create directory '%s' on"
9082                  " node %s" % (file_storage_dir, pnode))
9083
9084   # Note: this needs to be kept in sync with adding of disks in
9085   # LUInstanceSetParams
9086   for idx, device in enumerate(instance.disks):
9087     if to_skip and idx in to_skip:
9088       continue
9089     logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9090     #HARDCODE
9091     for node in all_nodes:
9092       f_create = node == pnode
9093       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9094
9095
9096 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9097   """Remove all disks for an instance.
9098
9099   This abstracts away some work from `AddInstance()` and
9100   `RemoveInstance()`. Note that in case some of the devices couldn't
9101   be removed, the removal will continue with the other ones (compare
9102   with `_CreateDisks()`).
9103
9104   @type lu: L{LogicalUnit}
9105   @param lu: the logical unit on whose behalf we execute
9106   @type instance: L{objects.Instance}
9107   @param instance: the instance whose disks we should remove
9108   @type target_node: string
9109   @param target_node: used to override the node on which to remove the disks
9110   @rtype: boolean
9111   @return: the success of the removal
9112
9113   """
9114   logging.info("Removing block devices for instance %s", instance.name)
9115
9116   all_result = True
9117   ports_to_release = set()
9118   anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9119   for (idx, device) in enumerate(anno_disks):
9120     if target_node:
9121       edata = [(target_node, device)]
9122     else:
9123       edata = device.ComputeNodeTree(instance.primary_node)
9124     for node, disk in edata:
9125       lu.cfg.SetDiskID(disk, node)
9126       result = lu.rpc.call_blockdev_remove(node, disk)
9127       if result.fail_msg:
9128         lu.LogWarning("Could not remove disk %s on node %s,"
9129                       " continuing anyway: %s", idx, node, result.fail_msg)
9130         if not (result.offline and node != instance.primary_node):
9131           all_result = False
9132
9133     # if this is a DRBD disk, return its port to the pool
9134     if device.dev_type in constants.LDS_DRBD:
9135       ports_to_release.add(device.logical_id[2])
9136
9137   if all_result or ignore_failures:
9138     for port in ports_to_release:
9139       lu.cfg.AddTcpUdpPort(port)
9140
9141   if instance.disk_template == constants.DT_FILE:
9142     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9143     if target_node:
9144       tgt = target_node
9145     else:
9146       tgt = instance.primary_node
9147     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9148     if result.fail_msg:
9149       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9150                     file_storage_dir, instance.primary_node, result.fail_msg)
9151       all_result = False
9152
9153   return all_result
9154
9155
9156 def _ComputeDiskSizePerVG(disk_template, disks):
9157   """Compute disk size requirements in the volume group
9158
9159   """
9160   def _compute(disks, payload):
9161     """Universal algorithm.
9162
9163     """
9164     vgs = {}
9165     for disk in disks:
9166       vgs[disk[constants.IDISK_VG]] = \
9167         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9168
9169     return vgs
9170
9171   # Required free disk space as a function of disk and swap space
9172   req_size_dict = {
9173     constants.DT_DISKLESS: {},
9174     constants.DT_PLAIN: _compute(disks, 0),
9175     # 128 MB are added for drbd metadata for each disk
9176     constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9177     constants.DT_FILE: {},
9178     constants.DT_SHARED_FILE: {},
9179   }
9180
9181   if disk_template not in req_size_dict:
9182     raise errors.ProgrammerError("Disk template '%s' size requirement"
9183                                  " is unknown" % disk_template)
9184
9185   return req_size_dict[disk_template]
9186
9187
9188 def _FilterVmNodes(lu, nodenames):
9189   """Filters out non-vm_capable nodes from a list.
9190
9191   @type lu: L{LogicalUnit}
9192   @param lu: the logical unit for which we check
9193   @type nodenames: list
9194   @param nodenames: the list of nodes on which we should check
9195   @rtype: list
9196   @return: the list of vm-capable nodes
9197
9198   """
9199   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9200   return [name for name in nodenames if name not in vm_nodes]
9201
9202
9203 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9204   """Hypervisor parameter validation.
9205
9206   This function abstract the hypervisor parameter validation to be
9207   used in both instance create and instance modify.
9208
9209   @type lu: L{LogicalUnit}
9210   @param lu: the logical unit for which we check
9211   @type nodenames: list
9212   @param nodenames: the list of nodes on which we should check
9213   @type hvname: string
9214   @param hvname: the name of the hypervisor we should use
9215   @type hvparams: dict
9216   @param hvparams: the parameters which we need to check
9217   @raise errors.OpPrereqError: if the parameters are not valid
9218
9219   """
9220   nodenames = _FilterVmNodes(lu, nodenames)
9221
9222   cluster = lu.cfg.GetClusterInfo()
9223   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9224
9225   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9226   for node in nodenames:
9227     info = hvinfo[node]
9228     if info.offline:
9229       continue
9230     info.Raise("Hypervisor parameter validation failed on node %s" % node)
9231
9232
9233 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9234   """OS parameters validation.
9235
9236   @type lu: L{LogicalUnit}
9237   @param lu: the logical unit for which we check
9238   @type required: boolean
9239   @param required: whether the validation should fail if the OS is not
9240       found
9241   @type nodenames: list
9242   @param nodenames: the list of nodes on which we should check
9243   @type osname: string
9244   @param osname: the name of the hypervisor we should use
9245   @type osparams: dict
9246   @param osparams: the parameters which we need to check
9247   @raise errors.OpPrereqError: if the parameters are not valid
9248
9249   """
9250   nodenames = _FilterVmNodes(lu, nodenames)
9251   result = lu.rpc.call_os_validate(nodenames, required, osname,
9252                                    [constants.OS_VALIDATE_PARAMETERS],
9253                                    osparams)
9254   for node, nres in result.items():
9255     # we don't check for offline cases since this should be run only
9256     # against the master node and/or an instance's nodes
9257     nres.Raise("OS Parameters validation failed on node %s" % node)
9258     if not nres.payload:
9259       lu.LogInfo("OS %s not found on node %s, validation skipped",
9260                  osname, node)
9261
9262
9263 class LUInstanceCreate(LogicalUnit):
9264   """Create an instance.
9265
9266   """
9267   HPATH = "instance-add"
9268   HTYPE = constants.HTYPE_INSTANCE
9269   REQ_BGL = False
9270
9271   def CheckArguments(self):
9272     """Check arguments.
9273
9274     """
9275     # do not require name_check to ease forward/backward compatibility
9276     # for tools
9277     if self.op.no_install and self.op.start:
9278       self.LogInfo("No-installation mode selected, disabling startup")
9279       self.op.start = False
9280     # validate/normalize the instance name
9281     self.op.instance_name = \
9282       netutils.Hostname.GetNormalizedName(self.op.instance_name)
9283
9284     if self.op.ip_check and not self.op.name_check:
9285       # TODO: make the ip check more flexible and not depend on the name check
9286       raise errors.OpPrereqError("Cannot do IP address check without a name"
9287                                  " check", errors.ECODE_INVAL)
9288
9289     # check nics' parameter names
9290     for nic in self.op.nics:
9291       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9292
9293     # check disks. parameter names and consistent adopt/no-adopt strategy
9294     has_adopt = has_no_adopt = False
9295     for disk in self.op.disks:
9296       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9297       if constants.IDISK_ADOPT in disk:
9298         has_adopt = True
9299       else:
9300         has_no_adopt = True
9301     if has_adopt and has_no_adopt:
9302       raise errors.OpPrereqError("Either all disks are adopted or none is",
9303                                  errors.ECODE_INVAL)
9304     if has_adopt:
9305       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9306         raise errors.OpPrereqError("Disk adoption is not supported for the"
9307                                    " '%s' disk template" %
9308                                    self.op.disk_template,
9309                                    errors.ECODE_INVAL)
9310       if self.op.iallocator is not None:
9311         raise errors.OpPrereqError("Disk adoption not allowed with an"
9312                                    " iallocator script", errors.ECODE_INVAL)
9313       if self.op.mode == constants.INSTANCE_IMPORT:
9314         raise errors.OpPrereqError("Disk adoption not allowed for"
9315                                    " instance import", errors.ECODE_INVAL)
9316     else:
9317       if self.op.disk_template in constants.DTS_MUST_ADOPT:
9318         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9319                                    " but no 'adopt' parameter given" %
9320                                    self.op.disk_template,
9321                                    errors.ECODE_INVAL)
9322
9323     self.adopt_disks = has_adopt
9324
9325     # instance name verification
9326     if self.op.name_check:
9327       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9328       self.op.instance_name = self.hostname1.name
9329       # used in CheckPrereq for ip ping check
9330       self.check_ip = self.hostname1.ip
9331     else:
9332       self.check_ip = None
9333
9334     # file storage checks
9335     if (self.op.file_driver and
9336         not self.op.file_driver in constants.FILE_DRIVER):
9337       raise errors.OpPrereqError("Invalid file driver name '%s'" %
9338                                  self.op.file_driver, errors.ECODE_INVAL)
9339
9340     if self.op.disk_template == constants.DT_FILE:
9341       opcodes.RequireFileStorage()
9342     elif self.op.disk_template == constants.DT_SHARED_FILE:
9343       opcodes.RequireSharedFileStorage()
9344
9345     ### Node/iallocator related checks
9346     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9347
9348     if self.op.pnode is not None:
9349       if self.op.disk_template in constants.DTS_INT_MIRROR:
9350         if self.op.snode is None:
9351           raise errors.OpPrereqError("The networked disk templates need"
9352                                      " a mirror node", errors.ECODE_INVAL)
9353       elif self.op.snode:
9354         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9355                         " template")
9356         self.op.snode = None
9357
9358     self._cds = _GetClusterDomainSecret()
9359
9360     if self.op.mode == constants.INSTANCE_IMPORT:
9361       # On import force_variant must be True, because if we forced it at
9362       # initial install, our only chance when importing it back is that it
9363       # works again!
9364       self.op.force_variant = True
9365
9366       if self.op.no_install:
9367         self.LogInfo("No-installation mode has no effect during import")
9368
9369     elif self.op.mode == constants.INSTANCE_CREATE:
9370       if self.op.os_type is None:
9371         raise errors.OpPrereqError("No guest OS specified",
9372                                    errors.ECODE_INVAL)
9373       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9374         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9375                                    " installation" % self.op.os_type,
9376                                    errors.ECODE_STATE)
9377       if self.op.disk_template is None:
9378         raise errors.OpPrereqError("No disk template specified",
9379                                    errors.ECODE_INVAL)
9380
9381     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9382       # Check handshake to ensure both clusters have the same domain secret
9383       src_handshake = self.op.source_handshake
9384       if not src_handshake:
9385         raise errors.OpPrereqError("Missing source handshake",
9386                                    errors.ECODE_INVAL)
9387
9388       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9389                                                            src_handshake)
9390       if errmsg:
9391         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9392                                    errors.ECODE_INVAL)
9393
9394       # Load and check source CA
9395       self.source_x509_ca_pem = self.op.source_x509_ca
9396       if not self.source_x509_ca_pem:
9397         raise errors.OpPrereqError("Missing source X509 CA",
9398                                    errors.ECODE_INVAL)
9399
9400       try:
9401         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9402                                                     self._cds)
9403       except OpenSSL.crypto.Error, err:
9404         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9405                                    (err, ), errors.ECODE_INVAL)
9406
9407       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9408       if errcode is not None:
9409         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9410                                    errors.ECODE_INVAL)
9411
9412       self.source_x509_ca = cert
9413
9414       src_instance_name = self.op.source_instance_name
9415       if not src_instance_name:
9416         raise errors.OpPrereqError("Missing source instance name",
9417                                    errors.ECODE_INVAL)
9418
9419       self.source_instance_name = \
9420           netutils.GetHostname(name=src_instance_name).name
9421
9422     else:
9423       raise errors.OpPrereqError("Invalid instance creation mode %r" %
9424                                  self.op.mode, errors.ECODE_INVAL)
9425
9426   def ExpandNames(self):
9427     """ExpandNames for CreateInstance.
9428
9429     Figure out the right locks for instance creation.
9430
9431     """
9432     self.needed_locks = {}
9433
9434     instance_name = self.op.instance_name
9435     # this is just a preventive check, but someone might still add this
9436     # instance in the meantime, and creation will fail at lock-add time
9437     if instance_name in self.cfg.GetInstanceList():
9438       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9439                                  instance_name, errors.ECODE_EXISTS)
9440
9441     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9442
9443     if self.op.iallocator:
9444       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9445       # specifying a group on instance creation and then selecting nodes from
9446       # that group
9447       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9448       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9449     else:
9450       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9451       nodelist = [self.op.pnode]
9452       if self.op.snode is not None:
9453         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9454         nodelist.append(self.op.snode)
9455       self.needed_locks[locking.LEVEL_NODE] = nodelist
9456       # Lock resources of instance's primary and secondary nodes (copy to
9457       # prevent accidential modification)
9458       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9459
9460     # in case of import lock the source node too
9461     if self.op.mode == constants.INSTANCE_IMPORT:
9462       src_node = self.op.src_node
9463       src_path = self.op.src_path
9464
9465       if src_path is None:
9466         self.op.src_path = src_path = self.op.instance_name
9467
9468       if src_node is None:
9469         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9470         self.op.src_node = None
9471         if os.path.isabs(src_path):
9472           raise errors.OpPrereqError("Importing an instance from a path"
9473                                      " requires a source node option",
9474                                      errors.ECODE_INVAL)
9475       else:
9476         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9477         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9478           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9479         if not os.path.isabs(src_path):
9480           self.op.src_path = src_path = \
9481             utils.PathJoin(constants.EXPORT_DIR, src_path)
9482
9483   def _RunAllocator(self):
9484     """Run the allocator based on input opcode.
9485
9486     """
9487     nics = [n.ToDict() for n in self.nics]
9488     memory = self.be_full[constants.BE_MAXMEM]
9489     spindle_use = self.be_full[constants.BE_SPINDLE_USE]
9490     req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
9491                                         disk_template=self.op.disk_template,
9492                                         tags=self.op.tags,
9493                                         os=self.op.os_type,
9494                                         vcpus=self.be_full[constants.BE_VCPUS],
9495                                         memory=memory,
9496                                         spindle_use=spindle_use,
9497                                         disks=self.disks,
9498                                         nics=nics,
9499                                         hypervisor=self.op.hypervisor)
9500     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9501
9502     ial.Run(self.op.iallocator)
9503
9504     if not ial.success:
9505       raise errors.OpPrereqError("Can't compute nodes using"
9506                                  " iallocator '%s': %s" %
9507                                  (self.op.iallocator, ial.info),
9508                                  errors.ECODE_NORES)
9509     self.op.pnode = ial.result[0]
9510     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9511                  self.op.instance_name, self.op.iallocator,
9512                  utils.CommaJoin(ial.result))
9513
9514     assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9515
9516     if req.RequiredNodes() == 2:
9517       self.op.snode = ial.result[1]
9518
9519   def BuildHooksEnv(self):
9520     """Build hooks env.
9521
9522     This runs on master, primary and secondary nodes of the instance.
9523
9524     """
9525     env = {
9526       "ADD_MODE": self.op.mode,
9527       }
9528     if self.op.mode == constants.INSTANCE_IMPORT:
9529       env["SRC_NODE"] = self.op.src_node
9530       env["SRC_PATH"] = self.op.src_path
9531       env["SRC_IMAGES"] = self.src_images
9532
9533     env.update(_BuildInstanceHookEnv(
9534       name=self.op.instance_name,
9535       primary_node=self.op.pnode,
9536       secondary_nodes=self.secondaries,
9537       status=self.op.start,
9538       os_type=self.op.os_type,
9539       minmem=self.be_full[constants.BE_MINMEM],
9540       maxmem=self.be_full[constants.BE_MAXMEM],
9541       vcpus=self.be_full[constants.BE_VCPUS],
9542       nics=_NICListToTuple(self, self.nics),
9543       disk_template=self.op.disk_template,
9544       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9545              for d in self.disks],
9546       bep=self.be_full,
9547       hvp=self.hv_full,
9548       hypervisor_name=self.op.hypervisor,
9549       tags=self.op.tags,
9550     ))
9551
9552     return env
9553
9554   def BuildHooksNodes(self):
9555     """Build hooks nodes.
9556
9557     """
9558     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9559     return nl, nl
9560
9561   def _ReadExportInfo(self):
9562     """Reads the export information from disk.
9563
9564     It will override the opcode source node and path with the actual
9565     information, if these two were not specified before.
9566
9567     @return: the export information
9568
9569     """
9570     assert self.op.mode == constants.INSTANCE_IMPORT
9571
9572     src_node = self.op.src_node
9573     src_path = self.op.src_path
9574
9575     if src_node is None:
9576       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9577       exp_list = self.rpc.call_export_list(locked_nodes)
9578       found = False
9579       for node in exp_list:
9580         if exp_list[node].fail_msg:
9581           continue
9582         if src_path in exp_list[node].payload:
9583           found = True
9584           self.op.src_node = src_node = node
9585           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9586                                                        src_path)
9587           break
9588       if not found:
9589         raise errors.OpPrereqError("No export found for relative path %s" %
9590                                     src_path, errors.ECODE_INVAL)
9591
9592     _CheckNodeOnline(self, src_node)
9593     result = self.rpc.call_export_info(src_node, src_path)
9594     result.Raise("No export or invalid export found in dir %s" % src_path)
9595
9596     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9597     if not export_info.has_section(constants.INISECT_EXP):
9598       raise errors.ProgrammerError("Corrupted export config",
9599                                    errors.ECODE_ENVIRON)
9600
9601     ei_version = export_info.get(constants.INISECT_EXP, "version")
9602     if (int(ei_version) != constants.EXPORT_VERSION):
9603       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9604                                  (ei_version, constants.EXPORT_VERSION),
9605                                  errors.ECODE_ENVIRON)
9606     return export_info
9607
9608   def _ReadExportParams(self, einfo):
9609     """Use export parameters as defaults.
9610
9611     In case the opcode doesn't specify (as in override) some instance
9612     parameters, then try to use them from the export information, if
9613     that declares them.
9614
9615     """
9616     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9617
9618     if self.op.disk_template is None:
9619       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9620         self.op.disk_template = einfo.get(constants.INISECT_INS,
9621                                           "disk_template")
9622         if self.op.disk_template not in constants.DISK_TEMPLATES:
9623           raise errors.OpPrereqError("Disk template specified in configuration"
9624                                      " file is not one of the allowed values:"
9625                                      " %s" %
9626                                      " ".join(constants.DISK_TEMPLATES),
9627                                      errors.ECODE_INVAL)
9628       else:
9629         raise errors.OpPrereqError("No disk template specified and the export"
9630                                    " is missing the disk_template information",
9631                                    errors.ECODE_INVAL)
9632
9633     if not self.op.disks:
9634       disks = []
9635       # TODO: import the disk iv_name too
9636       for idx in range(constants.MAX_DISKS):
9637         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9638           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9639           disks.append({constants.IDISK_SIZE: disk_sz})
9640       self.op.disks = disks
9641       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9642         raise errors.OpPrereqError("No disk info specified and the export"
9643                                    " is missing the disk information",
9644                                    errors.ECODE_INVAL)
9645
9646     if not self.op.nics:
9647       nics = []
9648       for idx in range(constants.MAX_NICS):
9649         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9650           ndict = {}
9651           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9652             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9653             ndict[name] = v
9654           nics.append(ndict)
9655         else:
9656           break
9657       self.op.nics = nics
9658
9659     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9660       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9661
9662     if (self.op.hypervisor is None and
9663         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9664       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9665
9666     if einfo.has_section(constants.INISECT_HYP):
9667       # use the export parameters but do not override the ones
9668       # specified by the user
9669       for name, value in einfo.items(constants.INISECT_HYP):
9670         if name not in self.op.hvparams:
9671           self.op.hvparams[name] = value
9672
9673     if einfo.has_section(constants.INISECT_BEP):
9674       # use the parameters, without overriding
9675       for name, value in einfo.items(constants.INISECT_BEP):
9676         if name not in self.op.beparams:
9677           self.op.beparams[name] = value
9678         # Compatibility for the old "memory" be param
9679         if name == constants.BE_MEMORY:
9680           if constants.BE_MAXMEM not in self.op.beparams:
9681             self.op.beparams[constants.BE_MAXMEM] = value
9682           if constants.BE_MINMEM not in self.op.beparams:
9683             self.op.beparams[constants.BE_MINMEM] = value
9684     else:
9685       # try to read the parameters old style, from the main section
9686       for name in constants.BES_PARAMETERS:
9687         if (name not in self.op.beparams and
9688             einfo.has_option(constants.INISECT_INS, name)):
9689           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9690
9691     if einfo.has_section(constants.INISECT_OSP):
9692       # use the parameters, without overriding
9693       for name, value in einfo.items(constants.INISECT_OSP):
9694         if name not in self.op.osparams:
9695           self.op.osparams[name] = value
9696
9697   def _RevertToDefaults(self, cluster):
9698     """Revert the instance parameters to the default values.
9699
9700     """
9701     # hvparams
9702     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9703     for name in self.op.hvparams.keys():
9704       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9705         del self.op.hvparams[name]
9706     # beparams
9707     be_defs = cluster.SimpleFillBE({})
9708     for name in self.op.beparams.keys():
9709       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9710         del self.op.beparams[name]
9711     # nic params
9712     nic_defs = cluster.SimpleFillNIC({})
9713     for nic in self.op.nics:
9714       for name in constants.NICS_PARAMETERS:
9715         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9716           del nic[name]
9717     # osparams
9718     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9719     for name in self.op.osparams.keys():
9720       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9721         del self.op.osparams[name]
9722
9723   def _CalculateFileStorageDir(self):
9724     """Calculate final instance file storage dir.
9725
9726     """
9727     # file storage dir calculation/check
9728     self.instance_file_storage_dir = None
9729     if self.op.disk_template in constants.DTS_FILEBASED:
9730       # build the full file storage dir path
9731       joinargs = []
9732
9733       if self.op.disk_template == constants.DT_SHARED_FILE:
9734         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9735       else:
9736         get_fsd_fn = self.cfg.GetFileStorageDir
9737
9738       cfg_storagedir = get_fsd_fn()
9739       if not cfg_storagedir:
9740         raise errors.OpPrereqError("Cluster file storage dir not defined",
9741                                    errors.ECODE_STATE)
9742       joinargs.append(cfg_storagedir)
9743
9744       if self.op.file_storage_dir is not None:
9745         joinargs.append(self.op.file_storage_dir)
9746
9747       joinargs.append(self.op.instance_name)
9748
9749       # pylint: disable=W0142
9750       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9751
9752   def CheckPrereq(self): # pylint: disable=R0914
9753     """Check prerequisites.
9754
9755     """
9756     self._CalculateFileStorageDir()
9757
9758     if self.op.mode == constants.INSTANCE_IMPORT:
9759       export_info = self._ReadExportInfo()
9760       self._ReadExportParams(export_info)
9761       self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9762     else:
9763       self._old_instance_name = None
9764
9765     if (not self.cfg.GetVGName() and
9766         self.op.disk_template not in constants.DTS_NOT_LVM):
9767       raise errors.OpPrereqError("Cluster does not support lvm-based"
9768                                  " instances", errors.ECODE_STATE)
9769
9770     if (self.op.hypervisor is None or
9771         self.op.hypervisor == constants.VALUE_AUTO):
9772       self.op.hypervisor = self.cfg.GetHypervisorType()
9773
9774     cluster = self.cfg.GetClusterInfo()
9775     enabled_hvs = cluster.enabled_hypervisors
9776     if self.op.hypervisor not in enabled_hvs:
9777       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9778                                  " cluster (%s)" %
9779                                  (self.op.hypervisor, ",".join(enabled_hvs)),
9780                                  errors.ECODE_STATE)
9781
9782     # Check tag validity
9783     for tag in self.op.tags:
9784       objects.TaggableObject.ValidateTag(tag)
9785
9786     # check hypervisor parameter syntax (locally)
9787     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9788     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9789                                       self.op.hvparams)
9790     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9791     hv_type.CheckParameterSyntax(filled_hvp)
9792     self.hv_full = filled_hvp
9793     # check that we don't specify global parameters on an instance
9794     _CheckGlobalHvParams(self.op.hvparams)
9795
9796     # fill and remember the beparams dict
9797     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9798     for param, value in self.op.beparams.iteritems():
9799       if value == constants.VALUE_AUTO:
9800         self.op.beparams[param] = default_beparams[param]
9801     objects.UpgradeBeParams(self.op.beparams)
9802     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9803     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9804
9805     # build os parameters
9806     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9807
9808     # now that hvp/bep are in final format, let's reset to defaults,
9809     # if told to do so
9810     if self.op.identify_defaults:
9811       self._RevertToDefaults(cluster)
9812
9813     # NIC buildup
9814     self.nics = []
9815     for idx, nic in enumerate(self.op.nics):
9816       nic_mode_req = nic.get(constants.INIC_MODE, None)
9817       nic_mode = nic_mode_req
9818       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9819         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9820
9821       # in routed mode, for the first nic, the default ip is 'auto'
9822       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9823         default_ip_mode = constants.VALUE_AUTO
9824       else:
9825         default_ip_mode = constants.VALUE_NONE
9826
9827       # ip validity checks
9828       ip = nic.get(constants.INIC_IP, default_ip_mode)
9829       if ip is None or ip.lower() == constants.VALUE_NONE:
9830         nic_ip = None
9831       elif ip.lower() == constants.VALUE_AUTO:
9832         if not self.op.name_check:
9833           raise errors.OpPrereqError("IP address set to auto but name checks"
9834                                      " have been skipped",
9835                                      errors.ECODE_INVAL)
9836         nic_ip = self.hostname1.ip
9837       else:
9838         if not netutils.IPAddress.IsValid(ip):
9839           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9840                                      errors.ECODE_INVAL)
9841         nic_ip = ip
9842
9843       # TODO: check the ip address for uniqueness
9844       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9845         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9846                                    errors.ECODE_INVAL)
9847
9848       # MAC address verification
9849       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9850       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9851         mac = utils.NormalizeAndValidateMac(mac)
9852
9853         try:
9854           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9855         except errors.ReservationError:
9856           raise errors.OpPrereqError("MAC address %s already in use"
9857                                      " in cluster" % mac,
9858                                      errors.ECODE_NOTUNIQUE)
9859
9860       #  Build nic parameters
9861       link = nic.get(constants.INIC_LINK, None)
9862       if link == constants.VALUE_AUTO:
9863         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9864       nicparams = {}
9865       if nic_mode_req:
9866         nicparams[constants.NIC_MODE] = nic_mode
9867       if link:
9868         nicparams[constants.NIC_LINK] = link
9869
9870       check_params = cluster.SimpleFillNIC(nicparams)
9871       objects.NIC.CheckParameterSyntax(check_params)
9872       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9873
9874     # disk checks/pre-build
9875     default_vg = self.cfg.GetVGName()
9876     self.disks = []
9877     for disk in self.op.disks:
9878       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9879       if mode not in constants.DISK_ACCESS_SET:
9880         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9881                                    mode, errors.ECODE_INVAL)
9882       size = disk.get(constants.IDISK_SIZE, None)
9883       if size is None:
9884         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9885       try:
9886         size = int(size)
9887       except (TypeError, ValueError):
9888         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9889                                    errors.ECODE_INVAL)
9890
9891       data_vg = disk.get(constants.IDISK_VG, default_vg)
9892       new_disk = {
9893         constants.IDISK_SIZE: size,
9894         constants.IDISK_MODE: mode,
9895         constants.IDISK_VG: data_vg,
9896         }
9897       if constants.IDISK_METAVG in disk:
9898         new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9899       if constants.IDISK_ADOPT in disk:
9900         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9901       self.disks.append(new_disk)
9902
9903     if self.op.mode == constants.INSTANCE_IMPORT:
9904       disk_images = []
9905       for idx in range(len(self.disks)):
9906         option = "disk%d_dump" % idx
9907         if export_info.has_option(constants.INISECT_INS, option):
9908           # FIXME: are the old os-es, disk sizes, etc. useful?
9909           export_name = export_info.get(constants.INISECT_INS, option)
9910           image = utils.PathJoin(self.op.src_path, export_name)
9911           disk_images.append(image)
9912         else:
9913           disk_images.append(False)
9914
9915       self.src_images = disk_images
9916
9917       if self.op.instance_name == self._old_instance_name:
9918         for idx, nic in enumerate(self.nics):
9919           if nic.mac == constants.VALUE_AUTO:
9920             nic_mac_ini = "nic%d_mac" % idx
9921             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9922
9923     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9924
9925     # ip ping checks (we use the same ip that was resolved in ExpandNames)
9926     if self.op.ip_check:
9927       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9928         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9929                                    (self.check_ip, self.op.instance_name),
9930                                    errors.ECODE_NOTUNIQUE)
9931
9932     #### mac address generation
9933     # By generating here the mac address both the allocator and the hooks get
9934     # the real final mac address rather than the 'auto' or 'generate' value.
9935     # There is a race condition between the generation and the instance object
9936     # creation, which means that we know the mac is valid now, but we're not
9937     # sure it will be when we actually add the instance. If things go bad
9938     # adding the instance will abort because of a duplicate mac, and the
9939     # creation job will fail.
9940     for nic in self.nics:
9941       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9942         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9943
9944     #### allocator run
9945
9946     if self.op.iallocator is not None:
9947       self._RunAllocator()
9948
9949     # Release all unneeded node locks
9950     _ReleaseLocks(self, locking.LEVEL_NODE,
9951                   keep=filter(None, [self.op.pnode, self.op.snode,
9952                                      self.op.src_node]))
9953     _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9954                   keep=filter(None, [self.op.pnode, self.op.snode,
9955                                      self.op.src_node]))
9956
9957     #### node related checks
9958
9959     # check primary node
9960     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9961     assert self.pnode is not None, \
9962       "Cannot retrieve locked node %s" % self.op.pnode
9963     if pnode.offline:
9964       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9965                                  pnode.name, errors.ECODE_STATE)
9966     if pnode.drained:
9967       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9968                                  pnode.name, errors.ECODE_STATE)
9969     if not pnode.vm_capable:
9970       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9971                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9972
9973     self.secondaries = []
9974
9975     # mirror node verification
9976     if self.op.disk_template in constants.DTS_INT_MIRROR:
9977       if self.op.snode == pnode.name:
9978         raise errors.OpPrereqError("The secondary node cannot be the"
9979                                    " primary node", errors.ECODE_INVAL)
9980       _CheckNodeOnline(self, self.op.snode)
9981       _CheckNodeNotDrained(self, self.op.snode)
9982       _CheckNodeVmCapable(self, self.op.snode)
9983       self.secondaries.append(self.op.snode)
9984
9985       snode = self.cfg.GetNodeInfo(self.op.snode)
9986       if pnode.group != snode.group:
9987         self.LogWarning("The primary and secondary nodes are in two"
9988                         " different node groups; the disk parameters"
9989                         " from the first disk's node group will be"
9990                         " used")
9991
9992     nodenames = [pnode.name] + self.secondaries
9993
9994     # Verify instance specs
9995     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9996     ispec = {
9997       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9998       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9999       constants.ISPEC_DISK_COUNT: len(self.disks),
10000       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10001       constants.ISPEC_NIC_COUNT: len(self.nics),
10002       constants.ISPEC_SPINDLE_USE: spindle_use,
10003       }
10004
10005     group_info = self.cfg.GetNodeGroup(pnode.group)
10006     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10007     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10008     if not self.op.ignore_ipolicy and res:
10009       raise errors.OpPrereqError(("Instance allocation to group %s violates"
10010                                   " policy: %s") % (pnode.group,
10011                                                     utils.CommaJoin(res)),
10012                                   errors.ECODE_INVAL)
10013
10014     if not self.adopt_disks:
10015       if self.op.disk_template == constants.DT_RBD:
10016         # _CheckRADOSFreeSpace() is just a placeholder.
10017         # Any function that checks prerequisites can be placed here.
10018         # Check if there is enough space on the RADOS cluster.
10019         _CheckRADOSFreeSpace()
10020       else:
10021         # Check lv size requirements, if not adopting
10022         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10023         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10024
10025     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10026       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10027                                 disk[constants.IDISK_ADOPT])
10028                      for disk in self.disks])
10029       if len(all_lvs) != len(self.disks):
10030         raise errors.OpPrereqError("Duplicate volume names given for adoption",
10031                                    errors.ECODE_INVAL)
10032       for lv_name in all_lvs:
10033         try:
10034           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10035           # to ReserveLV uses the same syntax
10036           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10037         except errors.ReservationError:
10038           raise errors.OpPrereqError("LV named %s used by another instance" %
10039                                      lv_name, errors.ECODE_NOTUNIQUE)
10040
10041       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10042       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10043
10044       node_lvs = self.rpc.call_lv_list([pnode.name],
10045                                        vg_names.payload.keys())[pnode.name]
10046       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10047       node_lvs = node_lvs.payload
10048
10049       delta = all_lvs.difference(node_lvs.keys())
10050       if delta:
10051         raise errors.OpPrereqError("Missing logical volume(s): %s" %
10052                                    utils.CommaJoin(delta),
10053                                    errors.ECODE_INVAL)
10054       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10055       if online_lvs:
10056         raise errors.OpPrereqError("Online logical volumes found, cannot"
10057                                    " adopt: %s" % utils.CommaJoin(online_lvs),
10058                                    errors.ECODE_STATE)
10059       # update the size of disk based on what is found
10060       for dsk in self.disks:
10061         dsk[constants.IDISK_SIZE] = \
10062           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10063                                         dsk[constants.IDISK_ADOPT])][0]))
10064
10065     elif self.op.disk_template == constants.DT_BLOCK:
10066       # Normalize and de-duplicate device paths
10067       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10068                        for disk in self.disks])
10069       if len(all_disks) != len(self.disks):
10070         raise errors.OpPrereqError("Duplicate disk names given for adoption",
10071                                    errors.ECODE_INVAL)
10072       baddisks = [d for d in all_disks
10073                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10074       if baddisks:
10075         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10076                                    " cannot be adopted" %
10077                                    (", ".join(baddisks),
10078                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
10079                                    errors.ECODE_INVAL)
10080
10081       node_disks = self.rpc.call_bdev_sizes([pnode.name],
10082                                             list(all_disks))[pnode.name]
10083       node_disks.Raise("Cannot get block device information from node %s" %
10084                        pnode.name)
10085       node_disks = node_disks.payload
10086       delta = all_disks.difference(node_disks.keys())
10087       if delta:
10088         raise errors.OpPrereqError("Missing block device(s): %s" %
10089                                    utils.CommaJoin(delta),
10090                                    errors.ECODE_INVAL)
10091       for dsk in self.disks:
10092         dsk[constants.IDISK_SIZE] = \
10093           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10094
10095     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10096
10097     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10098     # check OS parameters (remotely)
10099     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10100
10101     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10102
10103     # memory check on primary node
10104     #TODO(dynmem): use MINMEM for checking
10105     if self.op.start:
10106       _CheckNodeFreeMemory(self, self.pnode.name,
10107                            "creating instance %s" % self.op.instance_name,
10108                            self.be_full[constants.BE_MAXMEM],
10109                            self.op.hypervisor)
10110
10111     self.dry_run_result = list(nodenames)
10112
10113   def Exec(self, feedback_fn):
10114     """Create and add the instance to the cluster.
10115
10116     """
10117     instance = self.op.instance_name
10118     pnode_name = self.pnode.name
10119
10120     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10121                 self.owned_locks(locking.LEVEL_NODE)), \
10122       "Node locks differ from node resource locks"
10123
10124     ht_kind = self.op.hypervisor
10125     if ht_kind in constants.HTS_REQ_PORT:
10126       network_port = self.cfg.AllocatePort()
10127     else:
10128       network_port = None
10129
10130     # This is ugly but we got a chicken-egg problem here
10131     # We can only take the group disk parameters, as the instance
10132     # has no disks yet (we are generating them right here).
10133     node = self.cfg.GetNodeInfo(pnode_name)
10134     nodegroup = self.cfg.GetNodeGroup(node.group)
10135     disks = _GenerateDiskTemplate(self,
10136                                   self.op.disk_template,
10137                                   instance, pnode_name,
10138                                   self.secondaries,
10139                                   self.disks,
10140                                   self.instance_file_storage_dir,
10141                                   self.op.file_driver,
10142                                   0,
10143                                   feedback_fn,
10144                                   self.cfg.GetGroupDiskParams(nodegroup))
10145
10146     iobj = objects.Instance(name=instance, os=self.op.os_type,
10147                             primary_node=pnode_name,
10148                             nics=self.nics, disks=disks,
10149                             disk_template=self.op.disk_template,
10150                             admin_state=constants.ADMINST_DOWN,
10151                             network_port=network_port,
10152                             beparams=self.op.beparams,
10153                             hvparams=self.op.hvparams,
10154                             hypervisor=self.op.hypervisor,
10155                             osparams=self.op.osparams,
10156                             )
10157
10158     if self.op.tags:
10159       for tag in self.op.tags:
10160         iobj.AddTag(tag)
10161
10162     if self.adopt_disks:
10163       if self.op.disk_template == constants.DT_PLAIN:
10164         # rename LVs to the newly-generated names; we need to construct
10165         # 'fake' LV disks with the old data, plus the new unique_id
10166         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10167         rename_to = []
10168         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10169           rename_to.append(t_dsk.logical_id)
10170           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10171           self.cfg.SetDiskID(t_dsk, pnode_name)
10172         result = self.rpc.call_blockdev_rename(pnode_name,
10173                                                zip(tmp_disks, rename_to))
10174         result.Raise("Failed to rename adoped LVs")
10175     else:
10176       feedback_fn("* creating instance disks...")
10177       try:
10178         _CreateDisks(self, iobj)
10179       except errors.OpExecError:
10180         self.LogWarning("Device creation failed, reverting...")
10181         try:
10182           _RemoveDisks(self, iobj)
10183         finally:
10184           self.cfg.ReleaseDRBDMinors(instance)
10185           raise
10186
10187     feedback_fn("adding instance %s to cluster config" % instance)
10188
10189     self.cfg.AddInstance(iobj, self.proc.GetECId())
10190
10191     # Declare that we don't want to remove the instance lock anymore, as we've
10192     # added the instance to the config
10193     del self.remove_locks[locking.LEVEL_INSTANCE]
10194
10195     if self.op.mode == constants.INSTANCE_IMPORT:
10196       # Release unused nodes
10197       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10198     else:
10199       # Release all nodes
10200       _ReleaseLocks(self, locking.LEVEL_NODE)
10201
10202     disk_abort = False
10203     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10204       feedback_fn("* wiping instance disks...")
10205       try:
10206         _WipeDisks(self, iobj)
10207       except errors.OpExecError, err:
10208         logging.exception("Wiping disks failed")
10209         self.LogWarning("Wiping instance disks failed (%s)", err)
10210         disk_abort = True
10211
10212     if disk_abort:
10213       # Something is already wrong with the disks, don't do anything else
10214       pass
10215     elif self.op.wait_for_sync:
10216       disk_abort = not _WaitForSync(self, iobj)
10217     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10218       # make sure the disks are not degraded (still sync-ing is ok)
10219       feedback_fn("* checking mirrors status")
10220       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10221     else:
10222       disk_abort = False
10223
10224     if disk_abort:
10225       _RemoveDisks(self, iobj)
10226       self.cfg.RemoveInstance(iobj.name)
10227       # Make sure the instance lock gets removed
10228       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10229       raise errors.OpExecError("There are some degraded disks for"
10230                                " this instance")
10231
10232     # Release all node resource locks
10233     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10234
10235     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10236       # we need to set the disks ID to the primary node, since the
10237       # preceding code might or might have not done it, depending on
10238       # disk template and other options
10239       for disk in iobj.disks:
10240         self.cfg.SetDiskID(disk, pnode_name)
10241       if self.op.mode == constants.INSTANCE_CREATE:
10242         if not self.op.no_install:
10243           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10244                         not self.op.wait_for_sync)
10245           if pause_sync:
10246             feedback_fn("* pausing disk sync to install instance OS")
10247             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10248                                                               (iobj.disks,
10249                                                                iobj), True)
10250             for idx, success in enumerate(result.payload):
10251               if not success:
10252                 logging.warn("pause-sync of instance %s for disk %d failed",
10253                              instance, idx)
10254
10255           feedback_fn("* running the instance OS create scripts...")
10256           # FIXME: pass debug option from opcode to backend
10257           os_add_result = \
10258             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10259                                           self.op.debug_level)
10260           if pause_sync:
10261             feedback_fn("* resuming disk sync")
10262             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10263                                                               (iobj.disks,
10264                                                                iobj), False)
10265             for idx, success in enumerate(result.payload):
10266               if not success:
10267                 logging.warn("resume-sync of instance %s for disk %d failed",
10268                              instance, idx)
10269
10270           os_add_result.Raise("Could not add os for instance %s"
10271                               " on node %s" % (instance, pnode_name))
10272
10273       else:
10274         if self.op.mode == constants.INSTANCE_IMPORT:
10275           feedback_fn("* running the instance OS import scripts...")
10276
10277           transfers = []
10278
10279           for idx, image in enumerate(self.src_images):
10280             if not image:
10281               continue
10282
10283             # FIXME: pass debug option from opcode to backend
10284             dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10285                                                constants.IEIO_FILE, (image, ),
10286                                                constants.IEIO_SCRIPT,
10287                                                (iobj.disks[idx], idx),
10288                                                None)
10289             transfers.append(dt)
10290
10291           import_result = \
10292             masterd.instance.TransferInstanceData(self, feedback_fn,
10293                                                   self.op.src_node, pnode_name,
10294                                                   self.pnode.secondary_ip,
10295                                                   iobj, transfers)
10296           if not compat.all(import_result):
10297             self.LogWarning("Some disks for instance %s on node %s were not"
10298                             " imported successfully" % (instance, pnode_name))
10299
10300           rename_from = self._old_instance_name
10301
10302         elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10303           feedback_fn("* preparing remote import...")
10304           # The source cluster will stop the instance before attempting to make
10305           # a connection. In some cases stopping an instance can take a long
10306           # time, hence the shutdown timeout is added to the connection
10307           # timeout.
10308           connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10309                              self.op.source_shutdown_timeout)
10310           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10311
10312           assert iobj.primary_node == self.pnode.name
10313           disk_results = \
10314             masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10315                                           self.source_x509_ca,
10316                                           self._cds, timeouts)
10317           if not compat.all(disk_results):
10318             # TODO: Should the instance still be started, even if some disks
10319             # failed to import (valid for local imports, too)?
10320             self.LogWarning("Some disks for instance %s on node %s were not"
10321                             " imported successfully" % (instance, pnode_name))
10322
10323           rename_from = self.source_instance_name
10324
10325         else:
10326           # also checked in the prereq part
10327           raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10328                                        % self.op.mode)
10329
10330         # Run rename script on newly imported instance
10331         assert iobj.name == instance
10332         feedback_fn("Running rename script for %s" % instance)
10333         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10334                                                    rename_from,
10335                                                    self.op.debug_level)
10336         if result.fail_msg:
10337           self.LogWarning("Failed to run rename script for %s on node"
10338                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10339
10340     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10341
10342     if self.op.start:
10343       iobj.admin_state = constants.ADMINST_UP
10344       self.cfg.Update(iobj, feedback_fn)
10345       logging.info("Starting instance %s on node %s", instance, pnode_name)
10346       feedback_fn("* starting instance...")
10347       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10348                                             False)
10349       result.Raise("Could not start instance")
10350
10351     return list(iobj.all_nodes)
10352
10353
10354 def _CheckRADOSFreeSpace():
10355   """Compute disk size requirements inside the RADOS cluster.
10356
10357   """
10358   # For the RADOS cluster we assume there is always enough space.
10359   pass
10360
10361
10362 class LUInstanceConsole(NoHooksLU):
10363   """Connect to an instance's console.
10364
10365   This is somewhat special in that it returns the command line that
10366   you need to run on the master node in order to connect to the
10367   console.
10368
10369   """
10370   REQ_BGL = False
10371
10372   def ExpandNames(self):
10373     self.share_locks = _ShareAll()
10374     self._ExpandAndLockInstance()
10375
10376   def CheckPrereq(self):
10377     """Check prerequisites.
10378
10379     This checks that the instance is in the cluster.
10380
10381     """
10382     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10383     assert self.instance is not None, \
10384       "Cannot retrieve locked instance %s" % self.op.instance_name
10385     _CheckNodeOnline(self, self.instance.primary_node)
10386
10387   def Exec(self, feedback_fn):
10388     """Connect to the console of an instance
10389
10390     """
10391     instance = self.instance
10392     node = instance.primary_node
10393
10394     node_insts = self.rpc.call_instance_list([node],
10395                                              [instance.hypervisor])[node]
10396     node_insts.Raise("Can't get node information from %s" % node)
10397
10398     if instance.name not in node_insts.payload:
10399       if instance.admin_state == constants.ADMINST_UP:
10400         state = constants.INSTST_ERRORDOWN
10401       elif instance.admin_state == constants.ADMINST_DOWN:
10402         state = constants.INSTST_ADMINDOWN
10403       else:
10404         state = constants.INSTST_ADMINOFFLINE
10405       raise errors.OpExecError("Instance %s is not running (state %s)" %
10406                                (instance.name, state))
10407
10408     logging.debug("Connecting to console of %s on %s", instance.name, node)
10409
10410     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10411
10412
10413 def _GetInstanceConsole(cluster, instance):
10414   """Returns console information for an instance.
10415
10416   @type cluster: L{objects.Cluster}
10417   @type instance: L{objects.Instance}
10418   @rtype: dict
10419
10420   """
10421   hyper = hypervisor.GetHypervisor(instance.hypervisor)
10422   # beparams and hvparams are passed separately, to avoid editing the
10423   # instance and then saving the defaults in the instance itself.
10424   hvparams = cluster.FillHV(instance)
10425   beparams = cluster.FillBE(instance)
10426   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10427
10428   assert console.instance == instance.name
10429   assert console.Validate()
10430
10431   return console.ToDict()
10432
10433
10434 class LUInstanceReplaceDisks(LogicalUnit):
10435   """Replace the disks of an instance.
10436
10437   """
10438   HPATH = "mirrors-replace"
10439   HTYPE = constants.HTYPE_INSTANCE
10440   REQ_BGL = False
10441
10442   def CheckArguments(self):
10443     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10444                                   self.op.iallocator)
10445
10446   def ExpandNames(self):
10447     self._ExpandAndLockInstance()
10448
10449     assert locking.LEVEL_NODE not in self.needed_locks
10450     assert locking.LEVEL_NODE_RES not in self.needed_locks
10451     assert locking.LEVEL_NODEGROUP not in self.needed_locks
10452
10453     assert self.op.iallocator is None or self.op.remote_node is None, \
10454       "Conflicting options"
10455
10456     if self.op.remote_node is not None:
10457       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10458
10459       # Warning: do not remove the locking of the new secondary here
10460       # unless DRBD8.AddChildren is changed to work in parallel;
10461       # currently it doesn't since parallel invocations of
10462       # FindUnusedMinor will conflict
10463       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10464       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10465     else:
10466       self.needed_locks[locking.LEVEL_NODE] = []
10467       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10468
10469       if self.op.iallocator is not None:
10470         # iallocator will select a new node in the same group
10471         self.needed_locks[locking.LEVEL_NODEGROUP] = []
10472
10473     self.needed_locks[locking.LEVEL_NODE_RES] = []
10474
10475     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10476                                    self.op.iallocator, self.op.remote_node,
10477                                    self.op.disks, False, self.op.early_release,
10478                                    self.op.ignore_ipolicy)
10479
10480     self.tasklets = [self.replacer]
10481
10482   def DeclareLocks(self, level):
10483     if level == locking.LEVEL_NODEGROUP:
10484       assert self.op.remote_node is None
10485       assert self.op.iallocator is not None
10486       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10487
10488       self.share_locks[locking.LEVEL_NODEGROUP] = 1
10489       # Lock all groups used by instance optimistically; this requires going
10490       # via the node before it's locked, requiring verification later on
10491       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10492         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10493
10494     elif level == locking.LEVEL_NODE:
10495       if self.op.iallocator is not None:
10496         assert self.op.remote_node is None
10497         assert not self.needed_locks[locking.LEVEL_NODE]
10498
10499         # Lock member nodes of all locked groups
10500         self.needed_locks[locking.LEVEL_NODE] = \
10501             [node_name
10502              for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10503              for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10504       else:
10505         self._LockInstancesNodes()
10506     elif level == locking.LEVEL_NODE_RES:
10507       # Reuse node locks
10508       self.needed_locks[locking.LEVEL_NODE_RES] = \
10509         self.needed_locks[locking.LEVEL_NODE]
10510
10511   def BuildHooksEnv(self):
10512     """Build hooks env.
10513
10514     This runs on the master, the primary and all the secondaries.
10515
10516     """
10517     instance = self.replacer.instance
10518     env = {
10519       "MODE": self.op.mode,
10520       "NEW_SECONDARY": self.op.remote_node,
10521       "OLD_SECONDARY": instance.secondary_nodes[0],
10522       }
10523     env.update(_BuildInstanceHookEnvByObject(self, instance))
10524     return env
10525
10526   def BuildHooksNodes(self):
10527     """Build hooks nodes.
10528
10529     """
10530     instance = self.replacer.instance
10531     nl = [
10532       self.cfg.GetMasterNode(),
10533       instance.primary_node,
10534       ]
10535     if self.op.remote_node is not None:
10536       nl.append(self.op.remote_node)
10537     return nl, nl
10538
10539   def CheckPrereq(self):
10540     """Check prerequisites.
10541
10542     """
10543     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10544             self.op.iallocator is None)
10545
10546     # Verify if node group locks are still correct
10547     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10548     if owned_groups:
10549       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10550
10551     return LogicalUnit.CheckPrereq(self)
10552
10553
10554 class TLReplaceDisks(Tasklet):
10555   """Replaces disks for an instance.
10556
10557   Note: Locking is not within the scope of this class.
10558
10559   """
10560   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10561                disks, delay_iallocator, early_release, ignore_ipolicy):
10562     """Initializes this class.
10563
10564     """
10565     Tasklet.__init__(self, lu)
10566
10567     # Parameters
10568     self.instance_name = instance_name
10569     self.mode = mode
10570     self.iallocator_name = iallocator_name
10571     self.remote_node = remote_node
10572     self.disks = disks
10573     self.delay_iallocator = delay_iallocator
10574     self.early_release = early_release
10575     self.ignore_ipolicy = ignore_ipolicy
10576
10577     # Runtime data
10578     self.instance = None
10579     self.new_node = None
10580     self.target_node = None
10581     self.other_node = None
10582     self.remote_node_info = None
10583     self.node_secondary_ip = None
10584
10585   @staticmethod
10586   def CheckArguments(mode, remote_node, ialloc):
10587     """Helper function for users of this class.
10588
10589     """
10590     # check for valid parameter combination
10591     if mode == constants.REPLACE_DISK_CHG:
10592       if remote_node is None and ialloc is None:
10593         raise errors.OpPrereqError("When changing the secondary either an"
10594                                    " iallocator script must be used or the"
10595                                    " new node given", errors.ECODE_INVAL)
10596
10597       if remote_node is not None and ialloc is not None:
10598         raise errors.OpPrereqError("Give either the iallocator or the new"
10599                                    " secondary, not both", errors.ECODE_INVAL)
10600
10601     elif remote_node is not None or ialloc is not None:
10602       # Not replacing the secondary
10603       raise errors.OpPrereqError("The iallocator and new node options can"
10604                                  " only be used when changing the"
10605                                  " secondary node", errors.ECODE_INVAL)
10606
10607   @staticmethod
10608   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10609     """Compute a new secondary node using an IAllocator.
10610
10611     """
10612     req = iallocator.IAReqRelocate(name=instance_name,
10613                                    relocate_from=list(relocate_from))
10614     ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10615
10616     ial.Run(iallocator_name)
10617
10618     if not ial.success:
10619       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10620                                  " %s" % (iallocator_name, ial.info),
10621                                  errors.ECODE_NORES)
10622
10623     remote_node_name = ial.result[0]
10624
10625     lu.LogInfo("Selected new secondary for instance '%s': %s",
10626                instance_name, remote_node_name)
10627
10628     return remote_node_name
10629
10630   def _FindFaultyDisks(self, node_name):
10631     """Wrapper for L{_FindFaultyInstanceDisks}.
10632
10633     """
10634     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10635                                     node_name, True)
10636
10637   def _CheckDisksActivated(self, instance):
10638     """Checks if the instance disks are activated.
10639
10640     @param instance: The instance to check disks
10641     @return: True if they are activated, False otherwise
10642
10643     """
10644     nodes = instance.all_nodes
10645
10646     for idx, dev in enumerate(instance.disks):
10647       for node in nodes:
10648         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10649         self.cfg.SetDiskID(dev, node)
10650
10651         result = _BlockdevFind(self, node, dev, instance)
10652
10653         if result.offline:
10654           continue
10655         elif result.fail_msg or not result.payload:
10656           return False
10657
10658     return True
10659
10660   def CheckPrereq(self):
10661     """Check prerequisites.
10662
10663     This checks that the instance is in the cluster.
10664
10665     """
10666     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10667     assert instance is not None, \
10668       "Cannot retrieve locked instance %s" % self.instance_name
10669
10670     if instance.disk_template != constants.DT_DRBD8:
10671       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10672                                  " instances", errors.ECODE_INVAL)
10673
10674     if len(instance.secondary_nodes) != 1:
10675       raise errors.OpPrereqError("The instance has a strange layout,"
10676                                  " expected one secondary but found %d" %
10677                                  len(instance.secondary_nodes),
10678                                  errors.ECODE_FAULT)
10679
10680     if not self.delay_iallocator:
10681       self._CheckPrereq2()
10682
10683   def _CheckPrereq2(self):
10684     """Check prerequisites, second part.
10685
10686     This function should always be part of CheckPrereq. It was separated and is
10687     now called from Exec because during node evacuation iallocator was only
10688     called with an unmodified cluster model, not taking planned changes into
10689     account.
10690
10691     """
10692     instance = self.instance
10693     secondary_node = instance.secondary_nodes[0]
10694
10695     if self.iallocator_name is None:
10696       remote_node = self.remote_node
10697     else:
10698       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10699                                        instance.name, instance.secondary_nodes)
10700
10701     if remote_node is None:
10702       self.remote_node_info = None
10703     else:
10704       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10705              "Remote node '%s' is not locked" % remote_node
10706
10707       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10708       assert self.remote_node_info is not None, \
10709         "Cannot retrieve locked node %s" % remote_node
10710
10711     if remote_node == self.instance.primary_node:
10712       raise errors.OpPrereqError("The specified node is the primary node of"
10713                                  " the instance", errors.ECODE_INVAL)
10714
10715     if remote_node == secondary_node:
10716       raise errors.OpPrereqError("The specified node is already the"
10717                                  " secondary node of the instance",
10718                                  errors.ECODE_INVAL)
10719
10720     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10721                                     constants.REPLACE_DISK_CHG):
10722       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10723                                  errors.ECODE_INVAL)
10724
10725     if self.mode == constants.REPLACE_DISK_AUTO:
10726       if not self._CheckDisksActivated(instance):
10727         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10728                                    " first" % self.instance_name,
10729                                    errors.ECODE_STATE)
10730       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10731       faulty_secondary = self._FindFaultyDisks(secondary_node)
10732
10733       if faulty_primary and faulty_secondary:
10734         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10735                                    " one node and can not be repaired"
10736                                    " automatically" % self.instance_name,
10737                                    errors.ECODE_STATE)
10738
10739       if faulty_primary:
10740         self.disks = faulty_primary
10741         self.target_node = instance.primary_node
10742         self.other_node = secondary_node
10743         check_nodes = [self.target_node, self.other_node]
10744       elif faulty_secondary:
10745         self.disks = faulty_secondary
10746         self.target_node = secondary_node
10747         self.other_node = instance.primary_node
10748         check_nodes = [self.target_node, self.other_node]
10749       else:
10750         self.disks = []
10751         check_nodes = []
10752
10753     else:
10754       # Non-automatic modes
10755       if self.mode == constants.REPLACE_DISK_PRI:
10756         self.target_node = instance.primary_node
10757         self.other_node = secondary_node
10758         check_nodes = [self.target_node, self.other_node]
10759
10760       elif self.mode == constants.REPLACE_DISK_SEC:
10761         self.target_node = secondary_node
10762         self.other_node = instance.primary_node
10763         check_nodes = [self.target_node, self.other_node]
10764
10765       elif self.mode == constants.REPLACE_DISK_CHG:
10766         self.new_node = remote_node
10767         self.other_node = instance.primary_node
10768         self.target_node = secondary_node
10769         check_nodes = [self.new_node, self.other_node]
10770
10771         _CheckNodeNotDrained(self.lu, remote_node)
10772         _CheckNodeVmCapable(self.lu, remote_node)
10773
10774         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10775         assert old_node_info is not None
10776         if old_node_info.offline and not self.early_release:
10777           # doesn't make sense to delay the release
10778           self.early_release = True
10779           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10780                           " early-release mode", secondary_node)
10781
10782       else:
10783         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10784                                      self.mode)
10785
10786       # If not specified all disks should be replaced
10787       if not self.disks:
10788         self.disks = range(len(self.instance.disks))
10789
10790     # TODO: This is ugly, but right now we can't distinguish between internal
10791     # submitted opcode and external one. We should fix that.
10792     if self.remote_node_info:
10793       # We change the node, lets verify it still meets instance policy
10794       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10795       cluster = self.cfg.GetClusterInfo()
10796       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
10797                                                               new_group_info)
10798       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10799                               ignore=self.ignore_ipolicy)
10800
10801     for node in check_nodes:
10802       _CheckNodeOnline(self.lu, node)
10803
10804     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10805                                                           self.other_node,
10806                                                           self.target_node]
10807                               if node_name is not None)
10808
10809     # Release unneeded node and node resource locks
10810     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10811     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10812
10813     # Release any owned node group
10814     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10815       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10816
10817     # Check whether disks are valid
10818     for disk_idx in self.disks:
10819       instance.FindDisk(disk_idx)
10820
10821     # Get secondary node IP addresses
10822     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10823                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10824
10825   def Exec(self, feedback_fn):
10826     """Execute disk replacement.
10827
10828     This dispatches the disk replacement to the appropriate handler.
10829
10830     """
10831     if self.delay_iallocator:
10832       self._CheckPrereq2()
10833
10834     if __debug__:
10835       # Verify owned locks before starting operation
10836       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10837       assert set(owned_nodes) == set(self.node_secondary_ip), \
10838           ("Incorrect node locks, owning %s, expected %s" %
10839            (owned_nodes, self.node_secondary_ip.keys()))
10840       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10841               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10842
10843       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10844       assert list(owned_instances) == [self.instance_name], \
10845           "Instance '%s' not locked" % self.instance_name
10846
10847       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10848           "Should not own any node group lock at this point"
10849
10850     if not self.disks:
10851       feedback_fn("No disks need replacement")
10852       return
10853
10854     feedback_fn("Replacing disk(s) %s for %s" %
10855                 (utils.CommaJoin(self.disks), self.instance.name))
10856
10857     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10858
10859     # Activate the instance disks if we're replacing them on a down instance
10860     if activate_disks:
10861       _StartInstanceDisks(self.lu, self.instance, True)
10862
10863     try:
10864       # Should we replace the secondary node?
10865       if self.new_node is not None:
10866         fn = self._ExecDrbd8Secondary
10867       else:
10868         fn = self._ExecDrbd8DiskOnly
10869
10870       result = fn(feedback_fn)
10871     finally:
10872       # Deactivate the instance disks if we're replacing them on a
10873       # down instance
10874       if activate_disks:
10875         _SafeShutdownInstanceDisks(self.lu, self.instance)
10876
10877     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10878
10879     if __debug__:
10880       # Verify owned locks
10881       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10882       nodes = frozenset(self.node_secondary_ip)
10883       assert ((self.early_release and not owned_nodes) or
10884               (not self.early_release and not (set(owned_nodes) - nodes))), \
10885         ("Not owning the correct locks, early_release=%s, owned=%r,"
10886          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10887
10888     return result
10889
10890   def _CheckVolumeGroup(self, nodes):
10891     self.lu.LogInfo("Checking volume groups")
10892
10893     vgname = self.cfg.GetVGName()
10894
10895     # Make sure volume group exists on all involved nodes
10896     results = self.rpc.call_vg_list(nodes)
10897     if not results:
10898       raise errors.OpExecError("Can't list volume groups on the nodes")
10899
10900     for node in nodes:
10901       res = results[node]
10902       res.Raise("Error checking node %s" % node)
10903       if vgname not in res.payload:
10904         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10905                                  (vgname, node))
10906
10907   def _CheckDisksExistence(self, nodes):
10908     # Check disk existence
10909     for idx, dev in enumerate(self.instance.disks):
10910       if idx not in self.disks:
10911         continue
10912
10913       for node in nodes:
10914         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10915         self.cfg.SetDiskID(dev, node)
10916
10917         result = _BlockdevFind(self, node, dev, self.instance)
10918
10919         msg = result.fail_msg
10920         if msg or not result.payload:
10921           if not msg:
10922             msg = "disk not found"
10923           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10924                                    (idx, node, msg))
10925
10926   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10927     for idx, dev in enumerate(self.instance.disks):
10928       if idx not in self.disks:
10929         continue
10930
10931       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10932                       (idx, node_name))
10933
10934       if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10935                                    on_primary, ldisk=ldisk):
10936         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10937                                  " replace disks for instance %s" %
10938                                  (node_name, self.instance.name))
10939
10940   def _CreateNewStorage(self, node_name):
10941     """Create new storage on the primary or secondary node.
10942
10943     This is only used for same-node replaces, not for changing the
10944     secondary node, hence we don't want to modify the existing disk.
10945
10946     """
10947     iv_names = {}
10948
10949     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10950     for idx, dev in enumerate(disks):
10951       if idx not in self.disks:
10952         continue
10953
10954       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10955
10956       self.cfg.SetDiskID(dev, node_name)
10957
10958       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10959       names = _GenerateUniqueNames(self.lu, lv_names)
10960
10961       (data_disk, meta_disk) = dev.children
10962       vg_data = data_disk.logical_id[0]
10963       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10964                              logical_id=(vg_data, names[0]),
10965                              params=data_disk.params)
10966       vg_meta = meta_disk.logical_id[0]
10967       lv_meta = objects.Disk(dev_type=constants.LD_LV,
10968                              size=constants.DRBD_META_SIZE,
10969                              logical_id=(vg_meta, names[1]),
10970                              params=meta_disk.params)
10971
10972       new_lvs = [lv_data, lv_meta]
10973       old_lvs = [child.Copy() for child in dev.children]
10974       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10975
10976       # we pass force_create=True to force the LVM creation
10977       for new_lv in new_lvs:
10978         _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10979                              _GetInstanceInfoText(self.instance), False)
10980
10981     return iv_names
10982
10983   def _CheckDevices(self, node_name, iv_names):
10984     for name, (dev, _, _) in iv_names.iteritems():
10985       self.cfg.SetDiskID(dev, node_name)
10986
10987       result = _BlockdevFind(self, node_name, dev, self.instance)
10988
10989       msg = result.fail_msg
10990       if msg or not result.payload:
10991         if not msg:
10992           msg = "disk not found"
10993         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10994                                  (name, msg))
10995
10996       if result.payload.is_degraded:
10997         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10998
10999   def _RemoveOldStorage(self, node_name, iv_names):
11000     for name, (_, old_lvs, _) in iv_names.iteritems():
11001       self.lu.LogInfo("Remove logical volumes for %s" % name)
11002
11003       for lv in old_lvs:
11004         self.cfg.SetDiskID(lv, node_name)
11005
11006         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11007         if msg:
11008           self.lu.LogWarning("Can't remove old LV: %s" % msg,
11009                              hint="remove unused LVs manually")
11010
11011   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11012     """Replace a disk on the primary or secondary for DRBD 8.
11013
11014     The algorithm for replace is quite complicated:
11015
11016       1. for each disk to be replaced:
11017
11018         1. create new LVs on the target node with unique names
11019         1. detach old LVs from the drbd device
11020         1. rename old LVs to name_replaced.<time_t>
11021         1. rename new LVs to old LVs
11022         1. attach the new LVs (with the old names now) to the drbd device
11023
11024       1. wait for sync across all devices
11025
11026       1. for each modified disk:
11027
11028         1. remove old LVs (which have the name name_replaces.<time_t>)
11029
11030     Failures are not very well handled.
11031
11032     """
11033     steps_total = 6
11034
11035     # Step: check device activation
11036     self.lu.LogStep(1, steps_total, "Check device existence")
11037     self._CheckDisksExistence([self.other_node, self.target_node])
11038     self._CheckVolumeGroup([self.target_node, self.other_node])
11039
11040     # Step: check other node consistency
11041     self.lu.LogStep(2, steps_total, "Check peer consistency")
11042     self._CheckDisksConsistency(self.other_node,
11043                                 self.other_node == self.instance.primary_node,
11044                                 False)
11045
11046     # Step: create new storage
11047     self.lu.LogStep(3, steps_total, "Allocate new storage")
11048     iv_names = self._CreateNewStorage(self.target_node)
11049
11050     # Step: for each lv, detach+rename*2+attach
11051     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11052     for dev, old_lvs, new_lvs in iv_names.itervalues():
11053       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11054
11055       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11056                                                      old_lvs)
11057       result.Raise("Can't detach drbd from local storage on node"
11058                    " %s for device %s" % (self.target_node, dev.iv_name))
11059       #dev.children = []
11060       #cfg.Update(instance)
11061
11062       # ok, we created the new LVs, so now we know we have the needed
11063       # storage; as such, we proceed on the target node to rename
11064       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11065       # using the assumption that logical_id == physical_id (which in
11066       # turn is the unique_id on that node)
11067
11068       # FIXME(iustin): use a better name for the replaced LVs
11069       temp_suffix = int(time.time())
11070       ren_fn = lambda d, suff: (d.physical_id[0],
11071                                 d.physical_id[1] + "_replaced-%s" % suff)
11072
11073       # Build the rename list based on what LVs exist on the node
11074       rename_old_to_new = []
11075       for to_ren in old_lvs:
11076         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11077         if not result.fail_msg and result.payload:
11078           # device exists
11079           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11080
11081       self.lu.LogInfo("Renaming the old LVs on the target node")
11082       result = self.rpc.call_blockdev_rename(self.target_node,
11083                                              rename_old_to_new)
11084       result.Raise("Can't rename old LVs on node %s" % self.target_node)
11085
11086       # Now we rename the new LVs to the old LVs
11087       self.lu.LogInfo("Renaming the new LVs on the target node")
11088       rename_new_to_old = [(new, old.physical_id)
11089                            for old, new in zip(old_lvs, new_lvs)]
11090       result = self.rpc.call_blockdev_rename(self.target_node,
11091                                              rename_new_to_old)
11092       result.Raise("Can't rename new LVs on node %s" % self.target_node)
11093
11094       # Intermediate steps of in memory modifications
11095       for old, new in zip(old_lvs, new_lvs):
11096         new.logical_id = old.logical_id
11097         self.cfg.SetDiskID(new, self.target_node)
11098
11099       # We need to modify old_lvs so that removal later removes the
11100       # right LVs, not the newly added ones; note that old_lvs is a
11101       # copy here
11102       for disk in old_lvs:
11103         disk.logical_id = ren_fn(disk, temp_suffix)
11104         self.cfg.SetDiskID(disk, self.target_node)
11105
11106       # Now that the new lvs have the old name, we can add them to the device
11107       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11108       result = self.rpc.call_blockdev_addchildren(self.target_node,
11109                                                   (dev, self.instance), new_lvs)
11110       msg = result.fail_msg
11111       if msg:
11112         for new_lv in new_lvs:
11113           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11114                                                new_lv).fail_msg
11115           if msg2:
11116             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11117                                hint=("cleanup manually the unused logical"
11118                                      "volumes"))
11119         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11120
11121     cstep = itertools.count(5)
11122
11123     if self.early_release:
11124       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11125       self._RemoveOldStorage(self.target_node, iv_names)
11126       # TODO: Check if releasing locks early still makes sense
11127       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11128     else:
11129       # Release all resource locks except those used by the instance
11130       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11131                     keep=self.node_secondary_ip.keys())
11132
11133     # Release all node locks while waiting for sync
11134     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11135
11136     # TODO: Can the instance lock be downgraded here? Take the optional disk
11137     # shutdown in the caller into consideration.
11138
11139     # Wait for sync
11140     # This can fail as the old devices are degraded and _WaitForSync
11141     # does a combined result over all disks, so we don't check its return value
11142     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11143     _WaitForSync(self.lu, self.instance)
11144
11145     # Check all devices manually
11146     self._CheckDevices(self.instance.primary_node, iv_names)
11147
11148     # Step: remove old storage
11149     if not self.early_release:
11150       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11151       self._RemoveOldStorage(self.target_node, iv_names)
11152
11153   def _ExecDrbd8Secondary(self, feedback_fn):
11154     """Replace the secondary node for DRBD 8.
11155
11156     The algorithm for replace is quite complicated:
11157       - for all disks of the instance:
11158         - create new LVs on the new node with same names
11159         - shutdown the drbd device on the old secondary
11160         - disconnect the drbd network on the primary
11161         - create the drbd device on the new secondary
11162         - network attach the drbd on the primary, using an artifice:
11163           the drbd code for Attach() will connect to the network if it
11164           finds a device which is connected to the good local disks but
11165           not network enabled
11166       - wait for sync across all devices
11167       - remove all disks from the old secondary
11168
11169     Failures are not very well handled.
11170
11171     """
11172     steps_total = 6
11173
11174     pnode = self.instance.primary_node
11175
11176     # Step: check device activation
11177     self.lu.LogStep(1, steps_total, "Check device existence")
11178     self._CheckDisksExistence([self.instance.primary_node])
11179     self._CheckVolumeGroup([self.instance.primary_node])
11180
11181     # Step: check other node consistency
11182     self.lu.LogStep(2, steps_total, "Check peer consistency")
11183     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11184
11185     # Step: create new storage
11186     self.lu.LogStep(3, steps_total, "Allocate new storage")
11187     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11188     for idx, dev in enumerate(disks):
11189       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11190                       (self.new_node, idx))
11191       # we pass force_create=True to force LVM creation
11192       for new_lv in dev.children:
11193         _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11194                              True, _GetInstanceInfoText(self.instance), False)
11195
11196     # Step 4: dbrd minors and drbd setups changes
11197     # after this, we must manually remove the drbd minors on both the
11198     # error and the success paths
11199     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11200     minors = self.cfg.AllocateDRBDMinor([self.new_node
11201                                          for dev in self.instance.disks],
11202                                         self.instance.name)
11203     logging.debug("Allocated minors %r", minors)
11204
11205     iv_names = {}
11206     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11207       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11208                       (self.new_node, idx))
11209       # create new devices on new_node; note that we create two IDs:
11210       # one without port, so the drbd will be activated without
11211       # networking information on the new node at this stage, and one
11212       # with network, for the latter activation in step 4
11213       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11214       if self.instance.primary_node == o_node1:
11215         p_minor = o_minor1
11216       else:
11217         assert self.instance.primary_node == o_node2, "Three-node instance?"
11218         p_minor = o_minor2
11219
11220       new_alone_id = (self.instance.primary_node, self.new_node, None,
11221                       p_minor, new_minor, o_secret)
11222       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11223                     p_minor, new_minor, o_secret)
11224
11225       iv_names[idx] = (dev, dev.children, new_net_id)
11226       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11227                     new_net_id)
11228       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11229                               logical_id=new_alone_id,
11230                               children=dev.children,
11231                               size=dev.size,
11232                               params={})
11233       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11234                                              self.cfg)
11235       try:
11236         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11237                               anno_new_drbd,
11238                               _GetInstanceInfoText(self.instance), False)
11239       except errors.GenericError:
11240         self.cfg.ReleaseDRBDMinors(self.instance.name)
11241         raise
11242
11243     # We have new devices, shutdown the drbd on the old secondary
11244     for idx, dev in enumerate(self.instance.disks):
11245       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11246       self.cfg.SetDiskID(dev, self.target_node)
11247       msg = self.rpc.call_blockdev_shutdown(self.target_node,
11248                                             (dev, self.instance)).fail_msg
11249       if msg:
11250         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11251                            "node: %s" % (idx, msg),
11252                            hint=("Please cleanup this device manually as"
11253                                  " soon as possible"))
11254
11255     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11256     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11257                                                self.instance.disks)[pnode]
11258
11259     msg = result.fail_msg
11260     if msg:
11261       # detaches didn't succeed (unlikely)
11262       self.cfg.ReleaseDRBDMinors(self.instance.name)
11263       raise errors.OpExecError("Can't detach the disks from the network on"
11264                                " old node: %s" % (msg,))
11265
11266     # if we managed to detach at least one, we update all the disks of
11267     # the instance to point to the new secondary
11268     self.lu.LogInfo("Updating instance configuration")
11269     for dev, _, new_logical_id in iv_names.itervalues():
11270       dev.logical_id = new_logical_id
11271       self.cfg.SetDiskID(dev, self.instance.primary_node)
11272
11273     self.cfg.Update(self.instance, feedback_fn)
11274
11275     # Release all node locks (the configuration has been updated)
11276     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11277
11278     # and now perform the drbd attach
11279     self.lu.LogInfo("Attaching primary drbds to new secondary"
11280                     " (standalone => connected)")
11281     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11282                                             self.new_node],
11283                                            self.node_secondary_ip,
11284                                            (self.instance.disks, self.instance),
11285                                            self.instance.name,
11286                                            False)
11287     for to_node, to_result in result.items():
11288       msg = to_result.fail_msg
11289       if msg:
11290         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11291                            to_node, msg,
11292                            hint=("please do a gnt-instance info to see the"
11293                                  " status of disks"))
11294
11295     cstep = itertools.count(5)
11296
11297     if self.early_release:
11298       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11299       self._RemoveOldStorage(self.target_node, iv_names)
11300       # TODO: Check if releasing locks early still makes sense
11301       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11302     else:
11303       # Release all resource locks except those used by the instance
11304       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11305                     keep=self.node_secondary_ip.keys())
11306
11307     # TODO: Can the instance lock be downgraded here? Take the optional disk
11308     # shutdown in the caller into consideration.
11309
11310     # Wait for sync
11311     # This can fail as the old devices are degraded and _WaitForSync
11312     # does a combined result over all disks, so we don't check its return value
11313     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11314     _WaitForSync(self.lu, self.instance)
11315
11316     # Check all devices manually
11317     self._CheckDevices(self.instance.primary_node, iv_names)
11318
11319     # Step: remove old storage
11320     if not self.early_release:
11321       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11322       self._RemoveOldStorage(self.target_node, iv_names)
11323
11324
11325 class LURepairNodeStorage(NoHooksLU):
11326   """Repairs the volume group on a node.
11327
11328   """
11329   REQ_BGL = False
11330
11331   def CheckArguments(self):
11332     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11333
11334     storage_type = self.op.storage_type
11335
11336     if (constants.SO_FIX_CONSISTENCY not in
11337         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11338       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11339                                  " repaired" % storage_type,
11340                                  errors.ECODE_INVAL)
11341
11342   def ExpandNames(self):
11343     self.needed_locks = {
11344       locking.LEVEL_NODE: [self.op.node_name],
11345       }
11346
11347   def _CheckFaultyDisks(self, instance, node_name):
11348     """Ensure faulty disks abort the opcode or at least warn."""
11349     try:
11350       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11351                                   node_name, True):
11352         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11353                                    " node '%s'" % (instance.name, node_name),
11354                                    errors.ECODE_STATE)
11355     except errors.OpPrereqError, err:
11356       if self.op.ignore_consistency:
11357         self.proc.LogWarning(str(err.args[0]))
11358       else:
11359         raise
11360
11361   def CheckPrereq(self):
11362     """Check prerequisites.
11363
11364     """
11365     # Check whether any instance on this node has faulty disks
11366     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11367       if inst.admin_state != constants.ADMINST_UP:
11368         continue
11369       check_nodes = set(inst.all_nodes)
11370       check_nodes.discard(self.op.node_name)
11371       for inst_node_name in check_nodes:
11372         self._CheckFaultyDisks(inst, inst_node_name)
11373
11374   def Exec(self, feedback_fn):
11375     feedback_fn("Repairing storage unit '%s' on %s ..." %
11376                 (self.op.name, self.op.node_name))
11377
11378     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11379     result = self.rpc.call_storage_execute(self.op.node_name,
11380                                            self.op.storage_type, st_args,
11381                                            self.op.name,
11382                                            constants.SO_FIX_CONSISTENCY)
11383     result.Raise("Failed to repair storage unit '%s' on %s" %
11384                  (self.op.name, self.op.node_name))
11385
11386
11387 class LUNodeEvacuate(NoHooksLU):
11388   """Evacuates instances off a list of nodes.
11389
11390   """
11391   REQ_BGL = False
11392
11393   _MODE2IALLOCATOR = {
11394     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11395     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11396     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11397     }
11398   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11399   assert (frozenset(_MODE2IALLOCATOR.values()) ==
11400           constants.IALLOCATOR_NEVAC_MODES)
11401
11402   def CheckArguments(self):
11403     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11404
11405   def ExpandNames(self):
11406     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11407
11408     if self.op.remote_node is not None:
11409       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11410       assert self.op.remote_node
11411
11412       if self.op.remote_node == self.op.node_name:
11413         raise errors.OpPrereqError("Can not use evacuated node as a new"
11414                                    " secondary node", errors.ECODE_INVAL)
11415
11416       if self.op.mode != constants.NODE_EVAC_SEC:
11417         raise errors.OpPrereqError("Without the use of an iallocator only"
11418                                    " secondary instances can be evacuated",
11419                                    errors.ECODE_INVAL)
11420
11421     # Declare locks
11422     self.share_locks = _ShareAll()
11423     self.needed_locks = {
11424       locking.LEVEL_INSTANCE: [],
11425       locking.LEVEL_NODEGROUP: [],
11426       locking.LEVEL_NODE: [],
11427       }
11428
11429     # Determine nodes (via group) optimistically, needs verification once locks
11430     # have been acquired
11431     self.lock_nodes = self._DetermineNodes()
11432
11433   def _DetermineNodes(self):
11434     """Gets the list of nodes to operate on.
11435
11436     """
11437     if self.op.remote_node is None:
11438       # Iallocator will choose any node(s) in the same group
11439       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11440     else:
11441       group_nodes = frozenset([self.op.remote_node])
11442
11443     # Determine nodes to be locked
11444     return set([self.op.node_name]) | group_nodes
11445
11446   def _DetermineInstances(self):
11447     """Builds list of instances to operate on.
11448
11449     """
11450     assert self.op.mode in constants.NODE_EVAC_MODES
11451
11452     if self.op.mode == constants.NODE_EVAC_PRI:
11453       # Primary instances only
11454       inst_fn = _GetNodePrimaryInstances
11455       assert self.op.remote_node is None, \
11456         "Evacuating primary instances requires iallocator"
11457     elif self.op.mode == constants.NODE_EVAC_SEC:
11458       # Secondary instances only
11459       inst_fn = _GetNodeSecondaryInstances
11460     else:
11461       # All instances
11462       assert self.op.mode == constants.NODE_EVAC_ALL
11463       inst_fn = _GetNodeInstances
11464       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11465       # per instance
11466       raise errors.OpPrereqError("Due to an issue with the iallocator"
11467                                  " interface it is not possible to evacuate"
11468                                  " all instances at once; specify explicitly"
11469                                  " whether to evacuate primary or secondary"
11470                                  " instances",
11471                                  errors.ECODE_INVAL)
11472
11473     return inst_fn(self.cfg, self.op.node_name)
11474
11475   def DeclareLocks(self, level):
11476     if level == locking.LEVEL_INSTANCE:
11477       # Lock instances optimistically, needs verification once node and group
11478       # locks have been acquired
11479       self.needed_locks[locking.LEVEL_INSTANCE] = \
11480         set(i.name for i in self._DetermineInstances())
11481
11482     elif level == locking.LEVEL_NODEGROUP:
11483       # Lock node groups for all potential target nodes optimistically, needs
11484       # verification once nodes have been acquired
11485       self.needed_locks[locking.LEVEL_NODEGROUP] = \
11486         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11487
11488     elif level == locking.LEVEL_NODE:
11489       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11490
11491   def CheckPrereq(self):
11492     # Verify locks
11493     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11494     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11495     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11496
11497     need_nodes = self._DetermineNodes()
11498
11499     if not owned_nodes.issuperset(need_nodes):
11500       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11501                                  " locks were acquired, current nodes are"
11502                                  " are '%s', used to be '%s'; retry the"
11503                                  " operation" %
11504                                  (self.op.node_name,
11505                                   utils.CommaJoin(need_nodes),
11506                                   utils.CommaJoin(owned_nodes)),
11507                                  errors.ECODE_STATE)
11508
11509     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11510     if owned_groups != wanted_groups:
11511       raise errors.OpExecError("Node groups changed since locks were acquired,"
11512                                " current groups are '%s', used to be '%s';"
11513                                " retry the operation" %
11514                                (utils.CommaJoin(wanted_groups),
11515                                 utils.CommaJoin(owned_groups)))
11516
11517     # Determine affected instances
11518     self.instances = self._DetermineInstances()
11519     self.instance_names = [i.name for i in self.instances]
11520
11521     if set(self.instance_names) != owned_instances:
11522       raise errors.OpExecError("Instances on node '%s' changed since locks"
11523                                " were acquired, current instances are '%s',"
11524                                " used to be '%s'; retry the operation" %
11525                                (self.op.node_name,
11526                                 utils.CommaJoin(self.instance_names),
11527                                 utils.CommaJoin(owned_instances)))
11528
11529     if self.instance_names:
11530       self.LogInfo("Evacuating instances from node '%s': %s",
11531                    self.op.node_name,
11532                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
11533     else:
11534       self.LogInfo("No instances to evacuate from node '%s'",
11535                    self.op.node_name)
11536
11537     if self.op.remote_node is not None:
11538       for i in self.instances:
11539         if i.primary_node == self.op.remote_node:
11540           raise errors.OpPrereqError("Node %s is the primary node of"
11541                                      " instance %s, cannot use it as"
11542                                      " secondary" %
11543                                      (self.op.remote_node, i.name),
11544                                      errors.ECODE_INVAL)
11545
11546   def Exec(self, feedback_fn):
11547     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11548
11549     if not self.instance_names:
11550       # No instances to evacuate
11551       jobs = []
11552
11553     elif self.op.iallocator is not None:
11554       # TODO: Implement relocation to other group
11555       evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11556       req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11557                                      instances=list(self.instance_names))
11558       ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11559
11560       ial.Run(self.op.iallocator)
11561
11562       if not ial.success:
11563         raise errors.OpPrereqError("Can't compute node evacuation using"
11564                                    " iallocator '%s': %s" %
11565                                    (self.op.iallocator, ial.info),
11566                                    errors.ECODE_NORES)
11567
11568       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11569
11570     elif self.op.remote_node is not None:
11571       assert self.op.mode == constants.NODE_EVAC_SEC
11572       jobs = [
11573         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11574                                         remote_node=self.op.remote_node,
11575                                         disks=[],
11576                                         mode=constants.REPLACE_DISK_CHG,
11577                                         early_release=self.op.early_release)]
11578         for instance_name in self.instance_names
11579         ]
11580
11581     else:
11582       raise errors.ProgrammerError("No iallocator or remote node")
11583
11584     return ResultWithJobs(jobs)
11585
11586
11587 def _SetOpEarlyRelease(early_release, op):
11588   """Sets C{early_release} flag on opcodes if available.
11589
11590   """
11591   try:
11592     op.early_release = early_release
11593   except AttributeError:
11594     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11595
11596   return op
11597
11598
11599 def _NodeEvacDest(use_nodes, group, nodes):
11600   """Returns group or nodes depending on caller's choice.
11601
11602   """
11603   if use_nodes:
11604     return utils.CommaJoin(nodes)
11605   else:
11606     return group
11607
11608
11609 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11610   """Unpacks the result of change-group and node-evacuate iallocator requests.
11611
11612   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11613   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11614
11615   @type lu: L{LogicalUnit}
11616   @param lu: Logical unit instance
11617   @type alloc_result: tuple/list
11618   @param alloc_result: Result from iallocator
11619   @type early_release: bool
11620   @param early_release: Whether to release locks early if possible
11621   @type use_nodes: bool
11622   @param use_nodes: Whether to display node names instead of groups
11623
11624   """
11625   (moved, failed, jobs) = alloc_result
11626
11627   if failed:
11628     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11629                                  for (name, reason) in failed)
11630     lu.LogWarning("Unable to evacuate instances %s", failreason)
11631     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11632
11633   if moved:
11634     lu.LogInfo("Instances to be moved: %s",
11635                utils.CommaJoin("%s (to %s)" %
11636                                (name, _NodeEvacDest(use_nodes, group, nodes))
11637                                for (name, group, nodes) in moved))
11638
11639   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11640               map(opcodes.OpCode.LoadOpCode, ops))
11641           for ops in jobs]
11642
11643
11644 class LUInstanceGrowDisk(LogicalUnit):
11645   """Grow a disk of an instance.
11646
11647   """
11648   HPATH = "disk-grow"
11649   HTYPE = constants.HTYPE_INSTANCE
11650   REQ_BGL = False
11651
11652   def ExpandNames(self):
11653     self._ExpandAndLockInstance()
11654     self.needed_locks[locking.LEVEL_NODE] = []
11655     self.needed_locks[locking.LEVEL_NODE_RES] = []
11656     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11657     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11658
11659   def DeclareLocks(self, level):
11660     if level == locking.LEVEL_NODE:
11661       self._LockInstancesNodes()
11662     elif level == locking.LEVEL_NODE_RES:
11663       # Copy node locks
11664       self.needed_locks[locking.LEVEL_NODE_RES] = \
11665         self.needed_locks[locking.LEVEL_NODE][:]
11666
11667   def BuildHooksEnv(self):
11668     """Build hooks env.
11669
11670     This runs on the master, the primary and all the secondaries.
11671
11672     """
11673     env = {
11674       "DISK": self.op.disk,
11675       "AMOUNT": self.op.amount,
11676       "ABSOLUTE": self.op.absolute,
11677       }
11678     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11679     return env
11680
11681   def BuildHooksNodes(self):
11682     """Build hooks nodes.
11683
11684     """
11685     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11686     return (nl, nl)
11687
11688   def CheckPrereq(self):
11689     """Check prerequisites.
11690
11691     This checks that the instance is in the cluster.
11692
11693     """
11694     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11695     assert instance is not None, \
11696       "Cannot retrieve locked instance %s" % self.op.instance_name
11697     nodenames = list(instance.all_nodes)
11698     for node in nodenames:
11699       _CheckNodeOnline(self, node)
11700
11701     self.instance = instance
11702
11703     if instance.disk_template not in constants.DTS_GROWABLE:
11704       raise errors.OpPrereqError("Instance's disk layout does not support"
11705                                  " growing", errors.ECODE_INVAL)
11706
11707     self.disk = instance.FindDisk(self.op.disk)
11708
11709     if self.op.absolute:
11710       self.target = self.op.amount
11711       self.delta = self.target - self.disk.size
11712       if self.delta < 0:
11713         raise errors.OpPrereqError("Requested size (%s) is smaller than "
11714                                    "current disk size (%s)" %
11715                                    (utils.FormatUnit(self.target, "h"),
11716                                     utils.FormatUnit(self.disk.size, "h")),
11717                                    errors.ECODE_STATE)
11718     else:
11719       self.delta = self.op.amount
11720       self.target = self.disk.size + self.delta
11721       if self.delta < 0:
11722         raise errors.OpPrereqError("Requested increment (%s) is negative" %
11723                                    utils.FormatUnit(self.delta, "h"),
11724                                    errors.ECODE_INVAL)
11725
11726     if instance.disk_template not in (constants.DT_FILE,
11727                                       constants.DT_SHARED_FILE,
11728                                       constants.DT_RBD):
11729       # TODO: check the free disk space for file, when that feature will be
11730       # supported
11731       _CheckNodesFreeDiskPerVG(self, nodenames,
11732                                self.disk.ComputeGrowth(self.delta))
11733
11734   def Exec(self, feedback_fn):
11735     """Execute disk grow.
11736
11737     """
11738     instance = self.instance
11739     disk = self.disk
11740
11741     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11742     assert (self.owned_locks(locking.LEVEL_NODE) ==
11743             self.owned_locks(locking.LEVEL_NODE_RES))
11744
11745     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11746     if not disks_ok:
11747       raise errors.OpExecError("Cannot activate block device to grow")
11748
11749     feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11750                 (self.op.disk, instance.name,
11751                  utils.FormatUnit(self.delta, "h"),
11752                  utils.FormatUnit(self.target, "h")))
11753
11754     # First run all grow ops in dry-run mode
11755     for node in instance.all_nodes:
11756       self.cfg.SetDiskID(disk, node)
11757       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11758                                            True, True)
11759       result.Raise("Grow request failed to node %s" % node)
11760
11761     # We know that (as far as we can test) operations across different
11762     # nodes will succeed, time to run it for real on the backing storage
11763     for node in instance.all_nodes:
11764       self.cfg.SetDiskID(disk, node)
11765       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11766                                            False, True)
11767       result.Raise("Grow request failed to node %s" % node)
11768
11769     # And now execute it for logical storage, on the primary node
11770     node = instance.primary_node
11771     self.cfg.SetDiskID(disk, node)
11772     result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11773                                          False, False)
11774     result.Raise("Grow request failed to node %s" % node)
11775
11776     disk.RecordGrow(self.delta)
11777     self.cfg.Update(instance, feedback_fn)
11778
11779     # Changes have been recorded, release node lock
11780     _ReleaseLocks(self, locking.LEVEL_NODE)
11781
11782     # Downgrade lock while waiting for sync
11783     self.glm.downgrade(locking.LEVEL_INSTANCE)
11784
11785     if self.op.wait_for_sync:
11786       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11787       if disk_abort:
11788         self.proc.LogWarning("Disk sync-ing has not returned a good"
11789                              " status; please check the instance")
11790       if instance.admin_state != constants.ADMINST_UP:
11791         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11792     elif instance.admin_state != constants.ADMINST_UP:
11793       self.proc.LogWarning("Not shutting down the disk even if the instance is"
11794                            " not supposed to be running because no wait for"
11795                            " sync mode was requested")
11796
11797     assert self.owned_locks(locking.LEVEL_NODE_RES)
11798     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11799
11800
11801 class LUInstanceQueryData(NoHooksLU):
11802   """Query runtime instance data.
11803
11804   """
11805   REQ_BGL = False
11806
11807   def ExpandNames(self):
11808     self.needed_locks = {}
11809
11810     # Use locking if requested or when non-static information is wanted
11811     if not (self.op.static or self.op.use_locking):
11812       self.LogWarning("Non-static data requested, locks need to be acquired")
11813       self.op.use_locking = True
11814
11815     if self.op.instances or not self.op.use_locking:
11816       # Expand instance names right here
11817       self.wanted_names = _GetWantedInstances(self, self.op.instances)
11818     else:
11819       # Will use acquired locks
11820       self.wanted_names = None
11821
11822     if self.op.use_locking:
11823       self.share_locks = _ShareAll()
11824
11825       if self.wanted_names is None:
11826         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11827       else:
11828         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11829
11830       self.needed_locks[locking.LEVEL_NODEGROUP] = []
11831       self.needed_locks[locking.LEVEL_NODE] = []
11832       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11833
11834   def DeclareLocks(self, level):
11835     if self.op.use_locking:
11836       if level == locking.LEVEL_NODEGROUP:
11837         owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11838
11839         # Lock all groups used by instances optimistically; this requires going
11840         # via the node before it's locked, requiring verification later on
11841         self.needed_locks[locking.LEVEL_NODEGROUP] = \
11842           frozenset(group_uuid
11843                     for instance_name in owned_instances
11844                     for group_uuid in
11845                       self.cfg.GetInstanceNodeGroups(instance_name))
11846
11847       elif level == locking.LEVEL_NODE:
11848         self._LockInstancesNodes()
11849
11850   def CheckPrereq(self):
11851     """Check prerequisites.
11852
11853     This only checks the optional instance list against the existing names.
11854
11855     """
11856     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11857     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11858     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11859
11860     if self.wanted_names is None:
11861       assert self.op.use_locking, "Locking was not used"
11862       self.wanted_names = owned_instances
11863
11864     instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11865
11866     if self.op.use_locking:
11867       _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11868                                 None)
11869     else:
11870       assert not (owned_instances or owned_groups or owned_nodes)
11871
11872     self.wanted_instances = instances.values()
11873
11874   def _ComputeBlockdevStatus(self, node, instance, dev):
11875     """Returns the status of a block device
11876
11877     """
11878     if self.op.static or not node:
11879       return None
11880
11881     self.cfg.SetDiskID(dev, node)
11882
11883     result = self.rpc.call_blockdev_find(node, dev)
11884     if result.offline:
11885       return None
11886
11887     result.Raise("Can't compute disk status for %s" % instance.name)
11888
11889     status = result.payload
11890     if status is None:
11891       return None
11892
11893     return (status.dev_path, status.major, status.minor,
11894             status.sync_percent, status.estimated_time,
11895             status.is_degraded, status.ldisk_status)
11896
11897   def _ComputeDiskStatus(self, instance, snode, dev):
11898     """Compute block device status.
11899
11900     """
11901     (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11902
11903     return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11904
11905   def _ComputeDiskStatusInner(self, instance, snode, dev):
11906     """Compute block device status.
11907
11908     @attention: The device has to be annotated already.
11909
11910     """
11911     if dev.dev_type in constants.LDS_DRBD:
11912       # we change the snode then (otherwise we use the one passed in)
11913       if dev.logical_id[0] == instance.primary_node:
11914         snode = dev.logical_id[1]
11915       else:
11916         snode = dev.logical_id[0]
11917
11918     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11919                                               instance, dev)
11920     dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11921
11922     if dev.children:
11923       dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11924                                         instance, snode),
11925                          dev.children)
11926     else:
11927       dev_children = []
11928
11929     return {
11930       "iv_name": dev.iv_name,
11931       "dev_type": dev.dev_type,
11932       "logical_id": dev.logical_id,
11933       "physical_id": dev.physical_id,
11934       "pstatus": dev_pstatus,
11935       "sstatus": dev_sstatus,
11936       "children": dev_children,
11937       "mode": dev.mode,
11938       "size": dev.size,
11939       }
11940
11941   def Exec(self, feedback_fn):
11942     """Gather and return data"""
11943     result = {}
11944
11945     cluster = self.cfg.GetClusterInfo()
11946
11947     node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11948     nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11949
11950     groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11951                                                  for node in nodes.values()))
11952
11953     group2name_fn = lambda uuid: groups[uuid].name
11954
11955     for instance in self.wanted_instances:
11956       pnode = nodes[instance.primary_node]
11957
11958       if self.op.static or pnode.offline:
11959         remote_state = None
11960         if pnode.offline:
11961           self.LogWarning("Primary node %s is marked offline, returning static"
11962                           " information only for instance %s" %
11963                           (pnode.name, instance.name))
11964       else:
11965         remote_info = self.rpc.call_instance_info(instance.primary_node,
11966                                                   instance.name,
11967                                                   instance.hypervisor)
11968         remote_info.Raise("Error checking node %s" % instance.primary_node)
11969         remote_info = remote_info.payload
11970         if remote_info and "state" in remote_info:
11971           remote_state = "up"
11972         else:
11973           if instance.admin_state == constants.ADMINST_UP:
11974             remote_state = "down"
11975           else:
11976             remote_state = instance.admin_state
11977
11978       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11979                   instance.disks)
11980
11981       snodes_group_uuids = [nodes[snode_name].group
11982                             for snode_name in instance.secondary_nodes]
11983
11984       result[instance.name] = {
11985         "name": instance.name,
11986         "config_state": instance.admin_state,
11987         "run_state": remote_state,
11988         "pnode": instance.primary_node,
11989         "pnode_group_uuid": pnode.group,
11990         "pnode_group_name": group2name_fn(pnode.group),
11991         "snodes": instance.secondary_nodes,
11992         "snodes_group_uuids": snodes_group_uuids,
11993         "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11994         "os": instance.os,
11995         # this happens to be the same format used for hooks
11996         "nics": _NICListToTuple(self, instance.nics),
11997         "disk_template": instance.disk_template,
11998         "disks": disks,
11999         "hypervisor": instance.hypervisor,
12000         "network_port": instance.network_port,
12001         "hv_instance": instance.hvparams,
12002         "hv_actual": cluster.FillHV(instance, skip_globals=True),
12003         "be_instance": instance.beparams,
12004         "be_actual": cluster.FillBE(instance),
12005         "os_instance": instance.osparams,
12006         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12007         "serial_no": instance.serial_no,
12008         "mtime": instance.mtime,
12009         "ctime": instance.ctime,
12010         "uuid": instance.uuid,
12011         }
12012
12013     return result
12014
12015
12016 def PrepareContainerMods(mods, private_fn):
12017   """Prepares a list of container modifications by adding a private data field.
12018
12019   @type mods: list of tuples; (operation, index, parameters)
12020   @param mods: List of modifications
12021   @type private_fn: callable or None
12022   @param private_fn: Callable for constructing a private data field for a
12023     modification
12024   @rtype: list
12025
12026   """
12027   if private_fn is None:
12028     fn = lambda: None
12029   else:
12030     fn = private_fn
12031
12032   return [(op, idx, params, fn()) for (op, idx, params) in mods]
12033
12034
12035 #: Type description for changes as returned by L{ApplyContainerMods}'s
12036 #: callbacks
12037 _TApplyContModsCbChanges = \
12038   ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12039     ht.TNonEmptyString,
12040     ht.TAny,
12041     ])))
12042
12043
12044 def ApplyContainerMods(kind, container, chgdesc, mods,
12045                        create_fn, modify_fn, remove_fn):
12046   """Applies descriptions in C{mods} to C{container}.
12047
12048   @type kind: string
12049   @param kind: One-word item description
12050   @type container: list
12051   @param container: Container to modify
12052   @type chgdesc: None or list
12053   @param chgdesc: List of applied changes
12054   @type mods: list
12055   @param mods: Modifications as returned by L{PrepareContainerMods}
12056   @type create_fn: callable
12057   @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12058     receives absolute item index, parameters and private data object as added
12059     by L{PrepareContainerMods}, returns tuple containing new item and changes
12060     as list
12061   @type modify_fn: callable
12062   @param modify_fn: Callback for modifying an existing item
12063     (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12064     and private data object as added by L{PrepareContainerMods}, returns
12065     changes as list
12066   @type remove_fn: callable
12067   @param remove_fn: Callback on removing item; receives absolute item index,
12068     item and private data object as added by L{PrepareContainerMods}
12069
12070   """
12071   for (op, idx, params, private) in mods:
12072     if idx == -1:
12073       # Append
12074       absidx = len(container) - 1
12075     elif idx < 0:
12076       raise IndexError("Not accepting negative indices other than -1")
12077     elif idx > len(container):
12078       raise IndexError("Got %s index %s, but there are only %s" %
12079                        (kind, idx, len(container)))
12080     else:
12081       absidx = idx
12082
12083     changes = None
12084
12085     if op == constants.DDM_ADD:
12086       # Calculate where item will be added
12087       if idx == -1:
12088         addidx = len(container)
12089       else:
12090         addidx = idx
12091
12092       if create_fn is None:
12093         item = params
12094       else:
12095         (item, changes) = create_fn(addidx, params, private)
12096
12097       if idx == -1:
12098         container.append(item)
12099       else:
12100         assert idx >= 0
12101         assert idx <= len(container)
12102         # list.insert does so before the specified index
12103         container.insert(idx, item)
12104     else:
12105       # Retrieve existing item
12106       try:
12107         item = container[absidx]
12108       except IndexError:
12109         raise IndexError("Invalid %s index %s" % (kind, idx))
12110
12111       if op == constants.DDM_REMOVE:
12112         assert not params
12113
12114         if remove_fn is not None:
12115           remove_fn(absidx, item, private)
12116
12117         changes = [("%s/%s" % (kind, absidx), "remove")]
12118
12119         assert container[absidx] == item
12120         del container[absidx]
12121       elif op == constants.DDM_MODIFY:
12122         if modify_fn is not None:
12123           changes = modify_fn(absidx, item, params, private)
12124       else:
12125         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12126
12127     assert _TApplyContModsCbChanges(changes)
12128
12129     if not (chgdesc is None or changes is None):
12130       chgdesc.extend(changes)
12131
12132
12133 def _UpdateIvNames(base_index, disks):
12134   """Updates the C{iv_name} attribute of disks.
12135
12136   @type disks: list of L{objects.Disk}
12137
12138   """
12139   for (idx, disk) in enumerate(disks):
12140     disk.iv_name = "disk/%s" % (base_index + idx, )
12141
12142
12143 class _InstNicModPrivate:
12144   """Data structure for network interface modifications.
12145
12146   Used by L{LUInstanceSetParams}.
12147
12148   """
12149   def __init__(self):
12150     self.params = None
12151     self.filled = None
12152
12153
12154 class LUInstanceSetParams(LogicalUnit):
12155   """Modifies an instances's parameters.
12156
12157   """
12158   HPATH = "instance-modify"
12159   HTYPE = constants.HTYPE_INSTANCE
12160   REQ_BGL = False
12161
12162   @staticmethod
12163   def _UpgradeDiskNicMods(kind, mods, verify_fn):
12164     assert ht.TList(mods)
12165     assert not mods or len(mods[0]) in (2, 3)
12166
12167     if mods and len(mods[0]) == 2:
12168       result = []
12169
12170       addremove = 0
12171       for op, params in mods:
12172         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12173           result.append((op, -1, params))
12174           addremove += 1
12175
12176           if addremove > 1:
12177             raise errors.OpPrereqError("Only one %s add or remove operation is"
12178                                        " supported at a time" % kind,
12179                                        errors.ECODE_INVAL)
12180         else:
12181           result.append((constants.DDM_MODIFY, op, params))
12182
12183       assert verify_fn(result)
12184     else:
12185       result = mods
12186
12187     return result
12188
12189   @staticmethod
12190   def _CheckMods(kind, mods, key_types, item_fn):
12191     """Ensures requested disk/NIC modifications are valid.
12192
12193     """
12194     for (op, _, params) in mods:
12195       assert ht.TDict(params)
12196
12197       utils.ForceDictType(params, key_types)
12198
12199       if op == constants.DDM_REMOVE:
12200         if params:
12201           raise errors.OpPrereqError("No settings should be passed when"
12202                                      " removing a %s" % kind,
12203                                      errors.ECODE_INVAL)
12204       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12205         item_fn(op, params)
12206       else:
12207         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12208
12209   @staticmethod
12210   def _VerifyDiskModification(op, params):
12211     """Verifies a disk modification.
12212
12213     """
12214     if op == constants.DDM_ADD:
12215       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12216       if mode not in constants.DISK_ACCESS_SET:
12217         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12218                                    errors.ECODE_INVAL)
12219
12220       size = params.get(constants.IDISK_SIZE, None)
12221       if size is None:
12222         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12223                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12224
12225       try:
12226         size = int(size)
12227       except (TypeError, ValueError), err:
12228         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12229                                    errors.ECODE_INVAL)
12230
12231       params[constants.IDISK_SIZE] = size
12232
12233     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12234       raise errors.OpPrereqError("Disk size change not possible, use"
12235                                  " grow-disk", errors.ECODE_INVAL)
12236
12237   @staticmethod
12238   def _VerifyNicModification(op, params):
12239     """Verifies a network interface modification.
12240
12241     """
12242     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12243       ip = params.get(constants.INIC_IP, None)
12244       if ip is None:
12245         pass
12246       elif ip.lower() == constants.VALUE_NONE:
12247         params[constants.INIC_IP] = None
12248       elif not netutils.IPAddress.IsValid(ip):
12249         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12250                                    errors.ECODE_INVAL)
12251
12252       bridge = params.get("bridge", None)
12253       link = params.get(constants.INIC_LINK, None)
12254       if bridge and link:
12255         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12256                                    " at the same time", errors.ECODE_INVAL)
12257       elif bridge and bridge.lower() == constants.VALUE_NONE:
12258         params["bridge"] = None
12259       elif link and link.lower() == constants.VALUE_NONE:
12260         params[constants.INIC_LINK] = None
12261
12262       if op == constants.DDM_ADD:
12263         macaddr = params.get(constants.INIC_MAC, None)
12264         if macaddr is None:
12265           params[constants.INIC_MAC] = constants.VALUE_AUTO
12266
12267       if constants.INIC_MAC in params:
12268         macaddr = params[constants.INIC_MAC]
12269         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12270           macaddr = utils.NormalizeAndValidateMac(macaddr)
12271
12272         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12273           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12274                                      " modifying an existing NIC",
12275                                      errors.ECODE_INVAL)
12276
12277   def CheckArguments(self):
12278     if not (self.op.nics or self.op.disks or self.op.disk_template or
12279             self.op.hvparams or self.op.beparams or self.op.os_name or
12280             self.op.offline is not None or self.op.runtime_mem):
12281       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12282
12283     if self.op.hvparams:
12284       _CheckGlobalHvParams(self.op.hvparams)
12285
12286     self.op.disks = self._UpgradeDiskNicMods(
12287       "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12288     self.op.nics = self._UpgradeDiskNicMods(
12289       "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12290
12291     # Check disk modifications
12292     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12293                     self._VerifyDiskModification)
12294
12295     if self.op.disks and self.op.disk_template is not None:
12296       raise errors.OpPrereqError("Disk template conversion and other disk"
12297                                  " changes not supported at the same time",
12298                                  errors.ECODE_INVAL)
12299
12300     if (self.op.disk_template and
12301         self.op.disk_template in constants.DTS_INT_MIRROR and
12302         self.op.remote_node is None):
12303       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12304                                  " one requires specifying a secondary node",
12305                                  errors.ECODE_INVAL)
12306
12307     # Check NIC modifications
12308     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12309                     self._VerifyNicModification)
12310
12311   def ExpandNames(self):
12312     self._ExpandAndLockInstance()
12313     # Can't even acquire node locks in shared mode as upcoming changes in
12314     # Ganeti 2.6 will start to modify the node object on disk conversion
12315     self.needed_locks[locking.LEVEL_NODE] = []
12316     self.needed_locks[locking.LEVEL_NODE_RES] = []
12317     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12318
12319   def DeclareLocks(self, level):
12320     # TODO: Acquire group lock in shared mode (disk parameters)
12321     if level == locking.LEVEL_NODE:
12322       self._LockInstancesNodes()
12323       if self.op.disk_template and self.op.remote_node:
12324         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12325         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12326     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12327       # Copy node locks
12328       self.needed_locks[locking.LEVEL_NODE_RES] = \
12329         self.needed_locks[locking.LEVEL_NODE][:]
12330
12331   def BuildHooksEnv(self):
12332     """Build hooks env.
12333
12334     This runs on the master, primary and secondaries.
12335
12336     """
12337     args = dict()
12338     if constants.BE_MINMEM in self.be_new:
12339       args["minmem"] = self.be_new[constants.BE_MINMEM]
12340     if constants.BE_MAXMEM in self.be_new:
12341       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12342     if constants.BE_VCPUS in self.be_new:
12343       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12344     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12345     # information at all.
12346
12347     if self._new_nics is not None:
12348       nics = []
12349
12350       for nic in self._new_nics:
12351         nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12352         mode = nicparams[constants.NIC_MODE]
12353         link = nicparams[constants.NIC_LINK]
12354         nics.append((nic.ip, nic.mac, mode, link))
12355
12356       args["nics"] = nics
12357
12358     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12359     if self.op.disk_template:
12360       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12361     if self.op.runtime_mem:
12362       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12363
12364     return env
12365
12366   def BuildHooksNodes(self):
12367     """Build hooks nodes.
12368
12369     """
12370     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12371     return (nl, nl)
12372
12373   def _PrepareNicModification(self, params, private, old_ip, old_params,
12374                               cluster, pnode):
12375     update_params_dict = dict([(key, params[key])
12376                                for key in constants.NICS_PARAMETERS
12377                                if key in params])
12378
12379     if "bridge" in params:
12380       update_params_dict[constants.NIC_LINK] = params["bridge"]
12381
12382     new_params = _GetUpdatedParams(old_params, update_params_dict)
12383     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12384
12385     new_filled_params = cluster.SimpleFillNIC(new_params)
12386     objects.NIC.CheckParameterSyntax(new_filled_params)
12387
12388     new_mode = new_filled_params[constants.NIC_MODE]
12389     if new_mode == constants.NIC_MODE_BRIDGED:
12390       bridge = new_filled_params[constants.NIC_LINK]
12391       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12392       if msg:
12393         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12394         if self.op.force:
12395           self.warn.append(msg)
12396         else:
12397           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12398
12399     elif new_mode == constants.NIC_MODE_ROUTED:
12400       ip = params.get(constants.INIC_IP, old_ip)
12401       if ip is None:
12402         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12403                                    " on a routed NIC", errors.ECODE_INVAL)
12404
12405     if constants.INIC_MAC in params:
12406       mac = params[constants.INIC_MAC]
12407       if mac is None:
12408         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12409                                    errors.ECODE_INVAL)
12410       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12411         # otherwise generate the MAC address
12412         params[constants.INIC_MAC] = \
12413           self.cfg.GenerateMAC(self.proc.GetECId())
12414       else:
12415         # or validate/reserve the current one
12416         try:
12417           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12418         except errors.ReservationError:
12419           raise errors.OpPrereqError("MAC address '%s' already in use"
12420                                      " in cluster" % mac,
12421                                      errors.ECODE_NOTUNIQUE)
12422
12423     private.params = new_params
12424     private.filled = new_filled_params
12425
12426   def CheckPrereq(self):
12427     """Check prerequisites.
12428
12429     This only checks the instance list against the existing names.
12430
12431     """
12432     # checking the new params on the primary/secondary nodes
12433
12434     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12435     cluster = self.cluster = self.cfg.GetClusterInfo()
12436     assert self.instance is not None, \
12437       "Cannot retrieve locked instance %s" % self.op.instance_name
12438     pnode = instance.primary_node
12439     nodelist = list(instance.all_nodes)
12440     pnode_info = self.cfg.GetNodeInfo(pnode)
12441     self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12442
12443     # Prepare disk/NIC modifications
12444     self.diskmod = PrepareContainerMods(self.op.disks, None)
12445     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12446
12447     # OS change
12448     if self.op.os_name and not self.op.force:
12449       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12450                       self.op.force_variant)
12451       instance_os = self.op.os_name
12452     else:
12453       instance_os = instance.os
12454
12455     assert not (self.op.disk_template and self.op.disks), \
12456       "Can't modify disk template and apply disk changes at the same time"
12457
12458     if self.op.disk_template:
12459       if instance.disk_template == self.op.disk_template:
12460         raise errors.OpPrereqError("Instance already has disk template %s" %
12461                                    instance.disk_template, errors.ECODE_INVAL)
12462
12463       if (instance.disk_template,
12464           self.op.disk_template) not in self._DISK_CONVERSIONS:
12465         raise errors.OpPrereqError("Unsupported disk template conversion from"
12466                                    " %s to %s" % (instance.disk_template,
12467                                                   self.op.disk_template),
12468                                    errors.ECODE_INVAL)
12469       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12470                           msg="cannot change disk template")
12471       if self.op.disk_template in constants.DTS_INT_MIRROR:
12472         if self.op.remote_node == pnode:
12473           raise errors.OpPrereqError("Given new secondary node %s is the same"
12474                                      " as the primary node of the instance" %
12475                                      self.op.remote_node, errors.ECODE_STATE)
12476         _CheckNodeOnline(self, self.op.remote_node)
12477         _CheckNodeNotDrained(self, self.op.remote_node)
12478         # FIXME: here we assume that the old instance type is DT_PLAIN
12479         assert instance.disk_template == constants.DT_PLAIN
12480         disks = [{constants.IDISK_SIZE: d.size,
12481                   constants.IDISK_VG: d.logical_id[0]}
12482                  for d in instance.disks]
12483         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12484         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12485
12486         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12487         snode_group = self.cfg.GetNodeGroup(snode_info.group)
12488         ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12489                                                                 snode_group)
12490         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12491                                 ignore=self.op.ignore_ipolicy)
12492         if pnode_info.group != snode_info.group:
12493           self.LogWarning("The primary and secondary nodes are in two"
12494                           " different node groups; the disk parameters"
12495                           " from the first disk's node group will be"
12496                           " used")
12497
12498     # hvparams processing
12499     if self.op.hvparams:
12500       hv_type = instance.hypervisor
12501       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12502       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12503       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12504
12505       # local check
12506       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12507       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12508       self.hv_proposed = self.hv_new = hv_new # the new actual values
12509       self.hv_inst = i_hvdict # the new dict (without defaults)
12510     else:
12511       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12512                                               instance.hvparams)
12513       self.hv_new = self.hv_inst = {}
12514
12515     # beparams processing
12516     if self.op.beparams:
12517       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12518                                    use_none=True)
12519       objects.UpgradeBeParams(i_bedict)
12520       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12521       be_new = cluster.SimpleFillBE(i_bedict)
12522       self.be_proposed = self.be_new = be_new # the new actual values
12523       self.be_inst = i_bedict # the new dict (without defaults)
12524     else:
12525       self.be_new = self.be_inst = {}
12526       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12527     be_old = cluster.FillBE(instance)
12528
12529     # CPU param validation -- checking every time a parameter is
12530     # changed to cover all cases where either CPU mask or vcpus have
12531     # changed
12532     if (constants.BE_VCPUS in self.be_proposed and
12533         constants.HV_CPU_MASK in self.hv_proposed):
12534       cpu_list = \
12535         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12536       # Verify mask is consistent with number of vCPUs. Can skip this
12537       # test if only 1 entry in the CPU mask, which means same mask
12538       # is applied to all vCPUs.
12539       if (len(cpu_list) > 1 and
12540           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12541         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12542                                    " CPU mask [%s]" %
12543                                    (self.be_proposed[constants.BE_VCPUS],
12544                                     self.hv_proposed[constants.HV_CPU_MASK]),
12545                                    errors.ECODE_INVAL)
12546
12547       # Only perform this test if a new CPU mask is given
12548       if constants.HV_CPU_MASK in self.hv_new:
12549         # Calculate the largest CPU number requested
12550         max_requested_cpu = max(map(max, cpu_list))
12551         # Check that all of the instance's nodes have enough physical CPUs to
12552         # satisfy the requested CPU mask
12553         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12554                                 max_requested_cpu + 1, instance.hypervisor)
12555
12556     # osparams processing
12557     if self.op.osparams:
12558       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12559       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12560       self.os_inst = i_osdict # the new dict (without defaults)
12561     else:
12562       self.os_inst = {}
12563
12564     self.warn = []
12565
12566     #TODO(dynmem): do the appropriate check involving MINMEM
12567     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12568         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12569       mem_check_list = [pnode]
12570       if be_new[constants.BE_AUTO_BALANCE]:
12571         # either we changed auto_balance to yes or it was from before
12572         mem_check_list.extend(instance.secondary_nodes)
12573       instance_info = self.rpc.call_instance_info(pnode, instance.name,
12574                                                   instance.hypervisor)
12575       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12576                                          [instance.hypervisor])
12577       pninfo = nodeinfo[pnode]
12578       msg = pninfo.fail_msg
12579       if msg:
12580         # Assume the primary node is unreachable and go ahead
12581         self.warn.append("Can't get info from primary node %s: %s" %
12582                          (pnode, msg))
12583       else:
12584         (_, _, (pnhvinfo, )) = pninfo.payload
12585         if not isinstance(pnhvinfo.get("memory_free", None), int):
12586           self.warn.append("Node data from primary node %s doesn't contain"
12587                            " free memory information" % pnode)
12588         elif instance_info.fail_msg:
12589           self.warn.append("Can't get instance runtime information: %s" %
12590                            instance_info.fail_msg)
12591         else:
12592           if instance_info.payload:
12593             current_mem = int(instance_info.payload["memory"])
12594           else:
12595             # Assume instance not running
12596             # (there is a slight race condition here, but it's not very
12597             # probable, and we have no other way to check)
12598             # TODO: Describe race condition
12599             current_mem = 0
12600           #TODO(dynmem): do the appropriate check involving MINMEM
12601           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12602                       pnhvinfo["memory_free"])
12603           if miss_mem > 0:
12604             raise errors.OpPrereqError("This change will prevent the instance"
12605                                        " from starting, due to %d MB of memory"
12606                                        " missing on its primary node" %
12607                                        miss_mem, errors.ECODE_NORES)
12608
12609       if be_new[constants.BE_AUTO_BALANCE]:
12610         for node, nres in nodeinfo.items():
12611           if node not in instance.secondary_nodes:
12612             continue
12613           nres.Raise("Can't get info from secondary node %s" % node,
12614                      prereq=True, ecode=errors.ECODE_STATE)
12615           (_, _, (nhvinfo, )) = nres.payload
12616           if not isinstance(nhvinfo.get("memory_free", None), int):
12617             raise errors.OpPrereqError("Secondary node %s didn't return free"
12618                                        " memory information" % node,
12619                                        errors.ECODE_STATE)
12620           #TODO(dynmem): do the appropriate check involving MINMEM
12621           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12622             raise errors.OpPrereqError("This change will prevent the instance"
12623                                        " from failover to its secondary node"
12624                                        " %s, due to not enough memory" % node,
12625                                        errors.ECODE_STATE)
12626
12627     if self.op.runtime_mem:
12628       remote_info = self.rpc.call_instance_info(instance.primary_node,
12629                                                 instance.name,
12630                                                 instance.hypervisor)
12631       remote_info.Raise("Error checking node %s" % instance.primary_node)
12632       if not remote_info.payload: # not running already
12633         raise errors.OpPrereqError("Instance %s is not running" %
12634                                    instance.name, errors.ECODE_STATE)
12635
12636       current_memory = remote_info.payload["memory"]
12637       if (not self.op.force and
12638            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12639             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12640         raise errors.OpPrereqError("Instance %s must have memory between %d"
12641                                    " and %d MB of memory unless --force is"
12642                                    " given" %
12643                                    (instance.name,
12644                                     self.be_proposed[constants.BE_MINMEM],
12645                                     self.be_proposed[constants.BE_MAXMEM]),
12646                                    errors.ECODE_INVAL)
12647
12648       if self.op.runtime_mem > current_memory:
12649         _CheckNodeFreeMemory(self, instance.primary_node,
12650                              "ballooning memory for instance %s" %
12651                              instance.name,
12652                              self.op.memory - current_memory,
12653                              instance.hypervisor)
12654
12655     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12656       raise errors.OpPrereqError("Disk operations not supported for"
12657                                  " diskless instances", errors.ECODE_INVAL)
12658
12659     def _PrepareNicCreate(_, params, private):
12660       self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12661       return (None, None)
12662
12663     def _PrepareNicMod(_, nic, params, private):
12664       self._PrepareNicModification(params, private, nic.ip,
12665                                    nic.nicparams, cluster, pnode)
12666       return None
12667
12668     # Verify NIC changes (operating on copy)
12669     nics = instance.nics[:]
12670     ApplyContainerMods("NIC", nics, None, self.nicmod,
12671                        _PrepareNicCreate, _PrepareNicMod, None)
12672     if len(nics) > constants.MAX_NICS:
12673       raise errors.OpPrereqError("Instance has too many network interfaces"
12674                                  " (%d), cannot add more" % constants.MAX_NICS,
12675                                  errors.ECODE_STATE)
12676
12677     # Verify disk changes (operating on a copy)
12678     disks = instance.disks[:]
12679     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12680     if len(disks) > constants.MAX_DISKS:
12681       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12682                                  " more" % constants.MAX_DISKS,
12683                                  errors.ECODE_STATE)
12684
12685     if self.op.offline is not None:
12686       if self.op.offline:
12687         msg = "can't change to offline"
12688       else:
12689         msg = "can't change to online"
12690       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12691
12692     # Pre-compute NIC changes (necessary to use result in hooks)
12693     self._nic_chgdesc = []
12694     if self.nicmod:
12695       # Operate on copies as this is still in prereq
12696       nics = [nic.Copy() for nic in instance.nics]
12697       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12698                          self._CreateNewNic, self._ApplyNicMods, None)
12699       self._new_nics = nics
12700     else:
12701       self._new_nics = None
12702
12703   def _ConvertPlainToDrbd(self, feedback_fn):
12704     """Converts an instance from plain to drbd.
12705
12706     """
12707     feedback_fn("Converting template to drbd")
12708     instance = self.instance
12709     pnode = instance.primary_node
12710     snode = self.op.remote_node
12711
12712     assert instance.disk_template == constants.DT_PLAIN
12713
12714     # create a fake disk info for _GenerateDiskTemplate
12715     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12716                   constants.IDISK_VG: d.logical_id[0]}
12717                  for d in instance.disks]
12718     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12719                                       instance.name, pnode, [snode],
12720                                       disk_info, None, None, 0, feedback_fn,
12721                                       self.diskparams)
12722     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12723                                         self.diskparams)
12724     info = _GetInstanceInfoText(instance)
12725     feedback_fn("Creating additional volumes...")
12726     # first, create the missing data and meta devices
12727     for disk in anno_disks:
12728       # unfortunately this is... not too nice
12729       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12730                             info, True)
12731       for child in disk.children:
12732         _CreateSingleBlockDev(self, snode, instance, child, info, True)
12733     # at this stage, all new LVs have been created, we can rename the
12734     # old ones
12735     feedback_fn("Renaming original volumes...")
12736     rename_list = [(o, n.children[0].logical_id)
12737                    for (o, n) in zip(instance.disks, new_disks)]
12738     result = self.rpc.call_blockdev_rename(pnode, rename_list)
12739     result.Raise("Failed to rename original LVs")
12740
12741     feedback_fn("Initializing DRBD devices...")
12742     # all child devices are in place, we can now create the DRBD devices
12743     for disk in anno_disks:
12744       for node in [pnode, snode]:
12745         f_create = node == pnode
12746         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12747
12748     # at this point, the instance has been modified
12749     instance.disk_template = constants.DT_DRBD8
12750     instance.disks = new_disks
12751     self.cfg.Update(instance, feedback_fn)
12752
12753     # Release node locks while waiting for sync
12754     _ReleaseLocks(self, locking.LEVEL_NODE)
12755
12756     # disks are created, waiting for sync
12757     disk_abort = not _WaitForSync(self, instance,
12758                                   oneshot=not self.op.wait_for_sync)
12759     if disk_abort:
12760       raise errors.OpExecError("There are some degraded disks for"
12761                                " this instance, please cleanup manually")
12762
12763     # Node resource locks will be released by caller
12764
12765   def _ConvertDrbdToPlain(self, feedback_fn):
12766     """Converts an instance from drbd to plain.
12767
12768     """
12769     instance = self.instance
12770
12771     assert len(instance.secondary_nodes) == 1
12772     assert instance.disk_template == constants.DT_DRBD8
12773
12774     pnode = instance.primary_node
12775     snode = instance.secondary_nodes[0]
12776     feedback_fn("Converting template to plain")
12777
12778     old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12779     new_disks = [d.children[0] for d in instance.disks]
12780
12781     # copy over size and mode
12782     for parent, child in zip(old_disks, new_disks):
12783       child.size = parent.size
12784       child.mode = parent.mode
12785
12786     # this is a DRBD disk, return its port to the pool
12787     # NOTE: this must be done right before the call to cfg.Update!
12788     for disk in old_disks:
12789       tcp_port = disk.logical_id[2]
12790       self.cfg.AddTcpUdpPort(tcp_port)
12791
12792     # update instance structure
12793     instance.disks = new_disks
12794     instance.disk_template = constants.DT_PLAIN
12795     self.cfg.Update(instance, feedback_fn)
12796
12797     # Release locks in case removing disks takes a while
12798     _ReleaseLocks(self, locking.LEVEL_NODE)
12799
12800     feedback_fn("Removing volumes on the secondary node...")
12801     for disk in old_disks:
12802       self.cfg.SetDiskID(disk, snode)
12803       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12804       if msg:
12805         self.LogWarning("Could not remove block device %s on node %s,"
12806                         " continuing anyway: %s", disk.iv_name, snode, msg)
12807
12808     feedback_fn("Removing unneeded volumes on the primary node...")
12809     for idx, disk in enumerate(old_disks):
12810       meta = disk.children[1]
12811       self.cfg.SetDiskID(meta, pnode)
12812       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12813       if msg:
12814         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12815                         " continuing anyway: %s", idx, pnode, msg)
12816
12817   def _CreateNewDisk(self, idx, params, _):
12818     """Creates a new disk.
12819
12820     """
12821     instance = self.instance
12822
12823     # add a new disk
12824     if instance.disk_template in constants.DTS_FILEBASED:
12825       (file_driver, file_path) = instance.disks[0].logical_id
12826       file_path = os.path.dirname(file_path)
12827     else:
12828       file_driver = file_path = None
12829
12830     disk = \
12831       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12832                             instance.primary_node, instance.secondary_nodes,
12833                             [params], file_path, file_driver, idx,
12834                             self.Log, self.diskparams)[0]
12835
12836     info = _GetInstanceInfoText(instance)
12837
12838     logging.info("Creating volume %s for instance %s",
12839                  disk.iv_name, instance.name)
12840     # Note: this needs to be kept in sync with _CreateDisks
12841     #HARDCODE
12842     for node in instance.all_nodes:
12843       f_create = (node == instance.primary_node)
12844       try:
12845         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12846       except errors.OpExecError, err:
12847         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12848                         disk.iv_name, disk, node, err)
12849
12850     return (disk, [
12851       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12852       ])
12853
12854   @staticmethod
12855   def _ModifyDisk(idx, disk, params, _):
12856     """Modifies a disk.
12857
12858     """
12859     disk.mode = params[constants.IDISK_MODE]
12860
12861     return [
12862       ("disk.mode/%d" % idx, disk.mode),
12863       ]
12864
12865   def _RemoveDisk(self, idx, root, _):
12866     """Removes a disk.
12867
12868     """
12869     (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12870     for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12871       self.cfg.SetDiskID(disk, node)
12872       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12873       if msg:
12874         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12875                         " continuing anyway", idx, node, msg)
12876
12877     # if this is a DRBD disk, return its port to the pool
12878     if root.dev_type in constants.LDS_DRBD:
12879       self.cfg.AddTcpUdpPort(root.logical_id[2])
12880
12881   @staticmethod
12882   def _CreateNewNic(idx, params, private):
12883     """Creates data structure for a new network interface.
12884
12885     """
12886     mac = params[constants.INIC_MAC]
12887     ip = params.get(constants.INIC_IP, None)
12888     nicparams = private.params
12889
12890     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12891       ("nic.%d" % idx,
12892        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12893        (mac, ip, private.filled[constants.NIC_MODE],
12894        private.filled[constants.NIC_LINK])),
12895       ])
12896
12897   @staticmethod
12898   def _ApplyNicMods(idx, nic, params, private):
12899     """Modifies a network interface.
12900
12901     """
12902     changes = []
12903
12904     for key in [constants.INIC_MAC, constants.INIC_IP]:
12905       if key in params:
12906         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12907         setattr(nic, key, params[key])
12908
12909     if private.params:
12910       nic.nicparams = private.params
12911
12912       for (key, val) in params.items():
12913         changes.append(("nic.%s/%d" % (key, idx), val))
12914
12915     return changes
12916
12917   def Exec(self, feedback_fn):
12918     """Modifies an instance.
12919
12920     All parameters take effect only at the next restart of the instance.
12921
12922     """
12923     # Process here the warnings from CheckPrereq, as we don't have a
12924     # feedback_fn there.
12925     # TODO: Replace with self.LogWarning
12926     for warn in self.warn:
12927       feedback_fn("WARNING: %s" % warn)
12928
12929     assert ((self.op.disk_template is None) ^
12930             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12931       "Not owning any node resource locks"
12932
12933     result = []
12934     instance = self.instance
12935
12936     # runtime memory
12937     if self.op.runtime_mem:
12938       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12939                                                      instance,
12940                                                      self.op.runtime_mem)
12941       rpcres.Raise("Cannot modify instance runtime memory")
12942       result.append(("runtime_memory", self.op.runtime_mem))
12943
12944     # Apply disk changes
12945     ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12946                        self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12947     _UpdateIvNames(0, instance.disks)
12948
12949     if self.op.disk_template:
12950       if __debug__:
12951         check_nodes = set(instance.all_nodes)
12952         if self.op.remote_node:
12953           check_nodes.add(self.op.remote_node)
12954         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12955           owned = self.owned_locks(level)
12956           assert not (check_nodes - owned), \
12957             ("Not owning the correct locks, owning %r, expected at least %r" %
12958              (owned, check_nodes))
12959
12960       r_shut = _ShutdownInstanceDisks(self, instance)
12961       if not r_shut:
12962         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12963                                  " proceed with disk template conversion")
12964       mode = (instance.disk_template, self.op.disk_template)
12965       try:
12966         self._DISK_CONVERSIONS[mode](self, feedback_fn)
12967       except:
12968         self.cfg.ReleaseDRBDMinors(instance.name)
12969         raise
12970       result.append(("disk_template", self.op.disk_template))
12971
12972       assert instance.disk_template == self.op.disk_template, \
12973         ("Expected disk template '%s', found '%s'" %
12974          (self.op.disk_template, instance.disk_template))
12975
12976     # Release node and resource locks if there are any (they might already have
12977     # been released during disk conversion)
12978     _ReleaseLocks(self, locking.LEVEL_NODE)
12979     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12980
12981     # Apply NIC changes
12982     if self._new_nics is not None:
12983       instance.nics = self._new_nics
12984       result.extend(self._nic_chgdesc)
12985
12986     # hvparams changes
12987     if self.op.hvparams:
12988       instance.hvparams = self.hv_inst
12989       for key, val in self.op.hvparams.iteritems():
12990         result.append(("hv/%s" % key, val))
12991
12992     # beparams changes
12993     if self.op.beparams:
12994       instance.beparams = self.be_inst
12995       for key, val in self.op.beparams.iteritems():
12996         result.append(("be/%s" % key, val))
12997
12998     # OS change
12999     if self.op.os_name:
13000       instance.os = self.op.os_name
13001
13002     # osparams changes
13003     if self.op.osparams:
13004       instance.osparams = self.os_inst
13005       for key, val in self.op.osparams.iteritems():
13006         result.append(("os/%s" % key, val))
13007
13008     if self.op.offline is None:
13009       # Ignore
13010       pass
13011     elif self.op.offline:
13012       # Mark instance as offline
13013       self.cfg.MarkInstanceOffline(instance.name)
13014       result.append(("admin_state", constants.ADMINST_OFFLINE))
13015     else:
13016       # Mark instance as online, but stopped
13017       self.cfg.MarkInstanceDown(instance.name)
13018       result.append(("admin_state", constants.ADMINST_DOWN))
13019
13020     self.cfg.Update(instance, feedback_fn)
13021
13022     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13023                 self.owned_locks(locking.LEVEL_NODE)), \
13024       "All node locks should have been released by now"
13025
13026     return result
13027
13028   _DISK_CONVERSIONS = {
13029     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13030     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13031     }
13032
13033
13034 class LUInstanceChangeGroup(LogicalUnit):
13035   HPATH = "instance-change-group"
13036   HTYPE = constants.HTYPE_INSTANCE
13037   REQ_BGL = False
13038
13039   def ExpandNames(self):
13040     self.share_locks = _ShareAll()
13041     self.needed_locks = {
13042       locking.LEVEL_NODEGROUP: [],
13043       locking.LEVEL_NODE: [],
13044       }
13045
13046     self._ExpandAndLockInstance()
13047
13048     if self.op.target_groups:
13049       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13050                                   self.op.target_groups)
13051     else:
13052       self.req_target_uuids = None
13053
13054     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13055
13056   def DeclareLocks(self, level):
13057     if level == locking.LEVEL_NODEGROUP:
13058       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13059
13060       if self.req_target_uuids:
13061         lock_groups = set(self.req_target_uuids)
13062
13063         # Lock all groups used by instance optimistically; this requires going
13064         # via the node before it's locked, requiring verification later on
13065         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13066         lock_groups.update(instance_groups)
13067       else:
13068         # No target groups, need to lock all of them
13069         lock_groups = locking.ALL_SET
13070
13071       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13072
13073     elif level == locking.LEVEL_NODE:
13074       if self.req_target_uuids:
13075         # Lock all nodes used by instances
13076         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13077         self._LockInstancesNodes()
13078
13079         # Lock all nodes in all potential target groups
13080         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13081                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13082         member_nodes = [node_name
13083                         for group in lock_groups
13084                         for node_name in self.cfg.GetNodeGroup(group).members]
13085         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13086       else:
13087         # Lock all nodes as all groups are potential targets
13088         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13089
13090   def CheckPrereq(self):
13091     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13092     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13093     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13094
13095     assert (self.req_target_uuids is None or
13096             owned_groups.issuperset(self.req_target_uuids))
13097     assert owned_instances == set([self.op.instance_name])
13098
13099     # Get instance information
13100     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13101
13102     # Check if node groups for locked instance are still correct
13103     assert owned_nodes.issuperset(self.instance.all_nodes), \
13104       ("Instance %s's nodes changed while we kept the lock" %
13105        self.op.instance_name)
13106
13107     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13108                                            owned_groups)
13109
13110     if self.req_target_uuids:
13111       # User requested specific target groups
13112       self.target_uuids = frozenset(self.req_target_uuids)
13113     else:
13114       # All groups except those used by the instance are potential targets
13115       self.target_uuids = owned_groups - inst_groups
13116
13117     conflicting_groups = self.target_uuids & inst_groups
13118     if conflicting_groups:
13119       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13120                                  " used by the instance '%s'" %
13121                                  (utils.CommaJoin(conflicting_groups),
13122                                   self.op.instance_name),
13123                                  errors.ECODE_INVAL)
13124
13125     if not self.target_uuids:
13126       raise errors.OpPrereqError("There are no possible target groups",
13127                                  errors.ECODE_INVAL)
13128
13129   def BuildHooksEnv(self):
13130     """Build hooks env.
13131
13132     """
13133     assert self.target_uuids
13134
13135     env = {
13136       "TARGET_GROUPS": " ".join(self.target_uuids),
13137       }
13138
13139     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13140
13141     return env
13142
13143   def BuildHooksNodes(self):
13144     """Build hooks nodes.
13145
13146     """
13147     mn = self.cfg.GetMasterNode()
13148     return ([mn], [mn])
13149
13150   def Exec(self, feedback_fn):
13151     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13152
13153     assert instances == [self.op.instance_name], "Instance not locked"
13154
13155     req = iallocator.IAReqGroupChange(instances=instances,
13156                                       target_groups=list(self.target_uuids))
13157     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13158
13159     ial.Run(self.op.iallocator)
13160
13161     if not ial.success:
13162       raise errors.OpPrereqError("Can't compute solution for changing group of"
13163                                  " instance '%s' using iallocator '%s': %s" %
13164                                  (self.op.instance_name, self.op.iallocator,
13165                                   ial.info), errors.ECODE_NORES)
13166
13167     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13168
13169     self.LogInfo("Iallocator returned %s job(s) for changing group of"
13170                  " instance '%s'", len(jobs), self.op.instance_name)
13171
13172     return ResultWithJobs(jobs)
13173
13174
13175 class LUBackupQuery(NoHooksLU):
13176   """Query the exports list
13177
13178   """
13179   REQ_BGL = False
13180
13181   def CheckArguments(self):
13182     self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13183                              ["node", "export"], self.op.use_locking)
13184
13185   def ExpandNames(self):
13186     self.expq.ExpandNames(self)
13187
13188   def DeclareLocks(self, level):
13189     self.expq.DeclareLocks(self, level)
13190
13191   def Exec(self, feedback_fn):
13192     result = {}
13193
13194     for (node, expname) in self.expq.OldStyleQuery(self):
13195       if expname is None:
13196         result[node] = False
13197       else:
13198         result.setdefault(node, []).append(expname)
13199
13200     return result
13201
13202
13203 class _ExportQuery(_QueryBase):
13204   FIELDS = query.EXPORT_FIELDS
13205
13206   #: The node name is not a unique key for this query
13207   SORT_FIELD = "node"
13208
13209   def ExpandNames(self, lu):
13210     lu.needed_locks = {}
13211
13212     # The following variables interact with _QueryBase._GetNames
13213     if self.names:
13214       self.wanted = _GetWantedNodes(lu, self.names)
13215     else:
13216       self.wanted = locking.ALL_SET
13217
13218     self.do_locking = self.use_locking
13219
13220     if self.do_locking:
13221       lu.share_locks = _ShareAll()
13222       lu.needed_locks = {
13223         locking.LEVEL_NODE: self.wanted,
13224         }
13225
13226   def DeclareLocks(self, lu, level):
13227     pass
13228
13229   def _GetQueryData(self, lu):
13230     """Computes the list of nodes and their attributes.
13231
13232     """
13233     # Locking is not used
13234     # TODO
13235     assert not (compat.any(lu.glm.is_owned(level)
13236                            for level in locking.LEVELS
13237                            if level != locking.LEVEL_CLUSTER) or
13238                 self.do_locking or self.use_locking)
13239
13240     nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13241
13242     result = []
13243
13244     for (node, nres) in lu.rpc.call_export_list(nodes).items():
13245       if nres.fail_msg:
13246         result.append((node, None))
13247       else:
13248         result.extend((node, expname) for expname in nres.payload)
13249
13250     return result
13251
13252
13253 class LUBackupPrepare(NoHooksLU):
13254   """Prepares an instance for an export and returns useful information.
13255
13256   """
13257   REQ_BGL = False
13258
13259   def ExpandNames(self):
13260     self._ExpandAndLockInstance()
13261
13262   def CheckPrereq(self):
13263     """Check prerequisites.
13264
13265     """
13266     instance_name = self.op.instance_name
13267
13268     self.instance = self.cfg.GetInstanceInfo(instance_name)
13269     assert self.instance is not None, \
13270           "Cannot retrieve locked instance %s" % self.op.instance_name
13271     _CheckNodeOnline(self, self.instance.primary_node)
13272
13273     self._cds = _GetClusterDomainSecret()
13274
13275   def Exec(self, feedback_fn):
13276     """Prepares an instance for an export.
13277
13278     """
13279     instance = self.instance
13280
13281     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13282       salt = utils.GenerateSecret(8)
13283
13284       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13285       result = self.rpc.call_x509_cert_create(instance.primary_node,
13286                                               constants.RIE_CERT_VALIDITY)
13287       result.Raise("Can't create X509 key and certificate on %s" % result.node)
13288
13289       (name, cert_pem) = result.payload
13290
13291       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13292                                              cert_pem)
13293
13294       return {
13295         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13296         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13297                           salt),
13298         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13299         }
13300
13301     return None
13302
13303
13304 class LUBackupExport(LogicalUnit):
13305   """Export an instance to an image in the cluster.
13306
13307   """
13308   HPATH = "instance-export"
13309   HTYPE = constants.HTYPE_INSTANCE
13310   REQ_BGL = False
13311
13312   def CheckArguments(self):
13313     """Check the arguments.
13314
13315     """
13316     self.x509_key_name = self.op.x509_key_name
13317     self.dest_x509_ca_pem = self.op.destination_x509_ca
13318
13319     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13320       if not self.x509_key_name:
13321         raise errors.OpPrereqError("Missing X509 key name for encryption",
13322                                    errors.ECODE_INVAL)
13323
13324       if not self.dest_x509_ca_pem:
13325         raise errors.OpPrereqError("Missing destination X509 CA",
13326                                    errors.ECODE_INVAL)
13327
13328   def ExpandNames(self):
13329     self._ExpandAndLockInstance()
13330
13331     # Lock all nodes for local exports
13332     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13333       # FIXME: lock only instance primary and destination node
13334       #
13335       # Sad but true, for now we have do lock all nodes, as we don't know where
13336       # the previous export might be, and in this LU we search for it and
13337       # remove it from its current node. In the future we could fix this by:
13338       #  - making a tasklet to search (share-lock all), then create the
13339       #    new one, then one to remove, after
13340       #  - removing the removal operation altogether
13341       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13342
13343   def DeclareLocks(self, level):
13344     """Last minute lock declaration."""
13345     # All nodes are locked anyway, so nothing to do here.
13346
13347   def BuildHooksEnv(self):
13348     """Build hooks env.
13349
13350     This will run on the master, primary node and target node.
13351
13352     """
13353     env = {
13354       "EXPORT_MODE": self.op.mode,
13355       "EXPORT_NODE": self.op.target_node,
13356       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13357       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13358       # TODO: Generic function for boolean env variables
13359       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13360       }
13361
13362     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13363
13364     return env
13365
13366   def BuildHooksNodes(self):
13367     """Build hooks nodes.
13368
13369     """
13370     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13371
13372     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13373       nl.append(self.op.target_node)
13374
13375     return (nl, nl)
13376
13377   def CheckPrereq(self):
13378     """Check prerequisites.
13379
13380     This checks that the instance and node names are valid.
13381
13382     """
13383     instance_name = self.op.instance_name
13384
13385     self.instance = self.cfg.GetInstanceInfo(instance_name)
13386     assert self.instance is not None, \
13387           "Cannot retrieve locked instance %s" % self.op.instance_name
13388     _CheckNodeOnline(self, self.instance.primary_node)
13389
13390     if (self.op.remove_instance and
13391         self.instance.admin_state == constants.ADMINST_UP and
13392         not self.op.shutdown):
13393       raise errors.OpPrereqError("Can not remove instance without shutting it"
13394                                  " down before", errors.ECODE_STATE)
13395
13396     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13397       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13398       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13399       assert self.dst_node is not None
13400
13401       _CheckNodeOnline(self, self.dst_node.name)
13402       _CheckNodeNotDrained(self, self.dst_node.name)
13403
13404       self._cds = None
13405       self.dest_disk_info = None
13406       self.dest_x509_ca = None
13407
13408     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13409       self.dst_node = None
13410
13411       if len(self.op.target_node) != len(self.instance.disks):
13412         raise errors.OpPrereqError(("Received destination information for %s"
13413                                     " disks, but instance %s has %s disks") %
13414                                    (len(self.op.target_node), instance_name,
13415                                     len(self.instance.disks)),
13416                                    errors.ECODE_INVAL)
13417
13418       cds = _GetClusterDomainSecret()
13419
13420       # Check X509 key name
13421       try:
13422         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13423       except (TypeError, ValueError), err:
13424         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13425                                    errors.ECODE_INVAL)
13426
13427       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13428         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13429                                    errors.ECODE_INVAL)
13430
13431       # Load and verify CA
13432       try:
13433         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13434       except OpenSSL.crypto.Error, err:
13435         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13436                                    (err, ), errors.ECODE_INVAL)
13437
13438       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13439       if errcode is not None:
13440         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13441                                    (msg, ), errors.ECODE_INVAL)
13442
13443       self.dest_x509_ca = cert
13444
13445       # Verify target information
13446       disk_info = []
13447       for idx, disk_data in enumerate(self.op.target_node):
13448         try:
13449           (host, port, magic) = \
13450             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13451         except errors.GenericError, err:
13452           raise errors.OpPrereqError("Target info for disk %s: %s" %
13453                                      (idx, err), errors.ECODE_INVAL)
13454
13455         disk_info.append((host, port, magic))
13456
13457       assert len(disk_info) == len(self.op.target_node)
13458       self.dest_disk_info = disk_info
13459
13460     else:
13461       raise errors.ProgrammerError("Unhandled export mode %r" %
13462                                    self.op.mode)
13463
13464     # instance disk type verification
13465     # TODO: Implement export support for file-based disks
13466     for disk in self.instance.disks:
13467       if disk.dev_type == constants.LD_FILE:
13468         raise errors.OpPrereqError("Export not supported for instances with"
13469                                    " file-based disks", errors.ECODE_INVAL)
13470
13471   def _CleanupExports(self, feedback_fn):
13472     """Removes exports of current instance from all other nodes.
13473
13474     If an instance in a cluster with nodes A..D was exported to node C, its
13475     exports will be removed from the nodes A, B and D.
13476
13477     """
13478     assert self.op.mode != constants.EXPORT_MODE_REMOTE
13479
13480     nodelist = self.cfg.GetNodeList()
13481     nodelist.remove(self.dst_node.name)
13482
13483     # on one-node clusters nodelist will be empty after the removal
13484     # if we proceed the backup would be removed because OpBackupQuery
13485     # substitutes an empty list with the full cluster node list.
13486     iname = self.instance.name
13487     if nodelist:
13488       feedback_fn("Removing old exports for instance %s" % iname)
13489       exportlist = self.rpc.call_export_list(nodelist)
13490       for node in exportlist:
13491         if exportlist[node].fail_msg:
13492           continue
13493         if iname in exportlist[node].payload:
13494           msg = self.rpc.call_export_remove(node, iname).fail_msg
13495           if msg:
13496             self.LogWarning("Could not remove older export for instance %s"
13497                             " on node %s: %s", iname, node, msg)
13498
13499   def Exec(self, feedback_fn):
13500     """Export an instance to an image in the cluster.
13501
13502     """
13503     assert self.op.mode in constants.EXPORT_MODES
13504
13505     instance = self.instance
13506     src_node = instance.primary_node
13507
13508     if self.op.shutdown:
13509       # shutdown the instance, but not the disks
13510       feedback_fn("Shutting down instance %s" % instance.name)
13511       result = self.rpc.call_instance_shutdown(src_node, instance,
13512                                                self.op.shutdown_timeout)
13513       # TODO: Maybe ignore failures if ignore_remove_failures is set
13514       result.Raise("Could not shutdown instance %s on"
13515                    " node %s" % (instance.name, src_node))
13516
13517     # set the disks ID correctly since call_instance_start needs the
13518     # correct drbd minor to create the symlinks
13519     for disk in instance.disks:
13520       self.cfg.SetDiskID(disk, src_node)
13521
13522     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13523
13524     if activate_disks:
13525       # Activate the instance disks if we'exporting a stopped instance
13526       feedback_fn("Activating disks for %s" % instance.name)
13527       _StartInstanceDisks(self, instance, None)
13528
13529     try:
13530       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13531                                                      instance)
13532
13533       helper.CreateSnapshots()
13534       try:
13535         if (self.op.shutdown and
13536             instance.admin_state == constants.ADMINST_UP and
13537             not self.op.remove_instance):
13538           assert not activate_disks
13539           feedback_fn("Starting instance %s" % instance.name)
13540           result = self.rpc.call_instance_start(src_node,
13541                                                 (instance, None, None), False)
13542           msg = result.fail_msg
13543           if msg:
13544             feedback_fn("Failed to start instance: %s" % msg)
13545             _ShutdownInstanceDisks(self, instance)
13546             raise errors.OpExecError("Could not start instance: %s" % msg)
13547
13548         if self.op.mode == constants.EXPORT_MODE_LOCAL:
13549           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13550         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13551           connect_timeout = constants.RIE_CONNECT_TIMEOUT
13552           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13553
13554           (key_name, _, _) = self.x509_key_name
13555
13556           dest_ca_pem = \
13557             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13558                                             self.dest_x509_ca)
13559
13560           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13561                                                      key_name, dest_ca_pem,
13562                                                      timeouts)
13563       finally:
13564         helper.Cleanup()
13565
13566       # Check for backwards compatibility
13567       assert len(dresults) == len(instance.disks)
13568       assert compat.all(isinstance(i, bool) for i in dresults), \
13569              "Not all results are boolean: %r" % dresults
13570
13571     finally:
13572       if activate_disks:
13573         feedback_fn("Deactivating disks for %s" % instance.name)
13574         _ShutdownInstanceDisks(self, instance)
13575
13576     if not (compat.all(dresults) and fin_resu):
13577       failures = []
13578       if not fin_resu:
13579         failures.append("export finalization")
13580       if not compat.all(dresults):
13581         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13582                                if not dsk)
13583         failures.append("disk export: disk(s) %s" % fdsk)
13584
13585       raise errors.OpExecError("Export failed, errors in %s" %
13586                                utils.CommaJoin(failures))
13587
13588     # At this point, the export was successful, we can cleanup/finish
13589
13590     # Remove instance if requested
13591     if self.op.remove_instance:
13592       feedback_fn("Removing instance %s" % instance.name)
13593       _RemoveInstance(self, feedback_fn, instance,
13594                       self.op.ignore_remove_failures)
13595
13596     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13597       self._CleanupExports(feedback_fn)
13598
13599     return fin_resu, dresults
13600
13601
13602 class LUBackupRemove(NoHooksLU):
13603   """Remove exports related to the named instance.
13604
13605   """
13606   REQ_BGL = False
13607
13608   def ExpandNames(self):
13609     self.needed_locks = {}
13610     # We need all nodes to be locked in order for RemoveExport to work, but we
13611     # don't need to lock the instance itself, as nothing will happen to it (and
13612     # we can remove exports also for a removed instance)
13613     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13614
13615   def Exec(self, feedback_fn):
13616     """Remove any export.
13617
13618     """
13619     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13620     # If the instance was not found we'll try with the name that was passed in.
13621     # This will only work if it was an FQDN, though.
13622     fqdn_warn = False
13623     if not instance_name:
13624       fqdn_warn = True
13625       instance_name = self.op.instance_name
13626
13627     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13628     exportlist = self.rpc.call_export_list(locked_nodes)
13629     found = False
13630     for node in exportlist:
13631       msg = exportlist[node].fail_msg
13632       if msg:
13633         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13634         continue
13635       if instance_name in exportlist[node].payload:
13636         found = True
13637         result = self.rpc.call_export_remove(node, instance_name)
13638         msg = result.fail_msg
13639         if msg:
13640           logging.error("Could not remove export for instance %s"
13641                         " on node %s: %s", instance_name, node, msg)
13642
13643     if fqdn_warn and not found:
13644       feedback_fn("Export not found. If trying to remove an export belonging"
13645                   " to a deleted instance please use its Fully Qualified"
13646                   " Domain Name.")
13647
13648
13649 class LUGroupAdd(LogicalUnit):
13650   """Logical unit for creating node groups.
13651
13652   """
13653   HPATH = "group-add"
13654   HTYPE = constants.HTYPE_GROUP
13655   REQ_BGL = False
13656
13657   def ExpandNames(self):
13658     # We need the new group's UUID here so that we can create and acquire the
13659     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13660     # that it should not check whether the UUID exists in the configuration.
13661     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13662     self.needed_locks = {}
13663     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13664
13665   def CheckPrereq(self):
13666     """Check prerequisites.
13667
13668     This checks that the given group name is not an existing node group
13669     already.
13670
13671     """
13672     try:
13673       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13674     except errors.OpPrereqError:
13675       pass
13676     else:
13677       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13678                                  " node group (UUID: %s)" %
13679                                  (self.op.group_name, existing_uuid),
13680                                  errors.ECODE_EXISTS)
13681
13682     if self.op.ndparams:
13683       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13684
13685     if self.op.hv_state:
13686       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13687     else:
13688       self.new_hv_state = None
13689
13690     if self.op.disk_state:
13691       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13692     else:
13693       self.new_disk_state = None
13694
13695     if self.op.diskparams:
13696       for templ in constants.DISK_TEMPLATES:
13697         if templ in self.op.diskparams:
13698           utils.ForceDictType(self.op.diskparams[templ],
13699                               constants.DISK_DT_TYPES)
13700       self.new_diskparams = self.op.diskparams
13701       try:
13702         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13703       except errors.OpPrereqError, err:
13704         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13705                                    errors.ECODE_INVAL)
13706     else:
13707       self.new_diskparams = {}
13708
13709     if self.op.ipolicy:
13710       cluster = self.cfg.GetClusterInfo()
13711       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13712       try:
13713         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13714       except errors.ConfigurationError, err:
13715         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13716                                    errors.ECODE_INVAL)
13717
13718   def BuildHooksEnv(self):
13719     """Build hooks env.
13720
13721     """
13722     return {
13723       "GROUP_NAME": self.op.group_name,
13724       }
13725
13726   def BuildHooksNodes(self):
13727     """Build hooks nodes.
13728
13729     """
13730     mn = self.cfg.GetMasterNode()
13731     return ([mn], [mn])
13732
13733   def Exec(self, feedback_fn):
13734     """Add the node group to the cluster.
13735
13736     """
13737     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13738                                   uuid=self.group_uuid,
13739                                   alloc_policy=self.op.alloc_policy,
13740                                   ndparams=self.op.ndparams,
13741                                   diskparams=self.new_diskparams,
13742                                   ipolicy=self.op.ipolicy,
13743                                   hv_state_static=self.new_hv_state,
13744                                   disk_state_static=self.new_disk_state)
13745
13746     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13747     del self.remove_locks[locking.LEVEL_NODEGROUP]
13748
13749
13750 class LUGroupAssignNodes(NoHooksLU):
13751   """Logical unit for assigning nodes to groups.
13752
13753   """
13754   REQ_BGL = False
13755
13756   def ExpandNames(self):
13757     # These raise errors.OpPrereqError on their own:
13758     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13759     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13760
13761     # We want to lock all the affected nodes and groups. We have readily
13762     # available the list of nodes, and the *destination* group. To gather the
13763     # list of "source" groups, we need to fetch node information later on.
13764     self.needed_locks = {
13765       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13766       locking.LEVEL_NODE: self.op.nodes,
13767       }
13768
13769   def DeclareLocks(self, level):
13770     if level == locking.LEVEL_NODEGROUP:
13771       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13772
13773       # Try to get all affected nodes' groups without having the group or node
13774       # lock yet. Needs verification later in the code flow.
13775       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13776
13777       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13778
13779   def CheckPrereq(self):
13780     """Check prerequisites.
13781
13782     """
13783     assert self.needed_locks[locking.LEVEL_NODEGROUP]
13784     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13785             frozenset(self.op.nodes))
13786
13787     expected_locks = (set([self.group_uuid]) |
13788                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13789     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13790     if actual_locks != expected_locks:
13791       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13792                                " current groups are '%s', used to be '%s'" %
13793                                (utils.CommaJoin(expected_locks),
13794                                 utils.CommaJoin(actual_locks)))
13795
13796     self.node_data = self.cfg.GetAllNodesInfo()
13797     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13798     instance_data = self.cfg.GetAllInstancesInfo()
13799
13800     if self.group is None:
13801       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13802                                (self.op.group_name, self.group_uuid))
13803
13804     (new_splits, previous_splits) = \
13805       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13806                                              for node in self.op.nodes],
13807                                             self.node_data, instance_data)
13808
13809     if new_splits:
13810       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13811
13812       if not self.op.force:
13813         raise errors.OpExecError("The following instances get split by this"
13814                                  " change and --force was not given: %s" %
13815                                  fmt_new_splits)
13816       else:
13817         self.LogWarning("This operation will split the following instances: %s",
13818                         fmt_new_splits)
13819
13820         if previous_splits:
13821           self.LogWarning("In addition, these already-split instances continue"
13822                           " to be split across groups: %s",
13823                           utils.CommaJoin(utils.NiceSort(previous_splits)))
13824
13825   def Exec(self, feedback_fn):
13826     """Assign nodes to a new group.
13827
13828     """
13829     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13830
13831     self.cfg.AssignGroupNodes(mods)
13832
13833   @staticmethod
13834   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13835     """Check for split instances after a node assignment.
13836
13837     This method considers a series of node assignments as an atomic operation,
13838     and returns information about split instances after applying the set of
13839     changes.
13840
13841     In particular, it returns information about newly split instances, and
13842     instances that were already split, and remain so after the change.
13843
13844     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13845     considered.
13846
13847     @type changes: list of (node_name, new_group_uuid) pairs.
13848     @param changes: list of node assignments to consider.
13849     @param node_data: a dict with data for all nodes
13850     @param instance_data: a dict with all instances to consider
13851     @rtype: a two-tuple
13852     @return: a list of instances that were previously okay and result split as a
13853       consequence of this change, and a list of instances that were previously
13854       split and this change does not fix.
13855
13856     """
13857     changed_nodes = dict((node, group) for node, group in changes
13858                          if node_data[node].group != group)
13859
13860     all_split_instances = set()
13861     previously_split_instances = set()
13862
13863     def InstanceNodes(instance):
13864       return [instance.primary_node] + list(instance.secondary_nodes)
13865
13866     for inst in instance_data.values():
13867       if inst.disk_template not in constants.DTS_INT_MIRROR:
13868         continue
13869
13870       instance_nodes = InstanceNodes(inst)
13871
13872       if len(set(node_data[node].group for node in instance_nodes)) > 1:
13873         previously_split_instances.add(inst.name)
13874
13875       if len(set(changed_nodes.get(node, node_data[node].group)
13876                  for node in instance_nodes)) > 1:
13877         all_split_instances.add(inst.name)
13878
13879     return (list(all_split_instances - previously_split_instances),
13880             list(previously_split_instances & all_split_instances))
13881
13882
13883 class _GroupQuery(_QueryBase):
13884   FIELDS = query.GROUP_FIELDS
13885
13886   def ExpandNames(self, lu):
13887     lu.needed_locks = {}
13888
13889     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13890     self._cluster = lu.cfg.GetClusterInfo()
13891     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13892
13893     if not self.names:
13894       self.wanted = [name_to_uuid[name]
13895                      for name in utils.NiceSort(name_to_uuid.keys())]
13896     else:
13897       # Accept names to be either names or UUIDs.
13898       missing = []
13899       self.wanted = []
13900       all_uuid = frozenset(self._all_groups.keys())
13901
13902       for name in self.names:
13903         if name in all_uuid:
13904           self.wanted.append(name)
13905         elif name in name_to_uuid:
13906           self.wanted.append(name_to_uuid[name])
13907         else:
13908           missing.append(name)
13909
13910       if missing:
13911         raise errors.OpPrereqError("Some groups do not exist: %s" %
13912                                    utils.CommaJoin(missing),
13913                                    errors.ECODE_NOENT)
13914
13915   def DeclareLocks(self, lu, level):
13916     pass
13917
13918   def _GetQueryData(self, lu):
13919     """Computes the list of node groups and their attributes.
13920
13921     """
13922     do_nodes = query.GQ_NODE in self.requested_data
13923     do_instances = query.GQ_INST in self.requested_data
13924
13925     group_to_nodes = None
13926     group_to_instances = None
13927
13928     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13929     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13930     # latter GetAllInstancesInfo() is not enough, for we have to go through
13931     # instance->node. Hence, we will need to process nodes even if we only need
13932     # instance information.
13933     if do_nodes or do_instances:
13934       all_nodes = lu.cfg.GetAllNodesInfo()
13935       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13936       node_to_group = {}
13937
13938       for node in all_nodes.values():
13939         if node.group in group_to_nodes:
13940           group_to_nodes[node.group].append(node.name)
13941           node_to_group[node.name] = node.group
13942
13943       if do_instances:
13944         all_instances = lu.cfg.GetAllInstancesInfo()
13945         group_to_instances = dict((uuid, []) for uuid in self.wanted)
13946
13947         for instance in all_instances.values():
13948           node = instance.primary_node
13949           if node in node_to_group:
13950             group_to_instances[node_to_group[node]].append(instance.name)
13951
13952         if not do_nodes:
13953           # Do not pass on node information if it was not requested.
13954           group_to_nodes = None
13955
13956     return query.GroupQueryData(self._cluster,
13957                                 [self._all_groups[uuid]
13958                                  for uuid in self.wanted],
13959                                 group_to_nodes, group_to_instances,
13960                                 query.GQ_DISKPARAMS in self.requested_data)
13961
13962
13963 class LUGroupQuery(NoHooksLU):
13964   """Logical unit for querying node groups.
13965
13966   """
13967   REQ_BGL = False
13968
13969   def CheckArguments(self):
13970     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13971                           self.op.output_fields, False)
13972
13973   def ExpandNames(self):
13974     self.gq.ExpandNames(self)
13975
13976   def DeclareLocks(self, level):
13977     self.gq.DeclareLocks(self, level)
13978
13979   def Exec(self, feedback_fn):
13980     return self.gq.OldStyleQuery(self)
13981
13982
13983 class LUGroupSetParams(LogicalUnit):
13984   """Modifies the parameters of a node group.
13985
13986   """
13987   HPATH = "group-modify"
13988   HTYPE = constants.HTYPE_GROUP
13989   REQ_BGL = False
13990
13991   def CheckArguments(self):
13992     all_changes = [
13993       self.op.ndparams,
13994       self.op.diskparams,
13995       self.op.alloc_policy,
13996       self.op.hv_state,
13997       self.op.disk_state,
13998       self.op.ipolicy,
13999       ]
14000
14001     if all_changes.count(None) == len(all_changes):
14002       raise errors.OpPrereqError("Please pass at least one modification",
14003                                  errors.ECODE_INVAL)
14004
14005   def ExpandNames(self):
14006     # This raises errors.OpPrereqError on its own:
14007     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14008
14009     self.needed_locks = {
14010       locking.LEVEL_INSTANCE: [],
14011       locking.LEVEL_NODEGROUP: [self.group_uuid],
14012       }
14013
14014     self.share_locks[locking.LEVEL_INSTANCE] = 1
14015
14016   def DeclareLocks(self, level):
14017     if level == locking.LEVEL_INSTANCE:
14018       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14019
14020       # Lock instances optimistically, needs verification once group lock has
14021       # been acquired
14022       self.needed_locks[locking.LEVEL_INSTANCE] = \
14023           self.cfg.GetNodeGroupInstances(self.group_uuid)
14024
14025   @staticmethod
14026   def _UpdateAndVerifyDiskParams(old, new):
14027     """Updates and verifies disk parameters.
14028
14029     """
14030     new_params = _GetUpdatedParams(old, new)
14031     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14032     return new_params
14033
14034   def CheckPrereq(self):
14035     """Check prerequisites.
14036
14037     """
14038     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14039
14040     # Check if locked instances are still correct
14041     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14042
14043     self.group = self.cfg.GetNodeGroup(self.group_uuid)
14044     cluster = self.cfg.GetClusterInfo()
14045
14046     if self.group is None:
14047       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14048                                (self.op.group_name, self.group_uuid))
14049
14050     if self.op.ndparams:
14051       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14052       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14053       self.new_ndparams = new_ndparams
14054
14055     if self.op.diskparams:
14056       diskparams = self.group.diskparams
14057       uavdp = self._UpdateAndVerifyDiskParams
14058       # For each disktemplate subdict update and verify the values
14059       new_diskparams = dict((dt,
14060                              uavdp(diskparams.get(dt, {}),
14061                                    self.op.diskparams[dt]))
14062                             for dt in constants.DISK_TEMPLATES
14063                             if dt in self.op.diskparams)
14064       # As we've all subdicts of diskparams ready, lets merge the actual
14065       # dict with all updated subdicts
14066       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14067       try:
14068         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14069       except errors.OpPrereqError, err:
14070         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14071                                    errors.ECODE_INVAL)
14072
14073     if self.op.hv_state:
14074       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14075                                                  self.group.hv_state_static)
14076
14077     if self.op.disk_state:
14078       self.new_disk_state = \
14079         _MergeAndVerifyDiskState(self.op.disk_state,
14080                                  self.group.disk_state_static)
14081
14082     if self.op.ipolicy:
14083       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14084                                             self.op.ipolicy,
14085                                             group_policy=True)
14086
14087       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14088       inst_filter = lambda inst: inst.name in owned_instances
14089       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14090       gmi = ganeti.masterd.instance
14091       violations = \
14092           _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14093                                                                   self.group),
14094                                         new_ipolicy, instances)
14095
14096       if violations:
14097         self.LogWarning("After the ipolicy change the following instances"
14098                         " violate them: %s",
14099                         utils.CommaJoin(violations))
14100
14101   def BuildHooksEnv(self):
14102     """Build hooks env.
14103
14104     """
14105     return {
14106       "GROUP_NAME": self.op.group_name,
14107       "NEW_ALLOC_POLICY": self.op.alloc_policy,
14108       }
14109
14110   def BuildHooksNodes(self):
14111     """Build hooks nodes.
14112
14113     """
14114     mn = self.cfg.GetMasterNode()
14115     return ([mn], [mn])
14116
14117   def Exec(self, feedback_fn):
14118     """Modifies the node group.
14119
14120     """
14121     result = []
14122
14123     if self.op.ndparams:
14124       self.group.ndparams = self.new_ndparams
14125       result.append(("ndparams", str(self.group.ndparams)))
14126
14127     if self.op.diskparams:
14128       self.group.diskparams = self.new_diskparams
14129       result.append(("diskparams", str(self.group.diskparams)))
14130
14131     if self.op.alloc_policy:
14132       self.group.alloc_policy = self.op.alloc_policy
14133
14134     if self.op.hv_state:
14135       self.group.hv_state_static = self.new_hv_state
14136
14137     if self.op.disk_state:
14138       self.group.disk_state_static = self.new_disk_state
14139
14140     if self.op.ipolicy:
14141       self.group.ipolicy = self.new_ipolicy
14142
14143     self.cfg.Update(self.group, feedback_fn)
14144     return result
14145
14146
14147 class LUGroupRemove(LogicalUnit):
14148   HPATH = "group-remove"
14149   HTYPE = constants.HTYPE_GROUP
14150   REQ_BGL = False
14151
14152   def ExpandNames(self):
14153     # This will raises errors.OpPrereqError on its own:
14154     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14155     self.needed_locks = {
14156       locking.LEVEL_NODEGROUP: [self.group_uuid],
14157       }
14158
14159   def CheckPrereq(self):
14160     """Check prerequisites.
14161
14162     This checks that the given group name exists as a node group, that is
14163     empty (i.e., contains no nodes), and that is not the last group of the
14164     cluster.
14165
14166     """
14167     # Verify that the group is empty.
14168     group_nodes = [node.name
14169                    for node in self.cfg.GetAllNodesInfo().values()
14170                    if node.group == self.group_uuid]
14171
14172     if group_nodes:
14173       raise errors.OpPrereqError("Group '%s' not empty, has the following"
14174                                  " nodes: %s" %
14175                                  (self.op.group_name,
14176                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
14177                                  errors.ECODE_STATE)
14178
14179     # Verify the cluster would not be left group-less.
14180     if len(self.cfg.GetNodeGroupList()) == 1:
14181       raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14182                                  " removed" % self.op.group_name,
14183                                  errors.ECODE_STATE)
14184
14185   def BuildHooksEnv(self):
14186     """Build hooks env.
14187
14188     """
14189     return {
14190       "GROUP_NAME": self.op.group_name,
14191       }
14192
14193   def BuildHooksNodes(self):
14194     """Build hooks nodes.
14195
14196     """
14197     mn = self.cfg.GetMasterNode()
14198     return ([mn], [mn])
14199
14200   def Exec(self, feedback_fn):
14201     """Remove the node group.
14202
14203     """
14204     try:
14205       self.cfg.RemoveNodeGroup(self.group_uuid)
14206     except errors.ConfigurationError:
14207       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14208                                (self.op.group_name, self.group_uuid))
14209
14210     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14211
14212
14213 class LUGroupRename(LogicalUnit):
14214   HPATH = "group-rename"
14215   HTYPE = constants.HTYPE_GROUP
14216   REQ_BGL = False
14217
14218   def ExpandNames(self):
14219     # This raises errors.OpPrereqError on its own:
14220     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14221
14222     self.needed_locks = {
14223       locking.LEVEL_NODEGROUP: [self.group_uuid],
14224       }
14225
14226   def CheckPrereq(self):
14227     """Check prerequisites.
14228
14229     Ensures requested new name is not yet used.
14230
14231     """
14232     try:
14233       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14234     except errors.OpPrereqError:
14235       pass
14236     else:
14237       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14238                                  " node group (UUID: %s)" %
14239                                  (self.op.new_name, new_name_uuid),
14240                                  errors.ECODE_EXISTS)
14241
14242   def BuildHooksEnv(self):
14243     """Build hooks env.
14244
14245     """
14246     return {
14247       "OLD_NAME": self.op.group_name,
14248       "NEW_NAME": self.op.new_name,
14249       }
14250
14251   def BuildHooksNodes(self):
14252     """Build hooks nodes.
14253
14254     """
14255     mn = self.cfg.GetMasterNode()
14256
14257     all_nodes = self.cfg.GetAllNodesInfo()
14258     all_nodes.pop(mn, None)
14259
14260     run_nodes = [mn]
14261     run_nodes.extend(node.name for node in all_nodes.values()
14262                      if node.group == self.group_uuid)
14263
14264     return (run_nodes, run_nodes)
14265
14266   def Exec(self, feedback_fn):
14267     """Rename the node group.
14268
14269     """
14270     group = self.cfg.GetNodeGroup(self.group_uuid)
14271
14272     if group is None:
14273       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14274                                (self.op.group_name, self.group_uuid))
14275
14276     group.name = self.op.new_name
14277     self.cfg.Update(group, feedback_fn)
14278
14279     return self.op.new_name
14280
14281
14282 class LUGroupEvacuate(LogicalUnit):
14283   HPATH = "group-evacuate"
14284   HTYPE = constants.HTYPE_GROUP
14285   REQ_BGL = False
14286
14287   def ExpandNames(self):
14288     # This raises errors.OpPrereqError on its own:
14289     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14290
14291     if self.op.target_groups:
14292       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14293                                   self.op.target_groups)
14294     else:
14295       self.req_target_uuids = []
14296
14297     if self.group_uuid in self.req_target_uuids:
14298       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14299                                  " as a target group (targets are %s)" %
14300                                  (self.group_uuid,
14301                                   utils.CommaJoin(self.req_target_uuids)),
14302                                  errors.ECODE_INVAL)
14303
14304     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14305
14306     self.share_locks = _ShareAll()
14307     self.needed_locks = {
14308       locking.LEVEL_INSTANCE: [],
14309       locking.LEVEL_NODEGROUP: [],
14310       locking.LEVEL_NODE: [],
14311       }
14312
14313   def DeclareLocks(self, level):
14314     if level == locking.LEVEL_INSTANCE:
14315       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14316
14317       # Lock instances optimistically, needs verification once node and group
14318       # locks have been acquired
14319       self.needed_locks[locking.LEVEL_INSTANCE] = \
14320         self.cfg.GetNodeGroupInstances(self.group_uuid)
14321
14322     elif level == locking.LEVEL_NODEGROUP:
14323       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14324
14325       if self.req_target_uuids:
14326         lock_groups = set([self.group_uuid] + self.req_target_uuids)
14327
14328         # Lock all groups used by instances optimistically; this requires going
14329         # via the node before it's locked, requiring verification later on
14330         lock_groups.update(group_uuid
14331                            for instance_name in
14332                              self.owned_locks(locking.LEVEL_INSTANCE)
14333                            for group_uuid in
14334                              self.cfg.GetInstanceNodeGroups(instance_name))
14335       else:
14336         # No target groups, need to lock all of them
14337         lock_groups = locking.ALL_SET
14338
14339       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14340
14341     elif level == locking.LEVEL_NODE:
14342       # This will only lock the nodes in the group to be evacuated which
14343       # contain actual instances
14344       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14345       self._LockInstancesNodes()
14346
14347       # Lock all nodes in group to be evacuated and target groups
14348       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14349       assert self.group_uuid in owned_groups
14350       member_nodes = [node_name
14351                       for group in owned_groups
14352                       for node_name in self.cfg.GetNodeGroup(group).members]
14353       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14354
14355   def CheckPrereq(self):
14356     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14357     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14358     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14359
14360     assert owned_groups.issuperset(self.req_target_uuids)
14361     assert self.group_uuid in owned_groups
14362
14363     # Check if locked instances are still correct
14364     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14365
14366     # Get instance information
14367     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14368
14369     # Check if node groups for locked instances are still correct
14370     _CheckInstancesNodeGroups(self.cfg, self.instances,
14371                               owned_groups, owned_nodes, self.group_uuid)
14372
14373     if self.req_target_uuids:
14374       # User requested specific target groups
14375       self.target_uuids = self.req_target_uuids
14376     else:
14377       # All groups except the one to be evacuated are potential targets
14378       self.target_uuids = [group_uuid for group_uuid in owned_groups
14379                            if group_uuid != self.group_uuid]
14380
14381       if not self.target_uuids:
14382         raise errors.OpPrereqError("There are no possible target groups",
14383                                    errors.ECODE_INVAL)
14384
14385   def BuildHooksEnv(self):
14386     """Build hooks env.
14387
14388     """
14389     return {
14390       "GROUP_NAME": self.op.group_name,
14391       "TARGET_GROUPS": " ".join(self.target_uuids),
14392       }
14393
14394   def BuildHooksNodes(self):
14395     """Build hooks nodes.
14396
14397     """
14398     mn = self.cfg.GetMasterNode()
14399
14400     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14401
14402     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14403
14404     return (run_nodes, run_nodes)
14405
14406   def Exec(self, feedback_fn):
14407     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14408
14409     assert self.group_uuid not in self.target_uuids
14410
14411     req = iallocator.IAReqGroupChange(instances=instances,
14412                                       target_groups=self.target_uuids)
14413     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14414
14415     ial.Run(self.op.iallocator)
14416
14417     if not ial.success:
14418       raise errors.OpPrereqError("Can't compute group evacuation using"
14419                                  " iallocator '%s': %s" %
14420                                  (self.op.iallocator, ial.info),
14421                                  errors.ECODE_NORES)
14422
14423     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14424
14425     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14426                  len(jobs), self.op.group_name)
14427
14428     return ResultWithJobs(jobs)
14429
14430
14431 class TagsLU(NoHooksLU): # pylint: disable=W0223
14432   """Generic tags LU.
14433
14434   This is an abstract class which is the parent of all the other tags LUs.
14435
14436   """
14437   def ExpandNames(self):
14438     self.group_uuid = None
14439     self.needed_locks = {}
14440
14441     if self.op.kind == constants.TAG_NODE:
14442       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14443       lock_level = locking.LEVEL_NODE
14444       lock_name = self.op.name
14445     elif self.op.kind == constants.TAG_INSTANCE:
14446       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14447       lock_level = locking.LEVEL_INSTANCE
14448       lock_name = self.op.name
14449     elif self.op.kind == constants.TAG_NODEGROUP:
14450       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14451       lock_level = locking.LEVEL_NODEGROUP
14452       lock_name = self.group_uuid
14453     else:
14454       lock_level = None
14455       lock_name = None
14456
14457     if lock_level and getattr(self.op, "use_locking", True):
14458       self.needed_locks[lock_level] = lock_name
14459
14460     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14461     # not possible to acquire the BGL based on opcode parameters)
14462
14463   def CheckPrereq(self):
14464     """Check prerequisites.
14465
14466     """
14467     if self.op.kind == constants.TAG_CLUSTER:
14468       self.target = self.cfg.GetClusterInfo()
14469     elif self.op.kind == constants.TAG_NODE:
14470       self.target = self.cfg.GetNodeInfo(self.op.name)
14471     elif self.op.kind == constants.TAG_INSTANCE:
14472       self.target = self.cfg.GetInstanceInfo(self.op.name)
14473     elif self.op.kind == constants.TAG_NODEGROUP:
14474       self.target = self.cfg.GetNodeGroup(self.group_uuid)
14475     else:
14476       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14477                                  str(self.op.kind), errors.ECODE_INVAL)
14478
14479
14480 class LUTagsGet(TagsLU):
14481   """Returns the tags of a given object.
14482
14483   """
14484   REQ_BGL = False
14485
14486   def ExpandNames(self):
14487     TagsLU.ExpandNames(self)
14488
14489     # Share locks as this is only a read operation
14490     self.share_locks = _ShareAll()
14491
14492   def Exec(self, feedback_fn):
14493     """Returns the tag list.
14494
14495     """
14496     return list(self.target.GetTags())
14497
14498
14499 class LUTagsSearch(NoHooksLU):
14500   """Searches the tags for a given pattern.
14501
14502   """
14503   REQ_BGL = False
14504
14505   def ExpandNames(self):
14506     self.needed_locks = {}
14507
14508   def CheckPrereq(self):
14509     """Check prerequisites.
14510
14511     This checks the pattern passed for validity by compiling it.
14512
14513     """
14514     try:
14515       self.re = re.compile(self.op.pattern)
14516     except re.error, err:
14517       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14518                                  (self.op.pattern, err), errors.ECODE_INVAL)
14519
14520   def Exec(self, feedback_fn):
14521     """Returns the tag list.
14522
14523     """
14524     cfg = self.cfg
14525     tgts = [("/cluster", cfg.GetClusterInfo())]
14526     ilist = cfg.GetAllInstancesInfo().values()
14527     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14528     nlist = cfg.GetAllNodesInfo().values()
14529     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14530     tgts.extend(("/nodegroup/%s" % n.name, n)
14531                 for n in cfg.GetAllNodeGroupsInfo().values())
14532     results = []
14533     for path, target in tgts:
14534       for tag in target.GetTags():
14535         if self.re.search(tag):
14536           results.append((path, tag))
14537     return results
14538
14539
14540 class LUTagsSet(TagsLU):
14541   """Sets a tag on a given object.
14542
14543   """
14544   REQ_BGL = False
14545
14546   def CheckPrereq(self):
14547     """Check prerequisites.
14548
14549     This checks the type and length of the tag name and value.
14550
14551     """
14552     TagsLU.CheckPrereq(self)
14553     for tag in self.op.tags:
14554       objects.TaggableObject.ValidateTag(tag)
14555
14556   def Exec(self, feedback_fn):
14557     """Sets the tag.
14558
14559     """
14560     try:
14561       for tag in self.op.tags:
14562         self.target.AddTag(tag)
14563     except errors.TagError, err:
14564       raise errors.OpExecError("Error while setting tag: %s" % str(err))
14565     self.cfg.Update(self.target, feedback_fn)
14566
14567
14568 class LUTagsDel(TagsLU):
14569   """Delete a list of tags from a given object.
14570
14571   """
14572   REQ_BGL = False
14573
14574   def CheckPrereq(self):
14575     """Check prerequisites.
14576
14577     This checks that we have the given tag.
14578
14579     """
14580     TagsLU.CheckPrereq(self)
14581     for tag in self.op.tags:
14582       objects.TaggableObject.ValidateTag(tag)
14583     del_tags = frozenset(self.op.tags)
14584     cur_tags = self.target.GetTags()
14585
14586     diff_tags = del_tags - cur_tags
14587     if diff_tags:
14588       diff_names = ("'%s'" % i for i in sorted(diff_tags))
14589       raise errors.OpPrereqError("Tag(s) %s not found" %
14590                                  (utils.CommaJoin(diff_names), ),
14591                                  errors.ECODE_NOENT)
14592
14593   def Exec(self, feedback_fn):
14594     """Remove the tag from the object.
14595
14596     """
14597     for tag in self.op.tags:
14598       self.target.RemoveTag(tag)
14599     self.cfg.Update(self.target, feedback_fn)
14600
14601
14602 class LUTestDelay(NoHooksLU):
14603   """Sleep for a specified amount of time.
14604
14605   This LU sleeps on the master and/or nodes for a specified amount of
14606   time.
14607
14608   """
14609   REQ_BGL = False
14610
14611   def ExpandNames(self):
14612     """Expand names and set required locks.
14613
14614     This expands the node list, if any.
14615
14616     """
14617     self.needed_locks = {}
14618     if self.op.on_nodes:
14619       # _GetWantedNodes can be used here, but is not always appropriate to use
14620       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14621       # more information.
14622       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14623       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14624
14625   def _TestDelay(self):
14626     """Do the actual sleep.
14627
14628     """
14629     if self.op.on_master:
14630       if not utils.TestDelay(self.op.duration):
14631         raise errors.OpExecError("Error during master delay test")
14632     if self.op.on_nodes:
14633       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14634       for node, node_result in result.items():
14635         node_result.Raise("Failure during rpc call to node %s" % node)
14636
14637   def Exec(self, feedback_fn):
14638     """Execute the test delay opcode, with the wanted repetitions.
14639
14640     """
14641     if self.op.repeat == 0:
14642       self._TestDelay()
14643     else:
14644       top_value = self.op.repeat - 1
14645       for i in range(self.op.repeat):
14646         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14647         self._TestDelay()
14648
14649
14650 class LUTestJqueue(NoHooksLU):
14651   """Utility LU to test some aspects of the job queue.
14652
14653   """
14654   REQ_BGL = False
14655
14656   # Must be lower than default timeout for WaitForJobChange to see whether it
14657   # notices changed jobs
14658   _CLIENT_CONNECT_TIMEOUT = 20.0
14659   _CLIENT_CONFIRM_TIMEOUT = 60.0
14660
14661   @classmethod
14662   def _NotifyUsingSocket(cls, cb, errcls):
14663     """Opens a Unix socket and waits for another program to connect.
14664
14665     @type cb: callable
14666     @param cb: Callback to send socket name to client
14667     @type errcls: class
14668     @param errcls: Exception class to use for errors
14669
14670     """
14671     # Using a temporary directory as there's no easy way to create temporary
14672     # sockets without writing a custom loop around tempfile.mktemp and
14673     # socket.bind
14674     tmpdir = tempfile.mkdtemp()
14675     try:
14676       tmpsock = utils.PathJoin(tmpdir, "sock")
14677
14678       logging.debug("Creating temporary socket at %s", tmpsock)
14679       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14680       try:
14681         sock.bind(tmpsock)
14682         sock.listen(1)
14683
14684         # Send details to client
14685         cb(tmpsock)
14686
14687         # Wait for client to connect before continuing
14688         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14689         try:
14690           (conn, _) = sock.accept()
14691         except socket.error, err:
14692           raise errcls("Client didn't connect in time (%s)" % err)
14693       finally:
14694         sock.close()
14695     finally:
14696       # Remove as soon as client is connected
14697       shutil.rmtree(tmpdir)
14698
14699     # Wait for client to close
14700     try:
14701       try:
14702         # pylint: disable=E1101
14703         # Instance of '_socketobject' has no ... member
14704         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14705         conn.recv(1)
14706       except socket.error, err:
14707         raise errcls("Client failed to confirm notification (%s)" % err)
14708     finally:
14709       conn.close()
14710
14711   def _SendNotification(self, test, arg, sockname):
14712     """Sends a notification to the client.
14713
14714     @type test: string
14715     @param test: Test name
14716     @param arg: Test argument (depends on test)
14717     @type sockname: string
14718     @param sockname: Socket path
14719
14720     """
14721     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14722
14723   def _Notify(self, prereq, test, arg):
14724     """Notifies the client of a test.
14725
14726     @type prereq: bool
14727     @param prereq: Whether this is a prereq-phase test
14728     @type test: string
14729     @param test: Test name
14730     @param arg: Test argument (depends on test)
14731
14732     """
14733     if prereq:
14734       errcls = errors.OpPrereqError
14735     else:
14736       errcls = errors.OpExecError
14737
14738     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14739                                                   test, arg),
14740                                    errcls)
14741
14742   def CheckArguments(self):
14743     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14744     self.expandnames_calls = 0
14745
14746   def ExpandNames(self):
14747     checkargs_calls = getattr(self, "checkargs_calls", 0)
14748     if checkargs_calls < 1:
14749       raise errors.ProgrammerError("CheckArguments was not called")
14750
14751     self.expandnames_calls += 1
14752
14753     if self.op.notify_waitlock:
14754       self._Notify(True, constants.JQT_EXPANDNAMES, None)
14755
14756     self.LogInfo("Expanding names")
14757
14758     # Get lock on master node (just to get a lock, not for a particular reason)
14759     self.needed_locks = {
14760       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14761       }
14762
14763   def Exec(self, feedback_fn):
14764     if self.expandnames_calls < 1:
14765       raise errors.ProgrammerError("ExpandNames was not called")
14766
14767     if self.op.notify_exec:
14768       self._Notify(False, constants.JQT_EXEC, None)
14769
14770     self.LogInfo("Executing")
14771
14772     if self.op.log_messages:
14773       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14774       for idx, msg in enumerate(self.op.log_messages):
14775         self.LogInfo("Sending log message %s", idx + 1)
14776         feedback_fn(constants.JQT_MSGPREFIX + msg)
14777         # Report how many test messages have been sent
14778         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14779
14780     if self.op.fail:
14781       raise errors.OpExecError("Opcode failure was requested")
14782
14783     return True
14784
14785
14786 class LUTestAllocator(NoHooksLU):
14787   """Run allocator tests.
14788
14789   This LU runs the allocator tests
14790
14791   """
14792   def CheckPrereq(self):
14793     """Check prerequisites.
14794
14795     This checks the opcode parameters depending on the director and mode test.
14796
14797     """
14798     if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
14799                         constants.IALLOCATOR_MODE_MULTI_ALLOC):
14800       for attr in ["memory", "disks", "disk_template",
14801                    "os", "tags", "nics", "vcpus"]:
14802         if not hasattr(self.op, attr):
14803           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14804                                      attr, errors.ECODE_INVAL)
14805       iname = self.cfg.ExpandInstanceName(self.op.name)
14806       if iname is not None:
14807         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14808                                    iname, errors.ECODE_EXISTS)
14809       if not isinstance(self.op.nics, list):
14810         raise errors.OpPrereqError("Invalid parameter 'nics'",
14811                                    errors.ECODE_INVAL)
14812       if not isinstance(self.op.disks, list):
14813         raise errors.OpPrereqError("Invalid parameter 'disks'",
14814                                    errors.ECODE_INVAL)
14815       for row in self.op.disks:
14816         if (not isinstance(row, dict) or
14817             constants.IDISK_SIZE not in row or
14818             not isinstance(row[constants.IDISK_SIZE], int) or
14819             constants.IDISK_MODE not in row or
14820             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14821           raise errors.OpPrereqError("Invalid contents of the 'disks'"
14822                                      " parameter", errors.ECODE_INVAL)
14823       if self.op.hypervisor is None:
14824         self.op.hypervisor = self.cfg.GetHypervisorType()
14825     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14826       fname = _ExpandInstanceName(self.cfg, self.op.name)
14827       self.op.name = fname
14828       self.relocate_from = \
14829           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14830     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14831                           constants.IALLOCATOR_MODE_NODE_EVAC):
14832       if not self.op.instances:
14833         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14834       self.op.instances = _GetWantedInstances(self, self.op.instances)
14835     else:
14836       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14837                                  self.op.mode, errors.ECODE_INVAL)
14838
14839     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14840       if self.op.allocator is None:
14841         raise errors.OpPrereqError("Missing allocator name",
14842                                    errors.ECODE_INVAL)
14843     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14844       raise errors.OpPrereqError("Wrong allocator test '%s'" %
14845                                  self.op.direction, errors.ECODE_INVAL)
14846
14847   def Exec(self, feedback_fn):
14848     """Run the allocator test.
14849
14850     """
14851     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14852       req = iallocator.IAReqInstanceAlloc(name=self.op.name,
14853                                           memory=self.op.memory,
14854                                           disks=self.op.disks,
14855                                           disk_template=self.op.disk_template,
14856                                           os=self.op.os,
14857                                           tags=self.op.tags,
14858                                           nics=self.op.nics,
14859                                           vcpus=self.op.vcpus,
14860                                           spindle_use=self.op.spindle_use,
14861                                           hypervisor=self.op.hypervisor)
14862     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14863       req = iallocator.IAReqRelocate(name=self.op.name,
14864                                      relocate_from=list(self.relocate_from))
14865     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14866       req = iallocator.IAReqGroupChange(instances=self.op.instances,
14867                                         target_groups=self.op.target_groups)
14868     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14869       req = iallocator.IAReqNodeEvac(instances=self.op.instances,
14870                                      evac_mode=self.op.evac_mode)
14871     elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
14872       disk_template = self.op.disk_template
14873       insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
14874                                              memory=self.op.memory,
14875                                              disks=self.op.disks,
14876                                              disk_template=disk_template,
14877                                              os=self.op.os,
14878                                              tags=self.op.tags,
14879                                              nics=self.op.nics,
14880                                              vcpus=self.op.vcpus,
14881                                              spindle_use=self.op.spindle_use,
14882                                              hypervisor=self.op.hypervisor)
14883                for idx in range(self.op.count)]
14884       req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
14885     else:
14886       raise errors.ProgrammerError("Uncatched mode %s in"
14887                                    " LUTestAllocator.Exec", self.op.mode)
14888
14889     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14890     if self.op.direction == constants.IALLOCATOR_DIR_IN:
14891       result = ial.in_text
14892     else:
14893       ial.Run(self.op.allocator, validate=False)
14894       result = ial.out_text
14895     return result
14896
14897
14898 #: Query type implementations
14899 _QUERY_IMPL = {
14900   constants.QR_CLUSTER: _ClusterQuery,
14901   constants.QR_INSTANCE: _InstanceQuery,
14902   constants.QR_NODE: _NodeQuery,
14903   constants.QR_GROUP: _GroupQuery,
14904   constants.QR_OS: _OsQuery,
14905   constants.QR_EXPORT: _ExportQuery,
14906   }
14907
14908 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14909
14910
14911 def _GetQueryImplementation(name):
14912   """Returns the implemtnation for a query type.
14913
14914   @param name: Query type, must be one of L{constants.QR_VIA_OP}
14915
14916   """
14917   try:
14918     return _QUERY_IMPL[name]
14919   except KeyError:
14920     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14921                                errors.ECODE_INVAL)