code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56 from ganeti import query
  57 from ganeti import qlang
  58 from ganeti import opcodes
  59 from ganeti import ht
  60 from ganeti import rpc
  61 from ganeti import runtime
  62 from ganeti.masterd import iallocator
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 # States of instance
  68 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  69 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  70 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  71
  72 #: Instance status in which an instance can be marked as offline/online
  73 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  74   constants.ADMINST_OFFLINE,
  75   ]))
  76
  77
  78 class ResultWithJobs:
  79   """Data container for LU results with jobs.
  80
  81   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  82   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  83   contained in the C{jobs} attribute and include the job IDs in the opcode
  84   result.
  85
  86   """
  87   def __init__(self, jobs, **kwargs):
  88     """Initializes this class.
  89
  90     Additional return values can be specified as keyword arguments.
  91
  92     @type jobs: list of lists of L{opcode.OpCode}
  93     @param jobs: A list of lists of opcode objects
  94
  95     """
  96     self.jobs = jobs
  97     self.other = kwargs
  98
  99
 100 class LogicalUnit(object):
 101   """Logical Unit base class.
 102
 103   Subclasses must follow these rules:
 104     - implement ExpandNames
 105     - implement CheckPrereq (except when tasklets are used)
 106     - implement Exec (except when tasklets are used)
 107     - implement BuildHooksEnv
 108     - implement BuildHooksNodes
 109     - redefine HPATH and HTYPE
 110     - optionally redefine their run requirements:
 111         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 112
 113   Note that all commands require root permissions.
 114
 115   @ivar dry_run_result: the value (if any) that will be returned to the caller
 116       in dry-run mode (signalled by opcode dry_run parameter)
 117
 118   """
 119   HPATH = None
 120   HTYPE = None
 121   REQ_BGL = True
 122
 123   def __init__(self, processor, op, context, rpc_runner):
 124     """Constructor for LogicalUnit.
 125
 126     This needs to be overridden in derived classes in order to check op
 127     validity.
 128
 129     """
 130     self.proc = processor
 131     self.op = op
 132     self.cfg = context.cfg
 133     self.glm = context.glm
 134     # readability alias
 135     self.owned_locks = context.glm.list_owned
 136     self.context = context
 137     self.rpc = rpc_runner
 138     # Dicts used to declare locking needs to mcpu
 139     self.needed_locks = None
 140     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 141     self.add_locks = {}
 142     self.remove_locks = {}
 143     # Used to force good behavior when calling helper functions
 144     self.recalculate_locks = {}
 145     # logging
 146     self.Log = processor.Log # pylint: disable=C0103
 147     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 148     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 149     self.LogStep = processor.LogStep # pylint: disable=C0103
 150     # support for dry-run
 151     self.dry_run_result = None
 152     # support for generic debug attribute
 153     if (not hasattr(self.op, "debug_level") or
 154         not isinstance(self.op.debug_level, int)):
 155       self.op.debug_level = 0
 156
 157     # Tasklets
 158     self.tasklets = None
 159
 160     # Validate opcode parameters and set defaults
 161     self.op.Validate(True)
 162
 163     self.CheckArguments()
 164
 165   def CheckArguments(self):
 166     """Check syntactic validity for the opcode arguments.
 167
 168     This method is for doing a simple syntactic check and ensure
 169     validity of opcode parameters, without any cluster-related
 170     checks. While the same can be accomplished in ExpandNames and/or
 171     CheckPrereq, doing these separate is better because:
 172
 173       - ExpandNames is left as as purely a lock-related function
 174       - CheckPrereq is run after we have acquired locks (and possible
 175         waited for them)
 176
 177     The function is allowed to change the self.op attribute so that
 178     later methods can no longer worry about missing parameters.
 179
 180     """
 181     pass
 182
 183   def ExpandNames(self):
 184     """Expand names for this LU.
 185
 186     This method is called before starting to execute the opcode, and it should
 187     update all the parameters of the opcode to their canonical form (e.g. a
 188     short node name must be fully expanded after this method has successfully
 189     completed). This way locking, hooks, logging, etc. can work correctly.
 190
 191     LUs which implement this method must also populate the self.needed_locks
 192     member, as a dict with lock levels as keys, and a list of needed lock names
 193     as values. Rules:
 194
 195       - use an empty dict if you don't need any lock
 196       - if you don't need any lock at a particular level omit that
 197         level (note that in this case C{DeclareLocks} won't be called
 198         at all for that level)
 199       - if you need locks at a level, but you can't calculate it in
 200         this function, initialise that level with an empty list and do
 201         further processing in L{LogicalUnit.DeclareLocks} (see that
 202         function's docstring)
 203       - don't put anything for the BGL level
 204       - if you want all locks at a level use L{locking.ALL_SET} as a value
 205
 206     If you need to share locks (rather than acquire them exclusively) at one
 207     level you can modify self.share_locks, setting a true value (usually 1) for
 208     that level. By default locks are not shared.
 209
 210     This function can also define a list of tasklets, which then will be
 211     executed in order instead of the usual LU-level CheckPrereq and Exec
 212     functions, if those are not defined by the LU.
 213
 214     Examples::
 215
 216       # Acquire all nodes and one instance
 217       self.needed_locks = {
 218         locking.LEVEL_NODE: locking.ALL_SET,
 219         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 220       }
 221       # Acquire just two nodes
 222       self.needed_locks = {
 223         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 224       }
 225       # Acquire no locks
 226       self.needed_locks = {} # No, you can't leave it to the default value None
 227
 228     """
 229     # The implementation of this method is mandatory only if the new LU is
 230     # concurrent, so that old LUs don't need to be changed all at the same
 231     # time.
 232     if self.REQ_BGL:
 233       self.needed_locks = {} # Exclusive LUs don't need locks.
 234     else:
 235       raise NotImplementedError
 236
 237   def DeclareLocks(self, level):
 238     """Declare LU locking needs for a level
 239
 240     While most LUs can just declare their locking needs at ExpandNames time,
 241     sometimes there's the need to calculate some locks after having acquired
 242     the ones before. This function is called just before acquiring locks at a
 243     particular level, but after acquiring the ones at lower levels, and permits
 244     such calculations. It can be used to modify self.needed_locks, and by
 245     default it does nothing.
 246
 247     This function is only called if you have something already set in
 248     self.needed_locks for the level.
 249
 250     @param level: Locking level which is going to be locked
 251     @type level: member of L{ganeti.locking.LEVELS}
 252
 253     """
 254
 255   def CheckPrereq(self):
 256     """Check prerequisites for this LU.
 257
 258     This method should check that the prerequisites for the execution
 259     of this LU are fulfilled. It can do internode communication, but
 260     it should be idempotent - no cluster or system changes are
 261     allowed.
 262
 263     The method should raise errors.OpPrereqError in case something is
 264     not fulfilled. Its return value is ignored.
 265
 266     This method should also update all the parameters of the opcode to
 267     their canonical form if it hasn't been done by ExpandNames before.
 268
 269     """
 270     if self.tasklets is not None:
 271       for (idx, tl) in enumerate(self.tasklets):
 272         logging.debug("Checking prerequisites for tasklet %s/%s",
 273                       idx + 1, len(self.tasklets))
 274         tl.CheckPrereq()
 275     else:
 276       pass
 277
 278   def Exec(self, feedback_fn):
 279     """Execute the LU.
 280
 281     This method should implement the actual work. It should raise
 282     errors.OpExecError for failures that are somewhat dealt with in
 283     code, or expected.
 284
 285     """
 286     if self.tasklets is not None:
 287       for (idx, tl) in enumerate(self.tasklets):
 288         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 289         tl.Exec(feedback_fn)
 290     else:
 291       raise NotImplementedError
 292
 293   def BuildHooksEnv(self):
 294     """Build hooks environment for this LU.
 295
 296     @rtype: dict
 297     @return: Dictionary containing the environment that will be used for
 298       running the hooks for this LU. The keys of the dict must not be prefixed
 299       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 300       will extend the environment with additional variables. If no environment
 301       should be defined, an empty dictionary should be returned (not C{None}).
 302     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 303       will not be called.
 304
 305     """
 306     raise NotImplementedError
 307
 308   def BuildHooksNodes(self):
 309     """Build list of nodes to run LU's hooks.
 310
 311     @rtype: tuple; (list, list)
 312     @return: Tuple containing a list of node names on which the hook
 313       should run before the execution and a list of node names on which the
 314       hook should run after the execution. No nodes should be returned as an
 315       empty list (and not None).
 316     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 317       will not be called.
 318
 319     """
 320     raise NotImplementedError
 321
 322   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 323     """Notify the LU about the results of its hooks.
 324
 325     This method is called every time a hooks phase is executed, and notifies
 326     the Logical Unit about the hooks' result. The LU can then use it to alter
 327     its result based on the hooks.  By default the method does nothing and the
 328     previous result is passed back unchanged but any LU can define it if it
 329     wants to use the local cluster hook-scripts somehow.
 330
 331     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 332         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 333     @param hook_results: the results of the multi-node hooks rpc call
 334     @param feedback_fn: function used send feedback back to the caller
 335     @param lu_result: the previous Exec result this LU had, or None
 336         in the PRE phase
 337     @return: the new Exec result, based on the previous result
 338         and hook results
 339
 340     """
 341     # API must be kept, thus we ignore the unused argument and could
 342     # be a function warnings
 343     # pylint: disable=W0613,R0201
 344     return lu_result
 345
 346   def _ExpandAndLockInstance(self):
 347     """Helper function to expand and lock an instance.
 348
 349     Many LUs that work on an instance take its name in self.op.instance_name
 350     and need to expand it and then declare the expanded name for locking. This
 351     function does it, and then updates self.op.instance_name to the expanded
 352     name. It also initializes needed_locks as a dict, if this hasn't been done
 353     before.
 354
 355     """
 356     if self.needed_locks is None:
 357       self.needed_locks = {}
 358     else:
 359       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 360         "_ExpandAndLockInstance called with instance-level locks set"
 361     self.op.instance_name = _ExpandInstanceName(self.cfg,
 362                                                 self.op.instance_name)
 363     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 364
 365   def _LockInstancesNodes(self, primary_only=False,
 366                           level=locking.LEVEL_NODE):
 367     """Helper function to declare instances' nodes for locking.
 368
 369     This function should be called after locking one or more instances to lock
 370     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 371     with all primary or secondary nodes for instances already locked and
 372     present in self.needed_locks[locking.LEVEL_INSTANCE].
 373
 374     It should be called from DeclareLocks, and for safety only works if
 375     self.recalculate_locks[locking.LEVEL_NODE] is set.
 376
 377     In the future it may grow parameters to just lock some instance's nodes, or
 378     to just lock primaries or secondary nodes, if needed.
 379
 380     If should be called in DeclareLocks in a way similar to::
 381
 382       if level == locking.LEVEL_NODE:
 383         self._LockInstancesNodes()
 384
 385     @type primary_only: boolean
 386     @param primary_only: only lock primary nodes of locked instances
 387     @param level: Which lock level to use for locking nodes
 388
 389     """
 390     assert level in self.recalculate_locks, \
 391       "_LockInstancesNodes helper function called with no nodes to recalculate"
 392
 393     # TODO: check if we're really been called with the instance locks held
 394
 395     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 396     # future we might want to have different behaviors depending on the value
 397     # of self.recalculate_locks[locking.LEVEL_NODE]
 398     wanted_nodes = []
 399     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 400     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 401       wanted_nodes.append(instance.primary_node)
 402       if not primary_only:
 403         wanted_nodes.extend(instance.secondary_nodes)
 404
 405     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 406       self.needed_locks[level] = wanted_nodes
 407     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 408       self.needed_locks[level].extend(wanted_nodes)
 409     else:
 410       raise errors.ProgrammerError("Unknown recalculation mode")
 411
 412     del self.recalculate_locks[level]
 413
 414
 415 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 416   """Simple LU which runs no hooks.
 417
 418   This LU is intended as a parent for other LogicalUnits which will
 419   run no hooks, in order to reduce duplicate code.
 420
 421   """
 422   HPATH = None
 423   HTYPE = None
 424
 425   def BuildHooksEnv(self):
 426     """Empty BuildHooksEnv for NoHooksLu.
 427
 428     This just raises an error.
 429
 430     """
 431     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 432
 433   def BuildHooksNodes(self):
 434     """Empty BuildHooksNodes for NoHooksLU.
 435
 436     """
 437     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 438
 439
 440 class Tasklet:
 441   """Tasklet base class.
 442
 443   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 444   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 445   tasklets know nothing about locks.
 446
 447   Subclasses must follow these rules:
 448     - Implement CheckPrereq
 449     - Implement Exec
 450
 451   """
 452   def __init__(self, lu):
 453     self.lu = lu
 454
 455     # Shortcuts
 456     self.cfg = lu.cfg
 457     self.rpc = lu.rpc
 458
 459   def CheckPrereq(self):
 460     """Check prerequisites for this tasklets.
 461
 462     This method should check whether the prerequisites for the execution of
 463     this tasklet are fulfilled. It can do internode communication, but it
 464     should be idempotent - no cluster or system changes are allowed.
 465
 466     The method should raise errors.OpPrereqError in case something is not
 467     fulfilled. Its return value is ignored.
 468
 469     This method should also update all parameters to their canonical form if it
 470     hasn't been done before.
 471
 472     """
 473     pass
 474
 475   def Exec(self, feedback_fn):
 476     """Execute the tasklet.
 477
 478     This method should implement the actual work. It should raise
 479     errors.OpExecError for failures that are somewhat dealt with in code, or
 480     expected.
 481
 482     """
 483     raise NotImplementedError
 484
 485
 486 class _QueryBase:
 487   """Base for query utility classes.
 488
 489   """
 490   #: Attribute holding field definitions
 491   FIELDS = None
 492
 493   #: Field to sort by
 494   SORT_FIELD = "name"
 495
 496   def __init__(self, qfilter, fields, use_locking):
 497     """Initializes this class.
 498
 499     """
 500     self.use_locking = use_locking
 501
 502     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 503                              namefield=self.SORT_FIELD)
 504     self.requested_data = self.query.RequestedData()
 505     self.names = self.query.RequestedNames()
 506
 507     # Sort only if no names were requested
 508     self.sort_by_name = not self.names
 509
 510     self.do_locking = None
 511     self.wanted = None
 512
 513   def _GetNames(self, lu, all_names, lock_level):
 514     """Helper function to determine names asked for in the query.
 515
 516     """
 517     if self.do_locking:
 518       names = lu.owned_locks(lock_level)
 519     else:
 520       names = all_names
 521
 522     if self.wanted == locking.ALL_SET:
 523       assert not self.names
 524       # caller didn't specify names, so ordering is not important
 525       return utils.NiceSort(names)
 526
 527     # caller specified names and we must keep the same order
 528     assert self.names
 529     assert not self.do_locking or lu.glm.is_owned(lock_level)
 530
 531     missing = set(self.wanted).difference(names)
 532     if missing:
 533       raise errors.OpExecError("Some items were removed before retrieving"
 534                                " their data: %s" % missing)
 535
 536     # Return expanded names
 537     return self.wanted
 538
 539   def ExpandNames(self, lu):
 540     """Expand names for this query.
 541
 542     See L{LogicalUnit.ExpandNames}.
 543
 544     """
 545     raise NotImplementedError()
 546
 547   def DeclareLocks(self, lu, level):
 548     """Declare locks for this query.
 549
 550     See L{LogicalUnit.DeclareLocks}.
 551
 552     """
 553     raise NotImplementedError()
 554
 555   def _GetQueryData(self, lu):
 556     """Collects all data for this query.
 557
 558     @return: Query data object
 559
 560     """
 561     raise NotImplementedError()
 562
 563   def NewStyleQuery(self, lu):
 564     """Collect data and execute query.
 565
 566     """
 567     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 568                                   sort_by_name=self.sort_by_name)
 569
 570   def OldStyleQuery(self, lu):
 571     """Collect data and execute query.
 572
 573     """
 574     return self.query.OldStyleQuery(self._GetQueryData(lu),
 575                                     sort_by_name=self.sort_by_name)
 576
 577
 578 def _ShareAll():
 579   """Returns a dict declaring all lock levels shared.
 580
 581   """
 582   return dict.fromkeys(locking.LEVELS, 1)
 583
 584
 585 def _AnnotateDiskParams(instance, devs, cfg):
 586   """Little helper wrapper to the rpc annotation method.
 587
 588   @param instance: The instance object
 589   @type devs: List of L{objects.Disk}
 590   @param devs: The root devices (not any of its children!)
 591   @param cfg: The config object
 592   @returns The annotated disk copies
 593   @see L{rpc.AnnotateDiskParams}
 594
 595   """
 596   return rpc.AnnotateDiskParams(instance.disk_template, devs,
 597                                 cfg.GetInstanceDiskParams(instance))
 598
 599
 600 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 601                               cur_group_uuid):
 602   """Checks if node groups for locked instances are still correct.
 603
 604   @type cfg: L{config.ConfigWriter}
 605   @param cfg: Cluster configuration
 606   @type instances: dict; string as key, L{objects.Instance} as value
 607   @param instances: Dictionary, instance name as key, instance object as value
 608   @type owned_groups: iterable of string
 609   @param owned_groups: List of owned groups
 610   @type owned_nodes: iterable of string
 611   @param owned_nodes: List of owned nodes
 612   @type cur_group_uuid: string or None
 613   @param cur_group_uuid: Optional group UUID to check against instance's groups
 614
 615   """
 616   for (name, inst) in instances.items():
 617     assert owned_nodes.issuperset(inst.all_nodes), \
 618       "Instance %s's nodes changed while we kept the lock" % name
 619
 620     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 621
 622     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 623       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 624
 625
 626 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
 627                              primary_only=False):
 628   """Checks if the owned node groups are still correct for an instance.
 629
 630   @type cfg: L{config.ConfigWriter}
 631   @param cfg: The cluster configuration
 632   @type instance_name: string
 633   @param instance_name: Instance name
 634   @type owned_groups: set or frozenset
 635   @param owned_groups: List of currently owned node groups
 636   @type primary_only: boolean
 637   @param primary_only: Whether to check node groups for only the primary node
 638
 639   """
 640   inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
 641
 642   if not owned_groups.issuperset(inst_groups):
 643     raise errors.OpPrereqError("Instance %s's node groups changed since"
 644                                " locks were acquired, current groups are"
 645                                " are '%s', owning groups '%s'; retry the"
 646                                " operation" %
 647                                (instance_name,
 648                                 utils.CommaJoin(inst_groups),
 649                                 utils.CommaJoin(owned_groups)),
 650                                errors.ECODE_STATE)
 651
 652   return inst_groups
 653
 654
 655 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 656   """Checks if the instances in a node group are still correct.
 657
 658   @type cfg: L{config.ConfigWriter}
 659   @param cfg: The cluster configuration
 660   @type group_uuid: string
 661   @param group_uuid: Node group UUID
 662   @type owned_instances: set or frozenset
 663   @param owned_instances: List of currently owned instances
 664
 665   """
 666   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 667   if owned_instances != wanted_instances:
 668     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 669                                " locks were acquired, wanted '%s', have '%s';"
 670                                " retry the operation" %
 671                                (group_uuid,
 672                                 utils.CommaJoin(wanted_instances),
 673                                 utils.CommaJoin(owned_instances)),
 674                                errors.ECODE_STATE)
 675
 676   return wanted_instances
 677
 678
 679 def _SupportsOob(cfg, node):
 680   """Tells if node supports OOB.
 681
 682   @type cfg: L{config.ConfigWriter}
 683   @param cfg: The cluster configuration
 684   @type node: L{objects.Node}
 685   @param node: The node
 686   @return: The OOB script if supported or an empty string otherwise
 687
 688   """
 689   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 690
 691
 692 def _GetWantedNodes(lu, nodes):
 693   """Returns list of checked and expanded node names.
 694
 695   @type lu: L{LogicalUnit}
 696   @param lu: the logical unit on whose behalf we execute
 697   @type nodes: list
 698   @param nodes: list of node names or None for all nodes
 699   @rtype: list
 700   @return: the list of nodes, sorted
 701   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 702
 703   """
 704   if nodes:
 705     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 706
 707   return utils.NiceSort(lu.cfg.GetNodeList())
 708
 709
 710 def _GetWantedInstances(lu, instances):
 711   """Returns list of checked and expanded instance names.
 712
 713   @type lu: L{LogicalUnit}
 714   @param lu: the logical unit on whose behalf we execute
 715   @type instances: list
 716   @param instances: list of instance names or None for all instances
 717   @rtype: list
 718   @return: the list of instances, sorted
 719   @raise errors.OpPrereqError: if the instances parameter is wrong type
 720   @raise errors.OpPrereqError: if any of the passed instances is not found
 721
 722   """
 723   if instances:
 724     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 725   else:
 726     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 727   return wanted
 728
 729
 730 def _GetUpdatedParams(old_params, update_dict,
 731                       use_default=True, use_none=False):
 732   """Return the new version of a parameter dictionary.
 733
 734   @type old_params: dict
 735   @param old_params: old parameters
 736   @type update_dict: dict
 737   @param update_dict: dict containing new parameter values, or
 738       constants.VALUE_DEFAULT to reset the parameter to its default
 739       value
 740   @param use_default: boolean
 741   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 742       values as 'to be deleted' values
 743   @param use_none: boolean
 744   @type use_none: whether to recognise C{None} values as 'to be
 745       deleted' values
 746   @rtype: dict
 747   @return: the new parameter dictionary
 748
 749   """
 750   params_copy = copy.deepcopy(old_params)
 751   for key, val in update_dict.iteritems():
 752     if ((use_default and val == constants.VALUE_DEFAULT) or
 753         (use_none and val is None)):
 754       try:
 755         del params_copy[key]
 756       except KeyError:
 757         pass
 758     else:
 759       params_copy[key] = val
 760   return params_copy
 761
 762
 763 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 764   """Return the new version of a instance policy.
 765
 766   @param group_policy: whether this policy applies to a group and thus
 767     we should support removal of policy entries
 768
 769   """
 770   use_none = use_default = group_policy
 771   ipolicy = copy.deepcopy(old_ipolicy)
 772   for key, value in new_ipolicy.items():
 773     if key not in constants.IPOLICY_ALL_KEYS:
 774       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 775                                  errors.ECODE_INVAL)
 776     if key in constants.IPOLICY_ISPECS:
 777       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 778       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 779                                        use_none=use_none,
 780                                        use_default=use_default)
 781     else:
 782       if (not value or value == [constants.VALUE_DEFAULT] or
 783           value == constants.VALUE_DEFAULT):
 784         if group_policy:
 785           del ipolicy[key]
 786         else:
 787           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 788                                      " on the cluster'" % key,
 789                                      errors.ECODE_INVAL)
 790       else:
 791         if key in constants.IPOLICY_PARAMETERS:
 792           # FIXME: we assume all such values are float
 793           try:
 794             ipolicy[key] = float(value)
 795           except (TypeError, ValueError), err:
 796             raise errors.OpPrereqError("Invalid value for attribute"
 797                                        " '%s': '%s', error: %s" %
 798                                        (key, value, err), errors.ECODE_INVAL)
 799         else:
 800           # FIXME: we assume all others are lists; this should be redone
 801           # in a nicer way
 802           ipolicy[key] = list(value)
 803   try:
 804     objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
 805   except errors.ConfigurationError, err:
 806     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 807                                errors.ECODE_INVAL)
 808   return ipolicy
 809
 810
 811 def _UpdateAndVerifySubDict(base, updates, type_check):
 812   """Updates and verifies a dict with sub dicts of the same type.
 813
 814   @param base: The dict with the old data
 815   @param updates: The dict with the new data
 816   @param type_check: Dict suitable to ForceDictType to verify correct types
 817   @returns: A new dict with updated and verified values
 818
 819   """
 820   def fn(old, value):
 821     new = _GetUpdatedParams(old, value)
 822     utils.ForceDictType(new, type_check)
 823     return new
 824
 825   ret = copy.deepcopy(base)
 826   ret.update(dict((key, fn(base.get(key, {}), value))
 827                   for key, value in updates.items()))
 828   return ret
 829
 830
 831 def _MergeAndVerifyHvState(op_input, obj_input):
 832   """Combines the hv state from an opcode with the one of the object
 833
 834   @param op_input: The input dict from the opcode
 835   @param obj_input: The input dict from the objects
 836   @return: The verified and updated dict
 837
 838   """
 839   if op_input:
 840     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 841     if invalid_hvs:
 842       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 843                                  " %s" % utils.CommaJoin(invalid_hvs),
 844                                  errors.ECODE_INVAL)
 845     if obj_input is None:
 846       obj_input = {}
 847     type_check = constants.HVSTS_PARAMETER_TYPES
 848     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 849
 850   return None
 851
 852
 853 def _MergeAndVerifyDiskState(op_input, obj_input):
 854   """Combines the disk state from an opcode with the one of the object
 855
 856   @param op_input: The input dict from the opcode
 857   @param obj_input: The input dict from the objects
 858   @return: The verified and updated dict
 859   """
 860   if op_input:
 861     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 862     if invalid_dst:
 863       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 864                                  utils.CommaJoin(invalid_dst),
 865                                  errors.ECODE_INVAL)
 866     type_check = constants.DSS_PARAMETER_TYPES
 867     if obj_input is None:
 868       obj_input = {}
 869     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 870                                               type_check))
 871                 for key, value in op_input.items())
 872
 873   return None
 874
 875
 876 def _ReleaseLocks(lu, level, names=None, keep=None):
 877   """Releases locks owned by an LU.
 878
 879   @type lu: L{LogicalUnit}
 880   @param level: Lock level
 881   @type names: list or None
 882   @param names: Names of locks to release
 883   @type keep: list or None
 884   @param keep: Names of locks to retain
 885
 886   """
 887   assert not (keep is not None and names is not None), \
 888          "Only one of the 'names' and the 'keep' parameters can be given"
 889
 890   if names is not None:
 891     should_release = names.__contains__
 892   elif keep:
 893     should_release = lambda name: name not in keep
 894   else:
 895     should_release = None
 896
 897   owned = lu.owned_locks(level)
 898   if not owned:
 899     # Not owning any lock at this level, do nothing
 900     pass
 901
 902   elif should_release:
 903     retain = []
 904     release = []
 905
 906     # Determine which locks to release
 907     for name in owned:
 908       if should_release(name):
 909         release.append(name)
 910       else:
 911         retain.append(name)
 912
 913     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 914
 915     # Release just some locks
 916     lu.glm.release(level, names=release)
 917
 918     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 919   else:
 920     # Release everything
 921     lu.glm.release(level)
 922
 923     assert not lu.glm.is_owned(level), "No locks should be owned"
 924
 925
 926 def _MapInstanceDisksToNodes(instances):
 927   """Creates a map from (node, volume) to instance name.
 928
 929   @type instances: list of L{objects.Instance}
 930   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 931
 932   """
 933   return dict(((node, vol), inst.name)
 934               for inst in instances
 935               for (node, vols) in inst.MapLVsByNode().items()
 936               for vol in vols)
 937
 938
 939 def _RunPostHook(lu, node_name):
 940   """Runs the post-hook for an opcode on a single node.
 941
 942   """
 943   hm = lu.proc.BuildHooksManager(lu)
 944   try:
 945     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 946   except Exception, err: # pylint: disable=W0703
 947     lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
 948
 949
 950 def _CheckOutputFields(static, dynamic, selected):
 951   """Checks whether all selected fields are valid.
 952
 953   @type static: L{utils.FieldSet}
 954   @param static: static fields set
 955   @type dynamic: L{utils.FieldSet}
 956   @param dynamic: dynamic fields set
 957
 958   """
 959   f = utils.FieldSet()
 960   f.Extend(static)
 961   f.Extend(dynamic)
 962
 963   delta = f.NonMatching(selected)
 964   if delta:
 965     raise errors.OpPrereqError("Unknown output fields selected: %s"
 966                                % ",".join(delta), errors.ECODE_INVAL)
 967
 968
 969 def _CheckGlobalHvParams(params):
 970   """Validates that given hypervisor params are not global ones.
 971
 972   This will ensure that instances don't get customised versions of
 973   global params.
 974
 975   """
 976   used_globals = constants.HVC_GLOBALS.intersection(params)
 977   if used_globals:
 978     msg = ("The following hypervisor parameters are global and cannot"
 979            " be customized at instance level, please modify them at"
 980            " cluster level: %s" % utils.CommaJoin(used_globals))
 981     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 982
 983
 984 def _CheckNodeOnline(lu, node, msg=None):
 985   """Ensure that a given node is online.
 986
 987   @param lu: the LU on behalf of which we make the check
 988   @param node: the node to check
 989   @param msg: if passed, should be a message to replace the default one
 990   @raise errors.OpPrereqError: if the node is offline
 991
 992   """
 993   if msg is None:
 994     msg = "Can't use offline node"
 995   if lu.cfg.GetNodeInfo(node).offline:
 996     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 997
 998
 999 def _CheckNodeNotDrained(lu, node):
1000   """Ensure that a given node is not drained.
1001
1002   @param lu: the LU on behalf of which we make the check
1003   @param node: the node to check
1004   @raise errors.OpPrereqError: if the node is drained
1005
1006   """
1007   if lu.cfg.GetNodeInfo(node).drained:
1008     raise errors.OpPrereqError("Can't use drained node %s" % node,
1009                                errors.ECODE_STATE)
1010
1011
1012 def _CheckNodeVmCapable(lu, node):
1013   """Ensure that a given node is vm capable.
1014
1015   @param lu: the LU on behalf of which we make the check
1016   @param node: the node to check
1017   @raise errors.OpPrereqError: if the node is not vm capable
1018
1019   """
1020   if not lu.cfg.GetNodeInfo(node).vm_capable:
1021     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1022                                errors.ECODE_STATE)
1023
1024
1025 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1026   """Ensure that a node supports a given OS.
1027
1028   @param lu: the LU on behalf of which we make the check
1029   @param node: the node to check
1030   @param os_name: the OS to query about
1031   @param force_variant: whether to ignore variant errors
1032   @raise errors.OpPrereqError: if the node is not supporting the OS
1033
1034   """
1035   result = lu.rpc.call_os_get(node, os_name)
1036   result.Raise("OS '%s' not in supported OS list for node %s" %
1037                (os_name, node),
1038                prereq=True, ecode=errors.ECODE_INVAL)
1039   if not force_variant:
1040     _CheckOSVariant(result.payload, os_name)
1041
1042
1043 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1044   """Ensure that a node has the given secondary ip.
1045
1046   @type lu: L{LogicalUnit}
1047   @param lu: the LU on behalf of which we make the check
1048   @type node: string
1049   @param node: the node to check
1050   @type secondary_ip: string
1051   @param secondary_ip: the ip to check
1052   @type prereq: boolean
1053   @param prereq: whether to throw a prerequisite or an execute error
1054   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1055   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1056
1057   """
1058   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1059   result.Raise("Failure checking secondary ip on node %s" % node,
1060                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1061   if not result.payload:
1062     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1063            " please fix and re-run this command" % secondary_ip)
1064     if prereq:
1065       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1066     else:
1067       raise errors.OpExecError(msg)
1068
1069
1070 def _GetClusterDomainSecret():
1071   """Reads the cluster domain secret.
1072
1073   """
1074   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1075                                strict=True)
1076
1077
1078 def _CheckInstanceState(lu, instance, req_states, msg=None):
1079   """Ensure that an instance is in one of the required states.
1080
1081   @param lu: the LU on behalf of which we make the check
1082   @param instance: the instance to check
1083   @param msg: if passed, should be a message to replace the default one
1084   @raise errors.OpPrereqError: if the instance is not in the required state
1085
1086   """
1087   if msg is None:
1088     msg = "can't use instance from outside %s states" % ", ".join(req_states)
1089   if instance.admin_state not in req_states:
1090     raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1091                                (instance.name, instance.admin_state, msg),
1092                                errors.ECODE_STATE)
1093
1094   if constants.ADMINST_UP not in req_states:
1095     pnode = instance.primary_node
1096     if not lu.cfg.GetNodeInfo(pnode).offline:
1097       ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1098       ins_l.Raise("Can't contact node %s for instance information" % pnode,
1099                   prereq=True, ecode=errors.ECODE_ENVIRON)
1100       if instance.name in ins_l.payload:
1101         raise errors.OpPrereqError("Instance %s is running, %s" %
1102                                    (instance.name, msg), errors.ECODE_STATE)
1103     else:
1104       lu.LogWarning("Primary node offline, ignoring check that instance"
1105                      " is down")
1106
1107
1108 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1109   """Computes if value is in the desired range.
1110
1111   @param name: name of the parameter for which we perform the check
1112   @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1113       not just 'disk')
1114   @param ipolicy: dictionary containing min, max and std values
1115   @param value: actual value that we want to use
1116   @return: None or element not meeting the criteria
1117
1118
1119   """
1120   if value in [None, constants.VALUE_AUTO]:
1121     return None
1122   max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1123   min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1124   if value > max_v or min_v > value:
1125     if qualifier:
1126       fqn = "%s/%s" % (name, qualifier)
1127     else:
1128       fqn = name
1129     return ("%s value %s is not in range [%s, %s]" %
1130             (fqn, value, min_v, max_v))
1131   return None
1132
1133
1134 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1135                                  nic_count, disk_sizes, spindle_use,
1136                                  _compute_fn=_ComputeMinMaxSpec):
1137   """Verifies ipolicy against provided specs.
1138
1139   @type ipolicy: dict
1140   @param ipolicy: The ipolicy
1141   @type mem_size: int
1142   @param mem_size: The memory size
1143   @type cpu_count: int
1144   @param cpu_count: Used cpu cores
1145   @type disk_count: int
1146   @param disk_count: Number of disks used
1147   @type nic_count: int
1148   @param nic_count: Number of nics used
1149   @type disk_sizes: list of ints
1150   @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1151   @type spindle_use: int
1152   @param spindle_use: The number of spindles this instance uses
1153   @param _compute_fn: The compute function (unittest only)
1154   @return: A list of violations, or an empty list of no violations are found
1155
1156   """
1157   assert disk_count == len(disk_sizes)
1158
1159   test_settings = [
1160     (constants.ISPEC_MEM_SIZE, "", mem_size),
1161     (constants.ISPEC_CPU_COUNT, "", cpu_count),
1162     (constants.ISPEC_DISK_COUNT, "", disk_count),
1163     (constants.ISPEC_NIC_COUNT, "", nic_count),
1164     (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1165     ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1166          for idx, d in enumerate(disk_sizes)]
1167
1168   return filter(None,
1169                 (_compute_fn(name, qualifier, ipolicy, value)
1170                  for (name, qualifier, value) in test_settings))
1171
1172
1173 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1174                                      _compute_fn=_ComputeIPolicySpecViolation):
1175   """Compute if instance meets the specs of ipolicy.
1176
1177   @type ipolicy: dict
1178   @param ipolicy: The ipolicy to verify against
1179   @type instance: L{objects.Instance}
1180   @param instance: The instance to verify
1181   @param _compute_fn: The function to verify ipolicy (unittest only)
1182   @see: L{_ComputeIPolicySpecViolation}
1183
1184   """
1185   mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1186   cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1187   spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1188   disk_count = len(instance.disks)
1189   disk_sizes = [disk.size for disk in instance.disks]
1190   nic_count = len(instance.nics)
1191
1192   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1193                      disk_sizes, spindle_use)
1194
1195
1196 def _ComputeIPolicyInstanceSpecViolation(
1197   ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1198   """Compute if instance specs meets the specs of ipolicy.
1199
1200   @type ipolicy: dict
1201   @param ipolicy: The ipolicy to verify against
1202   @param instance_spec: dict
1203   @param instance_spec: The instance spec to verify
1204   @param _compute_fn: The function to verify ipolicy (unittest only)
1205   @see: L{_ComputeIPolicySpecViolation}
1206
1207   """
1208   mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1209   cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1210   disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1211   disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1212   nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1213   spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1214
1215   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1216                      disk_sizes, spindle_use)
1217
1218
1219 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1220                                  target_group,
1221                                  _compute_fn=_ComputeIPolicyInstanceViolation):
1222   """Compute if instance meets the specs of the new target group.
1223
1224   @param ipolicy: The ipolicy to verify
1225   @param instance: The instance object to verify
1226   @param current_group: The current group of the instance
1227   @param target_group: The new group of the instance
1228   @param _compute_fn: The function to verify ipolicy (unittest only)
1229   @see: L{_ComputeIPolicySpecViolation}
1230
1231   """
1232   if current_group == target_group:
1233     return []
1234   else:
1235     return _compute_fn(ipolicy, instance)
1236
1237
1238 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1239                             _compute_fn=_ComputeIPolicyNodeViolation):
1240   """Checks that the target node is correct in terms of instance policy.
1241
1242   @param ipolicy: The ipolicy to verify
1243   @param instance: The instance object to verify
1244   @param node: The new node to relocate
1245   @param ignore: Ignore violations of the ipolicy
1246   @param _compute_fn: The function to verify ipolicy (unittest only)
1247   @see: L{_ComputeIPolicySpecViolation}
1248
1249   """
1250   primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1251   res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1252
1253   if res:
1254     msg = ("Instance does not meet target node group's (%s) instance"
1255            " policy: %s") % (node.group, utils.CommaJoin(res))
1256     if ignore:
1257       lu.LogWarning(msg)
1258     else:
1259       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1260
1261
1262 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1263   """Computes a set of any instances that would violate the new ipolicy.
1264
1265   @param old_ipolicy: The current (still in-place) ipolicy
1266   @param new_ipolicy: The new (to become) ipolicy
1267   @param instances: List of instances to verify
1268   @return: A list of instances which violates the new ipolicy but
1269       did not before
1270
1271   """
1272   return (_ComputeViolatingInstances(new_ipolicy, instances) -
1273           _ComputeViolatingInstances(old_ipolicy, instances))
1274
1275
1276 def _ExpandItemName(fn, name, kind):
1277   """Expand an item name.
1278
1279   @param fn: the function to use for expansion
1280   @param name: requested item name
1281   @param kind: text description ('Node' or 'Instance')
1282   @return: the resolved (full) name
1283   @raise errors.OpPrereqError: if the item is not found
1284
1285   """
1286   full_name = fn(name)
1287   if full_name is None:
1288     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1289                                errors.ECODE_NOENT)
1290   return full_name
1291
1292
1293 def _ExpandNodeName(cfg, name):
1294   """Wrapper over L{_ExpandItemName} for nodes."""
1295   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1296
1297
1298 def _ExpandInstanceName(cfg, name):
1299   """Wrapper over L{_ExpandItemName} for instance."""
1300   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1301
1302
1303 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1304                           minmem, maxmem, vcpus, nics, disk_template, disks,
1305                           bep, hvp, hypervisor_name, tags):
1306   """Builds instance related env variables for hooks
1307
1308   This builds the hook environment from individual variables.
1309
1310   @type name: string
1311   @param name: the name of the instance
1312   @type primary_node: string
1313   @param primary_node: the name of the instance's primary node
1314   @type secondary_nodes: list
1315   @param secondary_nodes: list of secondary nodes as strings
1316   @type os_type: string
1317   @param os_type: the name of the instance's OS
1318   @type status: string
1319   @param status: the desired status of the instance
1320   @type minmem: string
1321   @param minmem: the minimum memory size of the instance
1322   @type maxmem: string
1323   @param maxmem: the maximum memory size of the instance
1324   @type vcpus: string
1325   @param vcpus: the count of VCPUs the instance has
1326   @type nics: list
1327   @param nics: list of tuples (ip, mac, mode, link) representing
1328       the NICs the instance has
1329   @type disk_template: string
1330   @param disk_template: the disk template of the instance
1331   @type disks: list
1332   @param disks: the list of (size, mode) pairs
1333   @type bep: dict
1334   @param bep: the backend parameters for the instance
1335   @type hvp: dict
1336   @param hvp: the hypervisor parameters for the instance
1337   @type hypervisor_name: string
1338   @param hypervisor_name: the hypervisor for the instance
1339   @type tags: list
1340   @param tags: list of instance tags as strings
1341   @rtype: dict
1342   @return: the hook environment for this instance
1343
1344   """
1345   env = {
1346     "OP_TARGET": name,
1347     "INSTANCE_NAME": name,
1348     "INSTANCE_PRIMARY": primary_node,
1349     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1350     "INSTANCE_OS_TYPE": os_type,
1351     "INSTANCE_STATUS": status,
1352     "INSTANCE_MINMEM": minmem,
1353     "INSTANCE_MAXMEM": maxmem,
1354     # TODO(2.7) remove deprecated "memory" value
1355     "INSTANCE_MEMORY": maxmem,
1356     "INSTANCE_VCPUS": vcpus,
1357     "INSTANCE_DISK_TEMPLATE": disk_template,
1358     "INSTANCE_HYPERVISOR": hypervisor_name,
1359   }
1360   if nics:
1361     nic_count = len(nics)
1362     for idx, (ip, mac, mode, link) in enumerate(nics):
1363       if ip is None:
1364         ip = ""
1365       env["INSTANCE_NIC%d_IP" % idx] = ip
1366       env["INSTANCE_NIC%d_MAC" % idx] = mac
1367       env["INSTANCE_NIC%d_MODE" % idx] = mode
1368       env["INSTANCE_NIC%d_LINK" % idx] = link
1369       if mode == constants.NIC_MODE_BRIDGED:
1370         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1371   else:
1372     nic_count = 0
1373
1374   env["INSTANCE_NIC_COUNT"] = nic_count
1375
1376   if disks:
1377     disk_count = len(disks)
1378     for idx, (size, mode) in enumerate(disks):
1379       env["INSTANCE_DISK%d_SIZE" % idx] = size
1380       env["INSTANCE_DISK%d_MODE" % idx] = mode
1381   else:
1382     disk_count = 0
1383
1384   env["INSTANCE_DISK_COUNT"] = disk_count
1385
1386   if not tags:
1387     tags = []
1388
1389   env["INSTANCE_TAGS"] = " ".join(tags)
1390
1391   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1392     for key, value in source.items():
1393       env["INSTANCE_%s_%s" % (kind, key)] = value
1394
1395   return env
1396
1397
1398 def _NICListToTuple(lu, nics):
1399   """Build a list of nic information tuples.
1400
1401   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1402   value in LUInstanceQueryData.
1403
1404   @type lu:  L{LogicalUnit}
1405   @param lu: the logical unit on whose behalf we execute
1406   @type nics: list of L{objects.NIC}
1407   @param nics: list of nics to convert to hooks tuples
1408
1409   """
1410   hooks_nics = []
1411   cluster = lu.cfg.GetClusterInfo()
1412   for nic in nics:
1413     ip = nic.ip
1414     mac = nic.mac
1415     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1416     mode = filled_params[constants.NIC_MODE]
1417     link = filled_params[constants.NIC_LINK]
1418     hooks_nics.append((ip, mac, mode, link))
1419   return hooks_nics
1420
1421
1422 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1423   """Builds instance related env variables for hooks from an object.
1424
1425   @type lu: L{LogicalUnit}
1426   @param lu: the logical unit on whose behalf we execute
1427   @type instance: L{objects.Instance}
1428   @param instance: the instance for which we should build the
1429       environment
1430   @type override: dict
1431   @param override: dictionary with key/values that will override
1432       our values
1433   @rtype: dict
1434   @return: the hook environment dictionary
1435
1436   """
1437   cluster = lu.cfg.GetClusterInfo()
1438   bep = cluster.FillBE(instance)
1439   hvp = cluster.FillHV(instance)
1440   args = {
1441     "name": instance.name,
1442     "primary_node": instance.primary_node,
1443     "secondary_nodes": instance.secondary_nodes,
1444     "os_type": instance.os,
1445     "status": instance.admin_state,
1446     "maxmem": bep[constants.BE_MAXMEM],
1447     "minmem": bep[constants.BE_MINMEM],
1448     "vcpus": bep[constants.BE_VCPUS],
1449     "nics": _NICListToTuple(lu, instance.nics),
1450     "disk_template": instance.disk_template,
1451     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1452     "bep": bep,
1453     "hvp": hvp,
1454     "hypervisor_name": instance.hypervisor,
1455     "tags": instance.tags,
1456   }
1457   if override:
1458     args.update(override)
1459   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1460
1461
1462 def _AdjustCandidatePool(lu, exceptions):
1463   """Adjust the candidate pool after node operations.
1464
1465   """
1466   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1467   if mod_list:
1468     lu.LogInfo("Promoted nodes to master candidate role: %s",
1469                utils.CommaJoin(node.name for node in mod_list))
1470     for name in mod_list:
1471       lu.context.ReaddNode(name)
1472   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1473   if mc_now > mc_max:
1474     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1475                (mc_now, mc_max))
1476
1477
1478 def _DecideSelfPromotion(lu, exceptions=None):
1479   """Decide whether I should promote myself as a master candidate.
1480
1481   """
1482   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1483   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1484   # the new node will increase mc_max with one, so:
1485   mc_should = min(mc_should + 1, cp_size)
1486   return mc_now < mc_should
1487
1488
1489 def _ComputeViolatingInstances(ipolicy, instances):
1490   """Computes a set of instances who violates given ipolicy.
1491
1492   @param ipolicy: The ipolicy to verify
1493   @type instances: object.Instance
1494   @param instances: List of instances to verify
1495   @return: A frozenset of instance names violating the ipolicy
1496
1497   """
1498   return frozenset([inst.name for inst in instances
1499                     if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1500
1501
1502 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1503   """Check that the brigdes needed by a list of nics exist.
1504
1505   """
1506   cluster = lu.cfg.GetClusterInfo()
1507   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1508   brlist = [params[constants.NIC_LINK] for params in paramslist
1509             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1510   if brlist:
1511     result = lu.rpc.call_bridges_exist(target_node, brlist)
1512     result.Raise("Error checking bridges on destination node '%s'" %
1513                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1514
1515
1516 def _CheckInstanceBridgesExist(lu, instance, node=None):
1517   """Check that the brigdes needed by an instance exist.
1518
1519   """
1520   if node is None:
1521     node = instance.primary_node
1522   _CheckNicsBridgesExist(lu, instance.nics, node)
1523
1524
1525 def _CheckOSVariant(os_obj, name):
1526   """Check whether an OS name conforms to the os variants specification.
1527
1528   @type os_obj: L{objects.OS}
1529   @param os_obj: OS object to check
1530   @type name: string
1531   @param name: OS name passed by the user, to check for validity
1532
1533   """
1534   variant = objects.OS.GetVariant(name)
1535   if not os_obj.supported_variants:
1536     if variant:
1537       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1538                                  " passed)" % (os_obj.name, variant),
1539                                  errors.ECODE_INVAL)
1540     return
1541   if not variant:
1542     raise errors.OpPrereqError("OS name must include a variant",
1543                                errors.ECODE_INVAL)
1544
1545   if variant not in os_obj.supported_variants:
1546     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1547
1548
1549 def _GetNodeInstancesInner(cfg, fn):
1550   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1551
1552
1553 def _GetNodeInstances(cfg, node_name):
1554   """Returns a list of all primary and secondary instances on a node.
1555
1556   """
1557
1558   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1559
1560
1561 def _GetNodePrimaryInstances(cfg, node_name):
1562   """Returns primary instances on a node.
1563
1564   """
1565   return _GetNodeInstancesInner(cfg,
1566                                 lambda inst: node_name == inst.primary_node)
1567
1568
1569 def _GetNodeSecondaryInstances(cfg, node_name):
1570   """Returns secondary instances on a node.
1571
1572   """
1573   return _GetNodeInstancesInner(cfg,
1574                                 lambda inst: node_name in inst.secondary_nodes)
1575
1576
1577 def _GetStorageTypeArgs(cfg, storage_type):
1578   """Returns the arguments for a storage type.
1579
1580   """
1581   # Special case for file storage
1582   if storage_type == constants.ST_FILE:
1583     # storage.FileStorage wants a list of storage directories
1584     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1585
1586   return []
1587
1588
1589 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1590   faulty = []
1591
1592   for dev in instance.disks:
1593     cfg.SetDiskID(dev, node_name)
1594
1595   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1596                                                                 instance))
1597   result.Raise("Failed to get disk status from node %s" % node_name,
1598                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1599
1600   for idx, bdev_status in enumerate(result.payload):
1601     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1602       faulty.append(idx)
1603
1604   return faulty
1605
1606
1607 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1608   """Check the sanity of iallocator and node arguments and use the
1609   cluster-wide iallocator if appropriate.
1610
1611   Check that at most one of (iallocator, node) is specified. If none is
1612   specified, then the LU's opcode's iallocator slot is filled with the
1613   cluster-wide default iallocator.
1614
1615   @type iallocator_slot: string
1616   @param iallocator_slot: the name of the opcode iallocator slot
1617   @type node_slot: string
1618   @param node_slot: the name of the opcode target node slot
1619
1620   """
1621   node = getattr(lu.op, node_slot, None)
1622   ialloc = getattr(lu.op, iallocator_slot, None)
1623
1624   if node is not None and ialloc is not None:
1625     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1626                                errors.ECODE_INVAL)
1627   elif node is None and ialloc is None:
1628     default_iallocator = lu.cfg.GetDefaultIAllocator()
1629     if default_iallocator:
1630       setattr(lu.op, iallocator_slot, default_iallocator)
1631     else:
1632       raise errors.OpPrereqError("No iallocator or node given and no"
1633                                  " cluster-wide default iallocator found;"
1634                                  " please specify either an iallocator or a"
1635                                  " node, or set a cluster-wide default"
1636                                  " iallocator", errors.ECODE_INVAL)
1637
1638
1639 def _GetDefaultIAllocator(cfg, ialloc):
1640   """Decides on which iallocator to use.
1641
1642   @type cfg: L{config.ConfigWriter}
1643   @param cfg: Cluster configuration object
1644   @type ialloc: string or None
1645   @param ialloc: Iallocator specified in opcode
1646   @rtype: string
1647   @return: Iallocator name
1648
1649   """
1650   if not ialloc:
1651     # Use default iallocator
1652     ialloc = cfg.GetDefaultIAllocator()
1653
1654   if not ialloc:
1655     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1656                                " opcode nor as a cluster-wide default",
1657                                errors.ECODE_INVAL)
1658
1659   return ialloc
1660
1661
1662 class LUClusterPostInit(LogicalUnit):
1663   """Logical unit for running hooks after cluster initialization.
1664
1665   """
1666   HPATH = "cluster-init"
1667   HTYPE = constants.HTYPE_CLUSTER
1668
1669   def BuildHooksEnv(self):
1670     """Build hooks env.
1671
1672     """
1673     return {
1674       "OP_TARGET": self.cfg.GetClusterName(),
1675       }
1676
1677   def BuildHooksNodes(self):
1678     """Build hooks nodes.
1679
1680     """
1681     return ([], [self.cfg.GetMasterNode()])
1682
1683   def Exec(self, feedback_fn):
1684     """Nothing to do.
1685
1686     """
1687     return True
1688
1689
1690 class LUClusterDestroy(LogicalUnit):
1691   """Logical unit for destroying the cluster.
1692
1693   """
1694   HPATH = "cluster-destroy"
1695   HTYPE = constants.HTYPE_CLUSTER
1696
1697   def BuildHooksEnv(self):
1698     """Build hooks env.
1699
1700     """
1701     return {
1702       "OP_TARGET": self.cfg.GetClusterName(),
1703       }
1704
1705   def BuildHooksNodes(self):
1706     """Build hooks nodes.
1707
1708     """
1709     return ([], [])
1710
1711   def CheckPrereq(self):
1712     """Check prerequisites.
1713
1714     This checks whether the cluster is empty.
1715
1716     Any errors are signaled by raising errors.OpPrereqError.
1717
1718     """
1719     master = self.cfg.GetMasterNode()
1720
1721     nodelist = self.cfg.GetNodeList()
1722     if len(nodelist) != 1 or nodelist[0] != master:
1723       raise errors.OpPrereqError("There are still %d node(s) in"
1724                                  " this cluster." % (len(nodelist) - 1),
1725                                  errors.ECODE_INVAL)
1726     instancelist = self.cfg.GetInstanceList()
1727     if instancelist:
1728       raise errors.OpPrereqError("There are still %d instance(s) in"
1729                                  " this cluster." % len(instancelist),
1730                                  errors.ECODE_INVAL)
1731
1732   def Exec(self, feedback_fn):
1733     """Destroys the cluster.
1734
1735     """
1736     master_params = self.cfg.GetMasterNetworkParameters()
1737
1738     # Run post hooks on master node before it's removed
1739     _RunPostHook(self, master_params.name)
1740
1741     ems = self.cfg.GetUseExternalMipScript()
1742     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1743                                                      master_params, ems)
1744     if result.fail_msg:
1745       self.LogWarning("Error disabling the master IP address: %s",
1746                       result.fail_msg)
1747
1748     return master_params.name
1749
1750
1751 def _VerifyCertificate(filename):
1752   """Verifies a certificate for L{LUClusterVerifyConfig}.
1753
1754   @type filename: string
1755   @param filename: Path to PEM file
1756
1757   """
1758   try:
1759     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1760                                            utils.ReadFile(filename))
1761   except Exception, err: # pylint: disable=W0703
1762     return (LUClusterVerifyConfig.ETYPE_ERROR,
1763             "Failed to load X509 certificate %s: %s" % (filename, err))
1764
1765   (errcode, msg) = \
1766     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1767                                 constants.SSL_CERT_EXPIRATION_ERROR)
1768
1769   if msg:
1770     fnamemsg = "While verifying %s: %s" % (filename, msg)
1771   else:
1772     fnamemsg = None
1773
1774   if errcode is None:
1775     return (None, fnamemsg)
1776   elif errcode == utils.CERT_WARNING:
1777     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1778   elif errcode == utils.CERT_ERROR:
1779     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1780
1781   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1782
1783
1784 def _GetAllHypervisorParameters(cluster, instances):
1785   """Compute the set of all hypervisor parameters.
1786
1787   @type cluster: L{objects.Cluster}
1788   @param cluster: the cluster object
1789   @param instances: list of L{objects.Instance}
1790   @param instances: additional instances from which to obtain parameters
1791   @rtype: list of (origin, hypervisor, parameters)
1792   @return: a list with all parameters found, indicating the hypervisor they
1793        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1794
1795   """
1796   hvp_data = []
1797
1798   for hv_name in cluster.enabled_hypervisors:
1799     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1800
1801   for os_name, os_hvp in cluster.os_hvp.items():
1802     for hv_name, hv_params in os_hvp.items():
1803       if hv_params:
1804         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1805         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1806
1807   # TODO: collapse identical parameter values in a single one
1808   for instance in instances:
1809     if instance.hvparams:
1810       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1811                        cluster.FillHV(instance)))
1812
1813   return hvp_data
1814
1815
1816 class _VerifyErrors(object):
1817   """Mix-in for cluster/group verify LUs.
1818
1819   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1820   self.op and self._feedback_fn to be available.)
1821
1822   """
1823
1824   ETYPE_FIELD = "code"
1825   ETYPE_ERROR = "ERROR"
1826   ETYPE_WARNING = "WARNING"
1827
1828   def _Error(self, ecode, item, msg, *args, **kwargs):
1829     """Format an error message.
1830
1831     Based on the opcode's error_codes parameter, either format a
1832     parseable error code, or a simpler error string.
1833
1834     This must be called only from Exec and functions called from Exec.
1835
1836     """
1837     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1838     itype, etxt, _ = ecode
1839     # first complete the msg
1840     if args:
1841       msg = msg % args
1842     # then format the whole message
1843     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1844       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1845     else:
1846       if item:
1847         item = " " + item
1848       else:
1849         item = ""
1850       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1851     # and finally report it via the feedback_fn
1852     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1853
1854   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1855     """Log an error message if the passed condition is True.
1856
1857     """
1858     cond = (bool(cond)
1859             or self.op.debug_simulate_errors) # pylint: disable=E1101
1860
1861     # If the error code is in the list of ignored errors, demote the error to a
1862     # warning
1863     (_, etxt, _) = ecode
1864     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1865       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1866
1867     if cond:
1868       self._Error(ecode, *args, **kwargs)
1869
1870     # do not mark the operation as failed for WARN cases only
1871     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1872       self.bad = self.bad or cond
1873
1874
1875 class LUClusterVerify(NoHooksLU):
1876   """Submits all jobs necessary to verify the cluster.
1877
1878   """
1879   REQ_BGL = False
1880
1881   def ExpandNames(self):
1882     self.needed_locks = {}
1883
1884   def Exec(self, feedback_fn):
1885     jobs = []
1886
1887     if self.op.group_name:
1888       groups = [self.op.group_name]
1889       depends_fn = lambda: None
1890     else:
1891       groups = self.cfg.GetNodeGroupList()
1892
1893       # Verify global configuration
1894       jobs.append([
1895         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1896         ])
1897
1898       # Always depend on global verification
1899       depends_fn = lambda: [(-len(jobs), [])]
1900
1901     jobs.extend(
1902       [opcodes.OpClusterVerifyGroup(group_name=group,
1903                                     ignore_errors=self.op.ignore_errors,
1904                                     depends=depends_fn())]
1905       for group in groups)
1906
1907     # Fix up all parameters
1908     for op in itertools.chain(*jobs): # pylint: disable=W0142
1909       op.debug_simulate_errors = self.op.debug_simulate_errors
1910       op.verbose = self.op.verbose
1911       op.error_codes = self.op.error_codes
1912       try:
1913         op.skip_checks = self.op.skip_checks
1914       except AttributeError:
1915         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1916
1917     return ResultWithJobs(jobs)
1918
1919
1920 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1921   """Verifies the cluster config.
1922
1923   """
1924   REQ_BGL = False
1925
1926   def _VerifyHVP(self, hvp_data):
1927     """Verifies locally the syntax of the hypervisor parameters.
1928
1929     """
1930     for item, hv_name, hv_params in hvp_data:
1931       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1932              (item, hv_name))
1933       try:
1934         hv_class = hypervisor.GetHypervisor(hv_name)
1935         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1936         hv_class.CheckParameterSyntax(hv_params)
1937       except errors.GenericError, err:
1938         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1939
1940   def ExpandNames(self):
1941     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1942     self.share_locks = _ShareAll()
1943
1944   def CheckPrereq(self):
1945     """Check prerequisites.
1946
1947     """
1948     # Retrieve all information
1949     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1950     self.all_node_info = self.cfg.GetAllNodesInfo()
1951     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1952
1953   def Exec(self, feedback_fn):
1954     """Verify integrity of cluster, performing various test on nodes.
1955
1956     """
1957     self.bad = False
1958     self._feedback_fn = feedback_fn
1959
1960     feedback_fn("* Verifying cluster config")
1961
1962     for msg in self.cfg.VerifyConfig():
1963       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1964
1965     feedback_fn("* Verifying cluster certificate files")
1966
1967     for cert_filename in constants.ALL_CERT_FILES:
1968       (errcode, msg) = _VerifyCertificate(cert_filename)
1969       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1970
1971     feedback_fn("* Verifying hypervisor parameters")
1972
1973     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1974                                                 self.all_inst_info.values()))
1975
1976     feedback_fn("* Verifying all nodes belong to an existing group")
1977
1978     # We do this verification here because, should this bogus circumstance
1979     # occur, it would never be caught by VerifyGroup, which only acts on
1980     # nodes/instances reachable from existing node groups.
1981
1982     dangling_nodes = set(node.name for node in self.all_node_info.values()
1983                          if node.group not in self.all_group_info)
1984
1985     dangling_instances = {}
1986     no_node_instances = []
1987
1988     for inst in self.all_inst_info.values():
1989       if inst.primary_node in dangling_nodes:
1990         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1991       elif inst.primary_node not in self.all_node_info:
1992         no_node_instances.append(inst.name)
1993
1994     pretty_dangling = [
1995         "%s (%s)" %
1996         (node.name,
1997          utils.CommaJoin(dangling_instances.get(node.name,
1998                                                 ["no instances"])))
1999         for node in dangling_nodes]
2000
2001     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2002                   None,
2003                   "the following nodes (and their instances) belong to a non"
2004                   " existing group: %s", utils.CommaJoin(pretty_dangling))
2005
2006     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2007                   None,
2008                   "the following instances have a non-existing primary-node:"
2009                   " %s", utils.CommaJoin(no_node_instances))
2010
2011     return not self.bad
2012
2013
2014 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2015   """Verifies the status of a node group.
2016
2017   """
2018   HPATH = "cluster-verify"
2019   HTYPE = constants.HTYPE_CLUSTER
2020   REQ_BGL = False
2021
2022   _HOOKS_INDENT_RE = re.compile("^", re.M)
2023
2024   class NodeImage(object):
2025     """A class representing the logical and physical status of a node.
2026
2027     @type name: string
2028     @ivar name: the node name to which this object refers
2029     @ivar volumes: a structure as returned from
2030         L{ganeti.backend.GetVolumeList} (runtime)
2031     @ivar instances: a list of running instances (runtime)
2032     @ivar pinst: list of configured primary instances (config)
2033     @ivar sinst: list of configured secondary instances (config)
2034     @ivar sbp: dictionary of {primary-node: list of instances} for all
2035         instances for which this node is secondary (config)
2036     @ivar mfree: free memory, as reported by hypervisor (runtime)
2037     @ivar dfree: free disk, as reported by the node (runtime)
2038     @ivar offline: the offline status (config)
2039     @type rpc_fail: boolean
2040     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2041         not whether the individual keys were correct) (runtime)
2042     @type lvm_fail: boolean
2043     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2044     @type hyp_fail: boolean
2045     @ivar hyp_fail: whether the RPC call didn't return the instance list
2046     @type ghost: boolean
2047     @ivar ghost: whether this is a known node or not (config)
2048     @type os_fail: boolean
2049     @ivar os_fail: whether the RPC call didn't return valid OS data
2050     @type oslist: list
2051     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2052     @type vm_capable: boolean
2053     @ivar vm_capable: whether the node can host instances
2054
2055     """
2056     def __init__(self, offline=False, name=None, vm_capable=True):
2057       self.name = name
2058       self.volumes = {}
2059       self.instances = []
2060       self.pinst = []
2061       self.sinst = []
2062       self.sbp = {}
2063       self.mfree = 0
2064       self.dfree = 0
2065       self.offline = offline
2066       self.vm_capable = vm_capable
2067       self.rpc_fail = False
2068       self.lvm_fail = False
2069       self.hyp_fail = False
2070       self.ghost = False
2071       self.os_fail = False
2072       self.oslist = {}
2073
2074   def ExpandNames(self):
2075     # This raises errors.OpPrereqError on its own:
2076     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2077
2078     # Get instances in node group; this is unsafe and needs verification later
2079     inst_names = \
2080       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2081
2082     self.needed_locks = {
2083       locking.LEVEL_INSTANCE: inst_names,
2084       locking.LEVEL_NODEGROUP: [self.group_uuid],
2085       locking.LEVEL_NODE: [],
2086       }
2087
2088     self.share_locks = _ShareAll()
2089
2090   def DeclareLocks(self, level):
2091     if level == locking.LEVEL_NODE:
2092       # Get members of node group; this is unsafe and needs verification later
2093       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2094
2095       all_inst_info = self.cfg.GetAllInstancesInfo()
2096
2097       # In Exec(), we warn about mirrored instances that have primary and
2098       # secondary living in separate node groups. To fully verify that
2099       # volumes for these instances are healthy, we will need to do an
2100       # extra call to their secondaries. We ensure here those nodes will
2101       # be locked.
2102       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2103         # Important: access only the instances whose lock is owned
2104         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2105           nodes.update(all_inst_info[inst].secondary_nodes)
2106
2107       self.needed_locks[locking.LEVEL_NODE] = nodes
2108
2109   def CheckPrereq(self):
2110     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2111     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2112
2113     group_nodes = set(self.group_info.members)
2114     group_instances = \
2115       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2116
2117     unlocked_nodes = \
2118         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2119
2120     unlocked_instances = \
2121         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2122
2123     if unlocked_nodes:
2124       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2125                                  utils.CommaJoin(unlocked_nodes),
2126                                  errors.ECODE_STATE)
2127
2128     if unlocked_instances:
2129       raise errors.OpPrereqError("Missing lock for instances: %s" %
2130                                  utils.CommaJoin(unlocked_instances),
2131                                  errors.ECODE_STATE)
2132
2133     self.all_node_info = self.cfg.GetAllNodesInfo()
2134     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2135
2136     self.my_node_names = utils.NiceSort(group_nodes)
2137     self.my_inst_names = utils.NiceSort(group_instances)
2138
2139     self.my_node_info = dict((name, self.all_node_info[name])
2140                              for name in self.my_node_names)
2141
2142     self.my_inst_info = dict((name, self.all_inst_info[name])
2143                              for name in self.my_inst_names)
2144
2145     # We detect here the nodes that will need the extra RPC calls for verifying
2146     # split LV volumes; they should be locked.
2147     extra_lv_nodes = set()
2148
2149     for inst in self.my_inst_info.values():
2150       if inst.disk_template in constants.DTS_INT_MIRROR:
2151         for nname in inst.all_nodes:
2152           if self.all_node_info[nname].group != self.group_uuid:
2153             extra_lv_nodes.add(nname)
2154
2155     unlocked_lv_nodes = \
2156         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2157
2158     if unlocked_lv_nodes:
2159       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2160                                  utils.CommaJoin(unlocked_lv_nodes),
2161                                  errors.ECODE_STATE)
2162     self.extra_lv_nodes = list(extra_lv_nodes)
2163
2164   def _VerifyNode(self, ninfo, nresult):
2165     """Perform some basic validation on data returned from a node.
2166
2167       - check the result data structure is well formed and has all the
2168         mandatory fields
2169       - check ganeti version
2170
2171     @type ninfo: L{objects.Node}
2172     @param ninfo: the node to check
2173     @param nresult: the results from the node
2174     @rtype: boolean
2175     @return: whether overall this call was successful (and we can expect
2176          reasonable values in the respose)
2177
2178     """
2179     node = ninfo.name
2180     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2181
2182     # main result, nresult should be a non-empty dict
2183     test = not nresult or not isinstance(nresult, dict)
2184     _ErrorIf(test, constants.CV_ENODERPC, node,
2185                   "unable to verify node: no data returned")
2186     if test:
2187       return False
2188
2189     # compares ganeti version
2190     local_version = constants.PROTOCOL_VERSION
2191     remote_version = nresult.get("version", None)
2192     test = not (remote_version and
2193                 isinstance(remote_version, (list, tuple)) and
2194                 len(remote_version) == 2)
2195     _ErrorIf(test, constants.CV_ENODERPC, node,
2196              "connection to node returned invalid data")
2197     if test:
2198       return False
2199
2200     test = local_version != remote_version[0]
2201     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2202              "incompatible protocol versions: master %s,"
2203              " node %s", local_version, remote_version[0])
2204     if test:
2205       return False
2206
2207     # node seems compatible, we can actually try to look into its results
2208
2209     # full package version
2210     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2211                   constants.CV_ENODEVERSION, node,
2212                   "software version mismatch: master %s, node %s",
2213                   constants.RELEASE_VERSION, remote_version[1],
2214                   code=self.ETYPE_WARNING)
2215
2216     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2217     if ninfo.vm_capable and isinstance(hyp_result, dict):
2218       for hv_name, hv_result in hyp_result.iteritems():
2219         test = hv_result is not None
2220         _ErrorIf(test, constants.CV_ENODEHV, node,
2221                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2222
2223     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2224     if ninfo.vm_capable and isinstance(hvp_result, list):
2225       for item, hv_name, hv_result in hvp_result:
2226         _ErrorIf(True, constants.CV_ENODEHV, node,
2227                  "hypervisor %s parameter verify failure (source %s): %s",
2228                  hv_name, item, hv_result)
2229
2230     test = nresult.get(constants.NV_NODESETUP,
2231                        ["Missing NODESETUP results"])
2232     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2233              "; ".join(test))
2234
2235     return True
2236
2237   def _VerifyNodeTime(self, ninfo, nresult,
2238                       nvinfo_starttime, nvinfo_endtime):
2239     """Check the node time.
2240
2241     @type ninfo: L{objects.Node}
2242     @param ninfo: the node to check
2243     @param nresult: the remote results for the node
2244     @param nvinfo_starttime: the start time of the RPC call
2245     @param nvinfo_endtime: the end time of the RPC call
2246
2247     """
2248     node = ninfo.name
2249     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2250
2251     ntime = nresult.get(constants.NV_TIME, None)
2252     try:
2253       ntime_merged = utils.MergeTime(ntime)
2254     except (ValueError, TypeError):
2255       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2256       return
2257
2258     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2259       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2260     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2261       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2262     else:
2263       ntime_diff = None
2264
2265     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2266              "Node time diverges by at least %s from master node time",
2267              ntime_diff)
2268
2269   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2270     """Check the node LVM results.
2271
2272     @type ninfo: L{objects.Node}
2273     @param ninfo: the node to check
2274     @param nresult: the remote results for the node
2275     @param vg_name: the configured VG name
2276
2277     """
2278     if vg_name is None:
2279       return
2280
2281     node = ninfo.name
2282     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2283
2284     # checks vg existence and size > 20G
2285     vglist = nresult.get(constants.NV_VGLIST, None)
2286     test = not vglist
2287     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2288     if not test:
2289       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2290                                             constants.MIN_VG_SIZE)
2291       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2292
2293     # check pv names
2294     pvlist = nresult.get(constants.NV_PVLIST, None)
2295     test = pvlist is None
2296     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2297     if not test:
2298       # check that ':' is not present in PV names, since it's a
2299       # special character for lvcreate (denotes the range of PEs to
2300       # use on the PV)
2301       for _, pvname, owner_vg in pvlist:
2302         test = ":" in pvname
2303         _ErrorIf(test, constants.CV_ENODELVM, node,
2304                  "Invalid character ':' in PV '%s' of VG '%s'",
2305                  pvname, owner_vg)
2306
2307   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2308     """Check the node bridges.
2309
2310     @type ninfo: L{objects.Node}
2311     @param ninfo: the node to check
2312     @param nresult: the remote results for the node
2313     @param bridges: the expected list of bridges
2314
2315     """
2316     if not bridges:
2317       return
2318
2319     node = ninfo.name
2320     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2321
2322     missing = nresult.get(constants.NV_BRIDGES, None)
2323     test = not isinstance(missing, list)
2324     _ErrorIf(test, constants.CV_ENODENET, node,
2325              "did not return valid bridge information")
2326     if not test:
2327       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2328                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2329
2330   def _VerifyNodeUserScripts(self, ninfo, nresult):
2331     """Check the results of user scripts presence and executability on the node
2332
2333     @type ninfo: L{objects.Node}
2334     @param ninfo: the node to check
2335     @param nresult: the remote results for the node
2336
2337     """
2338     node = ninfo.name
2339
2340     test = not constants.NV_USERSCRIPTS in nresult
2341     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2342                   "did not return user scripts information")
2343
2344     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2345     if not test:
2346       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2347                     "user scripts not present or not executable: %s" %
2348                     utils.CommaJoin(sorted(broken_scripts)))
2349
2350   def _VerifyNodeNetwork(self, ninfo, nresult):
2351     """Check the node network connectivity results.
2352
2353     @type ninfo: L{objects.Node}
2354     @param ninfo: the node to check
2355     @param nresult: the remote results for the node
2356
2357     """
2358     node = ninfo.name
2359     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2360
2361     test = constants.NV_NODELIST not in nresult
2362     _ErrorIf(test, constants.CV_ENODESSH, node,
2363              "node hasn't returned node ssh connectivity data")
2364     if not test:
2365       if nresult[constants.NV_NODELIST]:
2366         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2367           _ErrorIf(True, constants.CV_ENODESSH, node,
2368                    "ssh communication with node '%s': %s", a_node, a_msg)
2369
2370     test = constants.NV_NODENETTEST not in nresult
2371     _ErrorIf(test, constants.CV_ENODENET, node,
2372              "node hasn't returned node tcp connectivity data")
2373     if not test:
2374       if nresult[constants.NV_NODENETTEST]:
2375         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2376         for anode in nlist:
2377           _ErrorIf(True, constants.CV_ENODENET, node,
2378                    "tcp communication with node '%s': %s",
2379                    anode, nresult[constants.NV_NODENETTEST][anode])
2380
2381     test = constants.NV_MASTERIP not in nresult
2382     _ErrorIf(test, constants.CV_ENODENET, node,
2383              "node hasn't returned node master IP reachability data")
2384     if not test:
2385       if not nresult[constants.NV_MASTERIP]:
2386         if node == self.master_node:
2387           msg = "the master node cannot reach the master IP (not configured?)"
2388         else:
2389           msg = "cannot reach the master IP"
2390         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2391
2392   def _VerifyInstance(self, instance, instanceconfig, node_image,
2393                       diskstatus):
2394     """Verify an instance.
2395
2396     This function checks to see if the required block devices are
2397     available on the instance's node.
2398
2399     """
2400     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2401     node_current = instanceconfig.primary_node
2402
2403     node_vol_should = {}
2404     instanceconfig.MapLVsByNode(node_vol_should)
2405
2406     cluster = self.cfg.GetClusterInfo()
2407     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2408                                                             self.group_info)
2409     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2410     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2411
2412     for node in node_vol_should:
2413       n_img = node_image[node]
2414       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2415         # ignore missing volumes on offline or broken nodes
2416         continue
2417       for volume in node_vol_should[node]:
2418         test = volume not in n_img.volumes
2419         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2420                  "volume %s missing on node %s", volume, node)
2421
2422     if instanceconfig.admin_state == constants.ADMINST_UP:
2423       pri_img = node_image[node_current]
2424       test = instance not in pri_img.instances and not pri_img.offline
2425       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2426                "instance not running on its primary node %s",
2427                node_current)
2428
2429     diskdata = [(nname, success, status, idx)
2430                 for (nname, disks) in diskstatus.items()
2431                 for idx, (success, status) in enumerate(disks)]
2432
2433     for nname, success, bdev_status, idx in diskdata:
2434       # the 'ghost node' construction in Exec() ensures that we have a
2435       # node here
2436       snode = node_image[nname]
2437       bad_snode = snode.ghost or snode.offline
2438       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2439                not success and not bad_snode,
2440                constants.CV_EINSTANCEFAULTYDISK, instance,
2441                "couldn't retrieve status for disk/%s on %s: %s",
2442                idx, nname, bdev_status)
2443       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2444                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2445                constants.CV_EINSTANCEFAULTYDISK, instance,
2446                "disk/%s on %s is faulty", idx, nname)
2447
2448   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2449     """Verify if there are any unknown volumes in the cluster.
2450
2451     The .os, .swap and backup volumes are ignored. All other volumes are
2452     reported as unknown.
2453
2454     @type reserved: L{ganeti.utils.FieldSet}
2455     @param reserved: a FieldSet of reserved volume names
2456
2457     """
2458     for node, n_img in node_image.items():
2459       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2460           self.all_node_info[node].group != self.group_uuid):
2461         # skip non-healthy nodes
2462         continue
2463       for volume in n_img.volumes:
2464         test = ((node not in node_vol_should or
2465                 volume not in node_vol_should[node]) and
2466                 not reserved.Matches(volume))
2467         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2468                       "volume %s is unknown", volume)
2469
2470   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2471     """Verify N+1 Memory Resilience.
2472
2473     Check that if one single node dies we can still start all the
2474     instances it was primary for.
2475
2476     """
2477     cluster_info = self.cfg.GetClusterInfo()
2478     for node, n_img in node_image.items():
2479       # This code checks that every node which is now listed as
2480       # secondary has enough memory to host all instances it is
2481       # supposed to should a single other node in the cluster fail.
2482       # FIXME: not ready for failover to an arbitrary node
2483       # FIXME: does not support file-backed instances
2484       # WARNING: we currently take into account down instances as well
2485       # as up ones, considering that even if they're down someone
2486       # might want to start them even in the event of a node failure.
2487       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2488         # we're skipping nodes marked offline and nodes in other groups from
2489         # the N+1 warning, since most likely we don't have good memory
2490         # infromation from them; we already list instances living on such
2491         # nodes, and that's enough warning
2492         continue
2493       #TODO(dynmem): also consider ballooning out other instances
2494       for prinode, instances in n_img.sbp.items():
2495         needed_mem = 0
2496         for instance in instances:
2497           bep = cluster_info.FillBE(instance_cfg[instance])
2498           if bep[constants.BE_AUTO_BALANCE]:
2499             needed_mem += bep[constants.BE_MINMEM]
2500         test = n_img.mfree < needed_mem
2501         self._ErrorIf(test, constants.CV_ENODEN1, node,
2502                       "not enough memory to accomodate instance failovers"
2503                       " should node %s fail (%dMiB needed, %dMiB available)",
2504                       prinode, needed_mem, n_img.mfree)
2505
2506   @classmethod
2507   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2508                    (files_all, files_opt, files_mc, files_vm)):
2509     """Verifies file checksums collected from all nodes.
2510
2511     @param errorif: Callback for reporting errors
2512     @param nodeinfo: List of L{objects.Node} objects
2513     @param master_node: Name of master node
2514     @param all_nvinfo: RPC results
2515
2516     """
2517     # Define functions determining which nodes to consider for a file
2518     files2nodefn = [
2519       (files_all, None),
2520       (files_mc, lambda node: (node.master_candidate or
2521                                node.name == master_node)),
2522       (files_vm, lambda node: node.vm_capable),
2523       ]
2524
2525     # Build mapping from filename to list of nodes which should have the file
2526     nodefiles = {}
2527     for (files, fn) in files2nodefn:
2528       if fn is None:
2529         filenodes = nodeinfo
2530       else:
2531         filenodes = filter(fn, nodeinfo)
2532       nodefiles.update((filename,
2533                         frozenset(map(operator.attrgetter("name"), filenodes)))
2534                        for filename in files)
2535
2536     assert set(nodefiles) == (files_all | files_mc | files_vm)
2537
2538     fileinfo = dict((filename, {}) for filename in nodefiles)
2539     ignore_nodes = set()
2540
2541     for node in nodeinfo:
2542       if node.offline:
2543         ignore_nodes.add(node.name)
2544         continue
2545
2546       nresult = all_nvinfo[node.name]
2547
2548       if nresult.fail_msg or not nresult.payload:
2549         node_files = None
2550       else:
2551         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2552
2553       test = not (node_files and isinstance(node_files, dict))
2554       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2555               "Node did not return file checksum data")
2556       if test:
2557         ignore_nodes.add(node.name)
2558         continue
2559
2560       # Build per-checksum mapping from filename to nodes having it
2561       for (filename, checksum) in node_files.items():
2562         assert filename in nodefiles
2563         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2564
2565     for (filename, checksums) in fileinfo.items():
2566       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2567
2568       # Nodes having the file
2569       with_file = frozenset(node_name
2570                             for nodes in fileinfo[filename].values()
2571                             for node_name in nodes) - ignore_nodes
2572
2573       expected_nodes = nodefiles[filename] - ignore_nodes
2574
2575       # Nodes missing file
2576       missing_file = expected_nodes - with_file
2577
2578       if filename in files_opt:
2579         # All or no nodes
2580         errorif(missing_file and missing_file != expected_nodes,
2581                 constants.CV_ECLUSTERFILECHECK, None,
2582                 "File %s is optional, but it must exist on all or no"
2583                 " nodes (not found on %s)",
2584                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2585       else:
2586         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2587                 "File %s is missing from node(s) %s", filename,
2588                 utils.CommaJoin(utils.NiceSort(missing_file)))
2589
2590         # Warn if a node has a file it shouldn't
2591         unexpected = with_file - expected_nodes
2592         errorif(unexpected,
2593                 constants.CV_ECLUSTERFILECHECK, None,
2594                 "File %s should not exist on node(s) %s",
2595                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2596
2597       # See if there are multiple versions of the file
2598       test = len(checksums) > 1
2599       if test:
2600         variants = ["variant %s on %s" %
2601                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2602                     for (idx, (checksum, nodes)) in
2603                       enumerate(sorted(checksums.items()))]
2604       else:
2605         variants = []
2606
2607       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2608               "File %s found with %s different checksums (%s)",
2609               filename, len(checksums), "; ".join(variants))
2610
2611   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2612                       drbd_map):
2613     """Verifies and the node DRBD status.
2614
2615     @type ninfo: L{objects.Node}
2616     @param ninfo: the node to check
2617     @param nresult: the remote results for the node
2618     @param instanceinfo: the dict of instances
2619     @param drbd_helper: the configured DRBD usermode helper
2620     @param drbd_map: the DRBD map as returned by
2621         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2622
2623     """
2624     node = ninfo.name
2625     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2626
2627     if drbd_helper:
2628       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2629       test = (helper_result is None)
2630       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2631                "no drbd usermode helper returned")
2632       if helper_result:
2633         status, payload = helper_result
2634         test = not status
2635         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2636                  "drbd usermode helper check unsuccessful: %s", payload)
2637         test = status and (payload != drbd_helper)
2638         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2639                  "wrong drbd usermode helper: %s", payload)
2640
2641     # compute the DRBD minors
2642     node_drbd = {}
2643     for minor, instance in drbd_map[node].items():
2644       test = instance not in instanceinfo
2645       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2646                "ghost instance '%s' in temporary DRBD map", instance)
2647         # ghost instance should not be running, but otherwise we
2648         # don't give double warnings (both ghost instance and
2649         # unallocated minor in use)
2650       if test:
2651         node_drbd[minor] = (instance, False)
2652       else:
2653         instance = instanceinfo[instance]
2654         node_drbd[minor] = (instance.name,
2655                             instance.admin_state == constants.ADMINST_UP)
2656
2657     # and now check them
2658     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2659     test = not isinstance(used_minors, (tuple, list))
2660     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2661              "cannot parse drbd status file: %s", str(used_minors))
2662     if test:
2663       # we cannot check drbd status
2664       return
2665
2666     for minor, (iname, must_exist) in node_drbd.items():
2667       test = minor not in used_minors and must_exist
2668       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2669                "drbd minor %d of instance %s is not active", minor, iname)
2670     for minor in used_minors:
2671       test = minor not in node_drbd
2672       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2673                "unallocated drbd minor %d is in use", minor)
2674
2675   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2676     """Builds the node OS structures.
2677
2678     @type ninfo: L{objects.Node}
2679     @param ninfo: the node to check
2680     @param nresult: the remote results for the node
2681     @param nimg: the node image object
2682
2683     """
2684     node = ninfo.name
2685     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2686
2687     remote_os = nresult.get(constants.NV_OSLIST, None)
2688     test = (not isinstance(remote_os, list) or
2689             not compat.all(isinstance(v, list) and len(v) == 7
2690                            for v in remote_os))
2691
2692     _ErrorIf(test, constants.CV_ENODEOS, node,
2693              "node hasn't returned valid OS data")
2694
2695     nimg.os_fail = test
2696
2697     if test:
2698       return
2699
2700     os_dict = {}
2701
2702     for (name, os_path, status, diagnose,
2703          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2704
2705       if name not in os_dict:
2706         os_dict[name] = []
2707
2708       # parameters is a list of lists instead of list of tuples due to
2709       # JSON lacking a real tuple type, fix it:
2710       parameters = [tuple(v) for v in parameters]
2711       os_dict[name].append((os_path, status, diagnose,
2712                             set(variants), set(parameters), set(api_ver)))
2713
2714     nimg.oslist = os_dict
2715
2716   def _VerifyNodeOS(self, ninfo, nimg, base):
2717     """Verifies the node OS list.
2718
2719     @type ninfo: L{objects.Node}
2720     @param ninfo: the node to check
2721     @param nimg: the node image object
2722     @param base: the 'template' node we match against (e.g. from the master)
2723
2724     """
2725     node = ninfo.name
2726     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2727
2728     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2729
2730     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2731     for os_name, os_data in nimg.oslist.items():
2732       assert os_data, "Empty OS status for OS %s?!" % os_name
2733       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2734       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2735                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2736       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2737                "OS '%s' has multiple entries (first one shadows the rest): %s",
2738                os_name, utils.CommaJoin([v[0] for v in os_data]))
2739       # comparisons with the 'base' image
2740       test = os_name not in base.oslist
2741       _ErrorIf(test, constants.CV_ENODEOS, node,
2742                "Extra OS %s not present on reference node (%s)",
2743                os_name, base.name)
2744       if test:
2745         continue
2746       assert base.oslist[os_name], "Base node has empty OS status?"
2747       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2748       if not b_status:
2749         # base OS is invalid, skipping
2750         continue
2751       for kind, a, b in [("API version", f_api, b_api),
2752                          ("variants list", f_var, b_var),
2753                          ("parameters", beautify_params(f_param),
2754                           beautify_params(b_param))]:
2755         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2756                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2757                  kind, os_name, base.name,
2758                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2759
2760     # check any missing OSes
2761     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2762     _ErrorIf(missing, constants.CV_ENODEOS, node,
2763              "OSes present on reference node %s but missing on this node: %s",
2764              base.name, utils.CommaJoin(missing))
2765
2766   def _VerifyOob(self, ninfo, nresult):
2767     """Verifies out of band functionality of a node.
2768
2769     @type ninfo: L{objects.Node}
2770     @param ninfo: the node to check
2771     @param nresult: the remote results for the node
2772
2773     """
2774     node = ninfo.name
2775     # We just have to verify the paths on master and/or master candidates
2776     # as the oob helper is invoked on the master
2777     if ((ninfo.master_candidate or ninfo.master_capable) and
2778         constants.NV_OOB_PATHS in nresult):
2779       for path_result in nresult[constants.NV_OOB_PATHS]:
2780         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2781
2782   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2783     """Verifies and updates the node volume data.
2784
2785     This function will update a L{NodeImage}'s internal structures
2786     with data from the remote call.
2787
2788     @type ninfo: L{objects.Node}
2789     @param ninfo: the node to check
2790     @param nresult: the remote results for the node
2791     @param nimg: the node image object
2792     @param vg_name: the configured VG name
2793
2794     """
2795     node = ninfo.name
2796     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2797
2798     nimg.lvm_fail = True
2799     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2800     if vg_name is None:
2801       pass
2802     elif isinstance(lvdata, basestring):
2803       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2804                utils.SafeEncode(lvdata))
2805     elif not isinstance(lvdata, dict):
2806       _ErrorIf(True, constants.CV_ENODELVM, node,
2807                "rpc call to node failed (lvlist)")
2808     else:
2809       nimg.volumes = lvdata
2810       nimg.lvm_fail = False
2811
2812   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2813     """Verifies and updates the node instance list.
2814
2815     If the listing was successful, then updates this node's instance
2816     list. Otherwise, it marks the RPC call as failed for the instance
2817     list key.
2818
2819     @type ninfo: L{objects.Node}
2820     @param ninfo: the node to check
2821     @param nresult: the remote results for the node
2822     @param nimg: the node image object
2823
2824     """
2825     idata = nresult.get(constants.NV_INSTANCELIST, None)
2826     test = not isinstance(idata, list)
2827     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2828                   "rpc call to node failed (instancelist): %s",
2829                   utils.SafeEncode(str(idata)))
2830     if test:
2831       nimg.hyp_fail = True
2832     else:
2833       nimg.instances = idata
2834
2835   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2836     """Verifies and computes a node information map
2837
2838     @type ninfo: L{objects.Node}
2839     @param ninfo: the node to check
2840     @param nresult: the remote results for the node
2841     @param nimg: the node image object
2842     @param vg_name: the configured VG name
2843
2844     """
2845     node = ninfo.name
2846     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2847
2848     # try to read free memory (from the hypervisor)
2849     hv_info = nresult.get(constants.NV_HVINFO, None)
2850     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2851     _ErrorIf(test, constants.CV_ENODEHV, node,
2852              "rpc call to node failed (hvinfo)")
2853     if not test:
2854       try:
2855         nimg.mfree = int(hv_info["memory_free"])
2856       except (ValueError, TypeError):
2857         _ErrorIf(True, constants.CV_ENODERPC, node,
2858                  "node returned invalid nodeinfo, check hypervisor")
2859
2860     # FIXME: devise a free space model for file based instances as well
2861     if vg_name is not None:
2862       test = (constants.NV_VGLIST not in nresult or
2863               vg_name not in nresult[constants.NV_VGLIST])
2864       _ErrorIf(test, constants.CV_ENODELVM, node,
2865                "node didn't return data for the volume group '%s'"
2866                " - it is either missing or broken", vg_name)
2867       if not test:
2868         try:
2869           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2870         except (ValueError, TypeError):
2871           _ErrorIf(True, constants.CV_ENODERPC, node,
2872                    "node returned invalid LVM info, check LVM status")
2873
2874   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2875     """Gets per-disk status information for all instances.
2876
2877     @type nodelist: list of strings
2878     @param nodelist: Node names
2879     @type node_image: dict of (name, L{objects.Node})
2880     @param node_image: Node objects
2881     @type instanceinfo: dict of (name, L{objects.Instance})
2882     @param instanceinfo: Instance objects
2883     @rtype: {instance: {node: [(succes, payload)]}}
2884     @return: a dictionary of per-instance dictionaries with nodes as
2885         keys and disk information as values; the disk information is a
2886         list of tuples (success, payload)
2887
2888     """
2889     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2890
2891     node_disks = {}
2892     node_disks_devonly = {}
2893     diskless_instances = set()
2894     diskless = constants.DT_DISKLESS
2895
2896     for nname in nodelist:
2897       node_instances = list(itertools.chain(node_image[nname].pinst,
2898                                             node_image[nname].sinst))
2899       diskless_instances.update(inst for inst in node_instances
2900                                 if instanceinfo[inst].disk_template == diskless)
2901       disks = [(inst, disk)
2902                for inst in node_instances
2903                for disk in instanceinfo[inst].disks]
2904
2905       if not disks:
2906         # No need to collect data
2907         continue
2908
2909       node_disks[nname] = disks
2910
2911       # _AnnotateDiskParams makes already copies of the disks
2912       devonly = []
2913       for (inst, dev) in disks:
2914         (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2915         self.cfg.SetDiskID(anno_disk, nname)
2916         devonly.append(anno_disk)
2917
2918       node_disks_devonly[nname] = devonly
2919
2920     assert len(node_disks) == len(node_disks_devonly)
2921
2922     # Collect data from all nodes with disks
2923     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2924                                                           node_disks_devonly)
2925
2926     assert len(result) == len(node_disks)
2927
2928     instdisk = {}
2929
2930     for (nname, nres) in result.items():
2931       disks = node_disks[nname]
2932
2933       if nres.offline:
2934         # No data from this node
2935         data = len(disks) * [(False, "node offline")]
2936       else:
2937         msg = nres.fail_msg
2938         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2939                  "while getting disk information: %s", msg)
2940         if msg:
2941           # No data from this node
2942           data = len(disks) * [(False, msg)]
2943         else:
2944           data = []
2945           for idx, i in enumerate(nres.payload):
2946             if isinstance(i, (tuple, list)) and len(i) == 2:
2947               data.append(i)
2948             else:
2949               logging.warning("Invalid result from node %s, entry %d: %s",
2950                               nname, idx, i)
2951               data.append((False, "Invalid result from the remote node"))
2952
2953       for ((inst, _), status) in zip(disks, data):
2954         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2955
2956     # Add empty entries for diskless instances.
2957     for inst in diskless_instances:
2958       assert inst not in instdisk
2959       instdisk[inst] = {}
2960
2961     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2962                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2963                       compat.all(isinstance(s, (tuple, list)) and
2964                                  len(s) == 2 for s in statuses)
2965                       for inst, nnames in instdisk.items()
2966                       for nname, statuses in nnames.items())
2967     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2968
2969     return instdisk
2970
2971   @staticmethod
2972   def _SshNodeSelector(group_uuid, all_nodes):
2973     """Create endless iterators for all potential SSH check hosts.
2974
2975     """
2976     nodes = [node for node in all_nodes
2977              if (node.group != group_uuid and
2978                  not node.offline)]
2979     keyfunc = operator.attrgetter("group")
2980
2981     return map(itertools.cycle,
2982                [sorted(map(operator.attrgetter("name"), names))
2983                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2984                                                   keyfunc)])
2985
2986   @classmethod
2987   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2988     """Choose which nodes should talk to which other nodes.
2989
2990     We will make nodes contact all nodes in their group, and one node from
2991     every other group.
2992
2993     @warning: This algorithm has a known issue if one node group is much
2994       smaller than others (e.g. just one node). In such a case all other
2995       nodes will talk to the single node.
2996
2997     """
2998     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2999     sel = cls._SshNodeSelector(group_uuid, all_nodes)
3000
3001     return (online_nodes,
3002             dict((name, sorted([i.next() for i in sel]))
3003                  for name in online_nodes))
3004
3005   def BuildHooksEnv(self):
3006     """Build hooks env.
3007
3008     Cluster-Verify hooks just ran in the post phase and their failure makes
3009     the output be logged in the verify output and the verification to fail.
3010
3011     """
3012     env = {
3013       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3014       }
3015
3016     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3017                for node in self.my_node_info.values())
3018
3019     return env
3020
3021   def BuildHooksNodes(self):
3022     """Build hooks nodes.
3023
3024     """
3025     return ([], self.my_node_names)
3026
3027   def Exec(self, feedback_fn):
3028     """Verify integrity of the node group, performing various test on nodes.
3029
3030     """
3031     # This method has too many local variables. pylint: disable=R0914
3032     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3033
3034     if not self.my_node_names:
3035       # empty node group
3036       feedback_fn("* Empty node group, skipping verification")
3037       return True
3038
3039     self.bad = False
3040     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3041     verbose = self.op.verbose
3042     self._feedback_fn = feedback_fn
3043
3044     vg_name = self.cfg.GetVGName()
3045     drbd_helper = self.cfg.GetDRBDHelper()
3046     cluster = self.cfg.GetClusterInfo()
3047     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3048     hypervisors = cluster.enabled_hypervisors
3049     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3050
3051     i_non_redundant = [] # Non redundant instances
3052     i_non_a_balanced = [] # Non auto-balanced instances
3053     i_offline = 0 # Count of offline instances
3054     n_offline = 0 # Count of offline nodes
3055     n_drained = 0 # Count of nodes being drained
3056     node_vol_should = {}
3057
3058     # FIXME: verify OS list
3059
3060     # File verification
3061     filemap = _ComputeAncillaryFiles(cluster, False)
3062
3063     # do local checksums
3064     master_node = self.master_node = self.cfg.GetMasterNode()
3065     master_ip = self.cfg.GetMasterIP()
3066
3067     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3068
3069     user_scripts = []
3070     if self.cfg.GetUseExternalMipScript():
3071       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3072
3073     node_verify_param = {
3074       constants.NV_FILELIST:
3075         utils.UniqueSequence(filename
3076                              for files in filemap
3077                              for filename in files),
3078       constants.NV_NODELIST:
3079         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3080                                   self.all_node_info.values()),
3081       constants.NV_HYPERVISOR: hypervisors,
3082       constants.NV_HVPARAMS:
3083         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3084       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3085                                  for node in node_data_list
3086                                  if not node.offline],
3087       constants.NV_INSTANCELIST: hypervisors,
3088       constants.NV_VERSION: None,
3089       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3090       constants.NV_NODESETUP: None,
3091       constants.NV_TIME: None,
3092       constants.NV_MASTERIP: (master_node, master_ip),
3093       constants.NV_OSLIST: None,
3094       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3095       constants.NV_USERSCRIPTS: user_scripts,
3096       }
3097
3098     if vg_name is not None:
3099       node_verify_param[constants.NV_VGLIST] = None
3100       node_verify_param[constants.NV_LVLIST] = vg_name
3101       node_verify_param[constants.NV_PVLIST] = [vg_name]
3102       node_verify_param[constants.NV_DRBDLIST] = None
3103
3104     if drbd_helper:
3105       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3106
3107     # bridge checks
3108     # FIXME: this needs to be changed per node-group, not cluster-wide
3109     bridges = set()
3110     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3111     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3112       bridges.add(default_nicpp[constants.NIC_LINK])
3113     for instance in self.my_inst_info.values():
3114       for nic in instance.nics:
3115         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3116         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3117           bridges.add(full_nic[constants.NIC_LINK])
3118
3119     if bridges:
3120       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3121
3122     # Build our expected cluster state
3123     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3124                                                  name=node.name,
3125                                                  vm_capable=node.vm_capable))
3126                       for node in node_data_list)
3127
3128     # Gather OOB paths
3129     oob_paths = []
3130     for node in self.all_node_info.values():
3131       path = _SupportsOob(self.cfg, node)
3132       if path and path not in oob_paths:
3133         oob_paths.append(path)
3134
3135     if oob_paths:
3136       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3137
3138     for instance in self.my_inst_names:
3139       inst_config = self.my_inst_info[instance]
3140       if inst_config.admin_state == constants.ADMINST_OFFLINE:
3141         i_offline += 1
3142
3143       for nname in inst_config.all_nodes:
3144         if nname not in node_image:
3145           gnode = self.NodeImage(name=nname)
3146           gnode.ghost = (nname not in self.all_node_info)
3147           node_image[nname] = gnode
3148
3149       inst_config.MapLVsByNode(node_vol_should)
3150
3151       pnode = inst_config.primary_node
3152       node_image[pnode].pinst.append(instance)
3153
3154       for snode in inst_config.secondary_nodes:
3155         nimg = node_image[snode]
3156         nimg.sinst.append(instance)
3157         if pnode not in nimg.sbp:
3158           nimg.sbp[pnode] = []
3159         nimg.sbp[pnode].append(instance)
3160
3161     # At this point, we have the in-memory data structures complete,
3162     # except for the runtime information, which we'll gather next
3163
3164     # Due to the way our RPC system works, exact response times cannot be
3165     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3166     # time before and after executing the request, we can at least have a time
3167     # window.
3168     nvinfo_starttime = time.time()
3169     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3170                                            node_verify_param,
3171                                            self.cfg.GetClusterName())
3172     nvinfo_endtime = time.time()
3173
3174     if self.extra_lv_nodes and vg_name is not None:
3175       extra_lv_nvinfo = \
3176           self.rpc.call_node_verify(self.extra_lv_nodes,
3177                                     {constants.NV_LVLIST: vg_name},
3178                                     self.cfg.GetClusterName())
3179     else:
3180       extra_lv_nvinfo = {}
3181
3182     all_drbd_map = self.cfg.ComputeDRBDMap()
3183
3184     feedback_fn("* Gathering disk information (%s nodes)" %
3185                 len(self.my_node_names))
3186     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3187                                      self.my_inst_info)
3188
3189     feedback_fn("* Verifying configuration file consistency")
3190
3191     # If not all nodes are being checked, we need to make sure the master node
3192     # and a non-checked vm_capable node are in the list.
3193     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3194     if absent_nodes:
3195       vf_nvinfo = all_nvinfo.copy()
3196       vf_node_info = list(self.my_node_info.values())
3197       additional_nodes = []
3198       if master_node not in self.my_node_info:
3199         additional_nodes.append(master_node)
3200         vf_node_info.append(self.all_node_info[master_node])
3201       # Add the first vm_capable node we find which is not included,
3202       # excluding the master node (which we already have)
3203       for node in absent_nodes:
3204         nodeinfo = self.all_node_info[node]
3205         if (nodeinfo.vm_capable and not nodeinfo.offline and
3206             node != master_node):
3207           additional_nodes.append(node)
3208           vf_node_info.append(self.all_node_info[node])
3209           break
3210       key = constants.NV_FILELIST
3211       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3212                                                  {key: node_verify_param[key]},
3213                                                  self.cfg.GetClusterName()))
3214     else:
3215       vf_nvinfo = all_nvinfo
3216       vf_node_info = self.my_node_info.values()
3217
3218     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3219
3220     feedback_fn("* Verifying node status")
3221
3222     refos_img = None
3223
3224     for node_i in node_data_list:
3225       node = node_i.name
3226       nimg = node_image[node]
3227
3228       if node_i.offline:
3229         if verbose:
3230           feedback_fn("* Skipping offline node %s" % (node,))
3231         n_offline += 1
3232         continue
3233
3234       if node == master_node:
3235         ntype = "master"
3236       elif node_i.master_candidate:
3237         ntype = "master candidate"
3238       elif node_i.drained:
3239         ntype = "drained"
3240         n_drained += 1
3241       else:
3242         ntype = "regular"
3243       if verbose:
3244         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3245
3246       msg = all_nvinfo[node].fail_msg
3247       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3248                msg)
3249       if msg:
3250         nimg.rpc_fail = True
3251         continue
3252
3253       nresult = all_nvinfo[node].payload
3254
3255       nimg.call_ok = self._VerifyNode(node_i, nresult)
3256       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3257       self._VerifyNodeNetwork(node_i, nresult)
3258       self._VerifyNodeUserScripts(node_i, nresult)
3259       self._VerifyOob(node_i, nresult)
3260
3261       if nimg.vm_capable:
3262         self._VerifyNodeLVM(node_i, nresult, vg_name)
3263         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3264                              all_drbd_map)
3265
3266         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3267         self._UpdateNodeInstances(node_i, nresult, nimg)
3268         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3269         self._UpdateNodeOS(node_i, nresult, nimg)
3270
3271         if not nimg.os_fail:
3272           if refos_img is None:
3273             refos_img = nimg
3274           self._VerifyNodeOS(node_i, nimg, refos_img)
3275         self._VerifyNodeBridges(node_i, nresult, bridges)
3276
3277         # Check whether all running instancies are primary for the node. (This
3278         # can no longer be done from _VerifyInstance below, since some of the
3279         # wrong instances could be from other node groups.)
3280         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3281
3282         for inst in non_primary_inst:
3283           test = inst in self.all_inst_info
3284           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3285                    "instance should not run on node %s", node_i.name)
3286           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3287                    "node is running unknown instance %s", inst)
3288
3289     for node, result in extra_lv_nvinfo.items():
3290       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3291                               node_image[node], vg_name)
3292
3293     feedback_fn("* Verifying instance status")
3294     for instance in self.my_inst_names:
3295       if verbose:
3296         feedback_fn("* Verifying instance %s" % instance)
3297       inst_config = self.my_inst_info[instance]
3298       self._VerifyInstance(instance, inst_config, node_image,
3299                            instdisk[instance])
3300       inst_nodes_offline = []
3301
3302       pnode = inst_config.primary_node
3303       pnode_img = node_image[pnode]
3304       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3305                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3306                " primary node failed", instance)
3307
3308       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3309                pnode_img.offline,
3310                constants.CV_EINSTANCEBADNODE, instance,
3311                "instance is marked as running and lives on offline node %s",
3312                inst_config.primary_node)
3313
3314       # If the instance is non-redundant we cannot survive losing its primary
3315       # node, so we are not N+1 compliant. On the other hand we have no disk
3316       # templates with more than one secondary so that situation is not well
3317       # supported either.
3318       # FIXME: does not support file-backed instances
3319       if not inst_config.secondary_nodes:
3320         i_non_redundant.append(instance)
3321
3322       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3323                constants.CV_EINSTANCELAYOUT,
3324                instance, "instance has multiple secondary nodes: %s",
3325                utils.CommaJoin(inst_config.secondary_nodes),
3326                code=self.ETYPE_WARNING)
3327
3328       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3329         pnode = inst_config.primary_node
3330         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3331         instance_groups = {}
3332
3333         for node in instance_nodes:
3334           instance_groups.setdefault(self.all_node_info[node].group,
3335                                      []).append(node)
3336
3337         pretty_list = [
3338           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3339           # Sort so that we always list the primary node first.
3340           for group, nodes in sorted(instance_groups.items(),
3341                                      key=lambda (_, nodes): pnode in nodes,
3342                                      reverse=True)]
3343
3344         self._ErrorIf(len(instance_groups) > 1,
3345                       constants.CV_EINSTANCESPLITGROUPS,
3346                       instance, "instance has primary and secondary nodes in"
3347                       " different groups: %s", utils.CommaJoin(pretty_list),
3348                       code=self.ETYPE_WARNING)
3349
3350       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3351         i_non_a_balanced.append(instance)
3352
3353       for snode in inst_config.secondary_nodes:
3354         s_img = node_image[snode]
3355         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3356                  snode, "instance %s, connection to secondary node failed",
3357                  instance)
3358
3359         if s_img.offline:
3360           inst_nodes_offline.append(snode)
3361
3362       # warn that the instance lives on offline nodes
3363       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3364                "instance has offline secondary node(s) %s",
3365                utils.CommaJoin(inst_nodes_offline))
3366       # ... or ghost/non-vm_capable nodes
3367       for node in inst_config.all_nodes:
3368         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3369                  instance, "instance lives on ghost node %s", node)
3370         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3371                  instance, "instance lives on non-vm_capable node %s", node)
3372
3373     feedback_fn("* Verifying orphan volumes")
3374     reserved = utils.FieldSet(*cluster.reserved_lvs)
3375
3376     # We will get spurious "unknown volume" warnings if any node of this group
3377     # is secondary for an instance whose primary is in another group. To avoid
3378     # them, we find these instances and add their volumes to node_vol_should.
3379     for inst in self.all_inst_info.values():
3380       for secondary in inst.secondary_nodes:
3381         if (secondary in self.my_node_info
3382             and inst.name not in self.my_inst_info):
3383           inst.MapLVsByNode(node_vol_should)
3384           break
3385
3386     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3387
3388     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3389       feedback_fn("* Verifying N+1 Memory redundancy")
3390       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3391
3392     feedback_fn("* Other Notes")
3393     if i_non_redundant:
3394       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3395                   % len(i_non_redundant))
3396
3397     if i_non_a_balanced:
3398       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3399                   % len(i_non_a_balanced))
3400
3401     if i_offline:
3402       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3403
3404     if n_offline:
3405       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3406
3407     if n_drained:
3408       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3409
3410     return not self.bad
3411
3412   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3413     """Analyze the post-hooks' result
3414
3415     This method analyses the hook result, handles it, and sends some
3416     nicely-formatted feedback back to the user.
3417
3418     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3419         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3420     @param hooks_results: the results of the multi-node hooks rpc call
3421     @param feedback_fn: function used send feedback back to the caller
3422     @param lu_result: previous Exec result
3423     @return: the new Exec result, based on the previous result
3424         and hook results
3425
3426     """
3427     # We only really run POST phase hooks, only for non-empty groups,
3428     # and are only interested in their results
3429     if not self.my_node_names:
3430       # empty node group
3431       pass
3432     elif phase == constants.HOOKS_PHASE_POST:
3433       # Used to change hooks' output to proper indentation
3434       feedback_fn("* Hooks Results")
3435       assert hooks_results, "invalid result from hooks"
3436
3437       for node_name in hooks_results:
3438         res = hooks_results[node_name]
3439         msg = res.fail_msg
3440         test = msg and not res.offline
3441         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3442                       "Communication failure in hooks execution: %s", msg)
3443         if res.offline or msg:
3444           # No need to investigate payload if node is offline or gave
3445           # an error.
3446           continue
3447         for script, hkr, output in res.payload:
3448           test = hkr == constants.HKR_FAIL
3449           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3450                         "Script %s failed, output:", script)
3451           if test:
3452             output = self._HOOKS_INDENT_RE.sub("      ", output)
3453             feedback_fn("%s" % output)
3454             lu_result = False
3455
3456     return lu_result
3457
3458
3459 class LUClusterVerifyDisks(NoHooksLU):
3460   """Verifies the cluster disks status.
3461
3462   """
3463   REQ_BGL = False
3464
3465   def ExpandNames(self):
3466     self.share_locks = _ShareAll()
3467     self.needed_locks = {
3468       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3469       }
3470
3471   def Exec(self, feedback_fn):
3472     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3473
3474     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3475     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3476                            for group in group_names])
3477
3478
3479 class LUGroupVerifyDisks(NoHooksLU):
3480   """Verifies the status of all disks in a node group.
3481
3482   """
3483   REQ_BGL = False
3484
3485   def ExpandNames(self):
3486     # Raises errors.OpPrereqError on its own if group can't be found
3487     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3488
3489     self.share_locks = _ShareAll()
3490     self.needed_locks = {
3491       locking.LEVEL_INSTANCE: [],
3492       locking.LEVEL_NODEGROUP: [],
3493       locking.LEVEL_NODE: [],
3494       }
3495
3496   def DeclareLocks(self, level):
3497     if level == locking.LEVEL_INSTANCE:
3498       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3499
3500       # Lock instances optimistically, needs verification once node and group
3501       # locks have been acquired
3502       self.needed_locks[locking.LEVEL_INSTANCE] = \
3503         self.cfg.GetNodeGroupInstances(self.group_uuid)
3504
3505     elif level == locking.LEVEL_NODEGROUP:
3506       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3507
3508       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3509         set([self.group_uuid] +
3510             # Lock all groups used by instances optimistically; this requires
3511             # going via the node before it's locked, requiring verification
3512             # later on
3513             [group_uuid
3514              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3515              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3516
3517     elif level == locking.LEVEL_NODE:
3518       # This will only lock the nodes in the group to be verified which contain
3519       # actual instances
3520       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3521       self._LockInstancesNodes()
3522
3523       # Lock all nodes in group to be verified
3524       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3525       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3526       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3527
3528   def CheckPrereq(self):
3529     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3530     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3531     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3532
3533     assert self.group_uuid in owned_groups
3534
3535     # Check if locked instances are still correct
3536     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3537
3538     # Get instance information
3539     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3540
3541     # Check if node groups for locked instances are still correct
3542     _CheckInstancesNodeGroups(self.cfg, self.instances,
3543                               owned_groups, owned_nodes, self.group_uuid)
3544
3545   def Exec(self, feedback_fn):
3546     """Verify integrity of cluster disks.
3547
3548     @rtype: tuple of three items
3549     @return: a tuple of (dict of node-to-node_error, list of instances
3550         which need activate-disks, dict of instance: (node, volume) for
3551         missing volumes
3552
3553     """
3554     res_nodes = {}
3555     res_instances = set()
3556     res_missing = {}
3557
3558     nv_dict = _MapInstanceDisksToNodes(
3559       [inst for inst in self.instances.values()
3560        if inst.admin_state == constants.ADMINST_UP])
3561
3562     if nv_dict:
3563       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3564                              set(self.cfg.GetVmCapableNodeList()))
3565
3566       node_lvs = self.rpc.call_lv_list(nodes, [])
3567
3568       for (node, node_res) in node_lvs.items():
3569         if node_res.offline:
3570           continue
3571
3572         msg = node_res.fail_msg
3573         if msg:
3574           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3575           res_nodes[node] = msg
3576           continue
3577
3578         for lv_name, (_, _, lv_online) in node_res.payload.items():
3579           inst = nv_dict.pop((node, lv_name), None)
3580           if not (lv_online or inst is None):
3581             res_instances.add(inst)
3582
3583       # any leftover items in nv_dict are missing LVs, let's arrange the data
3584       # better
3585       for key, inst in nv_dict.iteritems():
3586         res_missing.setdefault(inst, []).append(list(key))
3587
3588     return (res_nodes, list(res_instances), res_missing)
3589
3590
3591 class LUClusterRepairDiskSizes(NoHooksLU):
3592   """Verifies the cluster disks sizes.
3593
3594   """
3595   REQ_BGL = False
3596
3597   def ExpandNames(self):
3598     if self.op.instances:
3599       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3600       self.needed_locks = {
3601         locking.LEVEL_NODE_RES: [],
3602         locking.LEVEL_INSTANCE: self.wanted_names,
3603         }
3604       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3605     else:
3606       self.wanted_names = None
3607       self.needed_locks = {
3608         locking.LEVEL_NODE_RES: locking.ALL_SET,
3609         locking.LEVEL_INSTANCE: locking.ALL_SET,
3610         }
3611     self.share_locks = {
3612       locking.LEVEL_NODE_RES: 1,
3613       locking.LEVEL_INSTANCE: 0,
3614       }
3615
3616   def DeclareLocks(self, level):
3617     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3618       self._LockInstancesNodes(primary_only=True, level=level)
3619
3620   def CheckPrereq(self):
3621     """Check prerequisites.
3622
3623     This only checks the optional instance list against the existing names.
3624
3625     """
3626     if self.wanted_names is None:
3627       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3628
3629     self.wanted_instances = \
3630         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3631
3632   def _EnsureChildSizes(self, disk):
3633     """Ensure children of the disk have the needed disk size.
3634
3635     This is valid mainly for DRBD8 and fixes an issue where the
3636     children have smaller disk size.
3637
3638     @param disk: an L{ganeti.objects.Disk} object
3639
3640     """
3641     if disk.dev_type == constants.LD_DRBD8:
3642       assert disk.children, "Empty children for DRBD8?"
3643       fchild = disk.children[0]
3644       mismatch = fchild.size < disk.size
3645       if mismatch:
3646         self.LogInfo("Child disk has size %d, parent %d, fixing",
3647                      fchild.size, disk.size)
3648         fchild.size = disk.size
3649
3650       # and we recurse on this child only, not on the metadev
3651       return self._EnsureChildSizes(fchild) or mismatch
3652     else:
3653       return False
3654
3655   def Exec(self, feedback_fn):
3656     """Verify the size of cluster disks.
3657
3658     """
3659     # TODO: check child disks too
3660     # TODO: check differences in size between primary/secondary nodes
3661     per_node_disks = {}
3662     for instance in self.wanted_instances:
3663       pnode = instance.primary_node
3664       if pnode not in per_node_disks:
3665         per_node_disks[pnode] = []
3666       for idx, disk in enumerate(instance.disks):
3667         per_node_disks[pnode].append((instance, idx, disk))
3668
3669     assert not (frozenset(per_node_disks.keys()) -
3670                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3671       "Not owning correct locks"
3672     assert not self.owned_locks(locking.LEVEL_NODE)
3673
3674     changed = []
3675     for node, dskl in per_node_disks.items():
3676       newl = [v[2].Copy() for v in dskl]
3677       for dsk in newl:
3678         self.cfg.SetDiskID(dsk, node)
3679       result = self.rpc.call_blockdev_getsize(node, newl)
3680       if result.fail_msg:
3681         self.LogWarning("Failure in blockdev_getsize call to node"
3682                         " %s, ignoring", node)
3683         continue
3684       if len(result.payload) != len(dskl):
3685         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3686                         " result.payload=%s", node, len(dskl), result.payload)
3687         self.LogWarning("Invalid result from node %s, ignoring node results",
3688                         node)
3689         continue
3690       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3691         if size is None:
3692           self.LogWarning("Disk %d of instance %s did not return size"
3693                           " information, ignoring", idx, instance.name)
3694           continue
3695         if not isinstance(size, (int, long)):
3696           self.LogWarning("Disk %d of instance %s did not return valid"
3697                           " size information, ignoring", idx, instance.name)
3698           continue
3699         size = size >> 20
3700         if size != disk.size:
3701           self.LogInfo("Disk %d of instance %s has mismatched size,"
3702                        " correcting: recorded %d, actual %d", idx,
3703                        instance.name, disk.size, size)
3704           disk.size = size
3705           self.cfg.Update(instance, feedback_fn)
3706           changed.append((instance.name, idx, size))
3707         if self._EnsureChildSizes(disk):
3708           self.cfg.Update(instance, feedback_fn)
3709           changed.append((instance.name, idx, disk.size))
3710     return changed
3711
3712
3713 class LUClusterRename(LogicalUnit):
3714   """Rename the cluster.
3715
3716   """
3717   HPATH = "cluster-rename"
3718   HTYPE = constants.HTYPE_CLUSTER
3719
3720   def BuildHooksEnv(self):
3721     """Build hooks env.
3722
3723     """
3724     return {
3725       "OP_TARGET": self.cfg.GetClusterName(),
3726       "NEW_NAME": self.op.name,
3727       }
3728
3729   def BuildHooksNodes(self):
3730     """Build hooks nodes.
3731
3732     """
3733     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3734
3735   def CheckPrereq(self):
3736     """Verify that the passed name is a valid one.
3737
3738     """
3739     hostname = netutils.GetHostname(name=self.op.name,
3740                                     family=self.cfg.GetPrimaryIPFamily())
3741
3742     new_name = hostname.name
3743     self.ip = new_ip = hostname.ip
3744     old_name = self.cfg.GetClusterName()
3745     old_ip = self.cfg.GetMasterIP()
3746     if new_name == old_name and new_ip == old_ip:
3747       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3748                                  " cluster has changed",
3749                                  errors.ECODE_INVAL)
3750     if new_ip != old_ip:
3751       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3752         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3753                                    " reachable on the network" %
3754                                    new_ip, errors.ECODE_NOTUNIQUE)
3755
3756     self.op.name = new_name
3757
3758   def Exec(self, feedback_fn):
3759     """Rename the cluster.
3760
3761     """
3762     clustername = self.op.name
3763     new_ip = self.ip
3764
3765     # shutdown the master IP
3766     master_params = self.cfg.GetMasterNetworkParameters()
3767     ems = self.cfg.GetUseExternalMipScript()
3768     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3769                                                      master_params, ems)
3770     result.Raise("Could not disable the master role")
3771
3772     try:
3773       cluster = self.cfg.GetClusterInfo()
3774       cluster.cluster_name = clustername
3775       cluster.master_ip = new_ip
3776       self.cfg.Update(cluster, feedback_fn)
3777
3778       # update the known hosts file
3779       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3780       node_list = self.cfg.GetOnlineNodeList()
3781       try:
3782         node_list.remove(master_params.name)
3783       except ValueError:
3784         pass
3785       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3786     finally:
3787       master_params.ip = new_ip
3788       result = self.rpc.call_node_activate_master_ip(master_params.name,
3789                                                      master_params, ems)
3790       msg = result.fail_msg
3791       if msg:
3792         self.LogWarning("Could not re-enable the master role on"
3793                         " the master, please restart manually: %s", msg)
3794
3795     return clustername
3796
3797
3798 def _ValidateNetmask(cfg, netmask):
3799   """Checks if a netmask is valid.
3800
3801   @type cfg: L{config.ConfigWriter}
3802   @param cfg: The cluster configuration
3803   @type netmask: int
3804   @param netmask: the netmask to be verified
3805   @raise errors.OpPrereqError: if the validation fails
3806
3807   """
3808   ip_family = cfg.GetPrimaryIPFamily()
3809   try:
3810     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3811   except errors.ProgrammerError:
3812     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3813                                ip_family, errors.ECODE_INVAL)
3814   if not ipcls.ValidateNetmask(netmask):
3815     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3816                                 (netmask), errors.ECODE_INVAL)
3817
3818
3819 class LUClusterSetParams(LogicalUnit):
3820   """Change the parameters of the cluster.
3821
3822   """
3823   HPATH = "cluster-modify"
3824   HTYPE = constants.HTYPE_CLUSTER
3825   REQ_BGL = False
3826
3827   def CheckArguments(self):
3828     """Check parameters
3829
3830     """
3831     if self.op.uid_pool:
3832       uidpool.CheckUidPool(self.op.uid_pool)
3833
3834     if self.op.add_uids:
3835       uidpool.CheckUidPool(self.op.add_uids)
3836
3837     if self.op.remove_uids:
3838       uidpool.CheckUidPool(self.op.remove_uids)
3839
3840     if self.op.master_netmask is not None:
3841       _ValidateNetmask(self.cfg, self.op.master_netmask)
3842
3843     if self.op.diskparams:
3844       for dt_params in self.op.diskparams.values():
3845         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3846       try:
3847         utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3848       except errors.OpPrereqError, err:
3849         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3850                                    errors.ECODE_INVAL)
3851
3852   def ExpandNames(self):
3853     # FIXME: in the future maybe other cluster params won't require checking on
3854     # all nodes to be modified.
3855     self.needed_locks = {
3856       locking.LEVEL_NODE: locking.ALL_SET,
3857       locking.LEVEL_INSTANCE: locking.ALL_SET,
3858       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3859     }
3860     self.share_locks = {
3861         locking.LEVEL_NODE: 1,
3862         locking.LEVEL_INSTANCE: 1,
3863         locking.LEVEL_NODEGROUP: 1,
3864     }
3865
3866   def BuildHooksEnv(self):
3867     """Build hooks env.
3868
3869     """
3870     return {
3871       "OP_TARGET": self.cfg.GetClusterName(),
3872       "NEW_VG_NAME": self.op.vg_name,
3873       }
3874
3875   def BuildHooksNodes(self):
3876     """Build hooks nodes.
3877
3878     """
3879     mn = self.cfg.GetMasterNode()
3880     return ([mn], [mn])
3881
3882   def CheckPrereq(self):
3883     """Check prerequisites.
3884
3885     This checks whether the given params don't conflict and
3886     if the given volume group is valid.
3887
3888     """
3889     if self.op.vg_name is not None and not self.op.vg_name:
3890       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3891         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3892                                    " instances exist", errors.ECODE_INVAL)
3893
3894     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3895       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3896         raise errors.OpPrereqError("Cannot disable drbd helper while"
3897                                    " drbd-based instances exist",
3898                                    errors.ECODE_INVAL)
3899
3900     node_list = self.owned_locks(locking.LEVEL_NODE)
3901
3902     # if vg_name not None, checks given volume group on all nodes
3903     if self.op.vg_name:
3904       vglist = self.rpc.call_vg_list(node_list)
3905       for node in node_list:
3906         msg = vglist[node].fail_msg
3907         if msg:
3908           # ignoring down node
3909           self.LogWarning("Error while gathering data on node %s"
3910                           " (ignoring node): %s", node, msg)
3911           continue
3912         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3913                                               self.op.vg_name,
3914                                               constants.MIN_VG_SIZE)
3915         if vgstatus:
3916           raise errors.OpPrereqError("Error on node '%s': %s" %
3917                                      (node, vgstatus), errors.ECODE_ENVIRON)
3918
3919     if self.op.drbd_helper:
3920       # checks given drbd helper on all nodes
3921       helpers = self.rpc.call_drbd_helper(node_list)
3922       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3923         if ninfo.offline:
3924           self.LogInfo("Not checking drbd helper on offline node %s", node)
3925           continue
3926         msg = helpers[node].fail_msg
3927         if msg:
3928           raise errors.OpPrereqError("Error checking drbd helper on node"
3929                                      " '%s': %s" % (node, msg),
3930                                      errors.ECODE_ENVIRON)
3931         node_helper = helpers[node].payload
3932         if node_helper != self.op.drbd_helper:
3933           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3934                                      (node, node_helper), errors.ECODE_ENVIRON)
3935
3936     self.cluster = cluster = self.cfg.GetClusterInfo()
3937     # validate params changes
3938     if self.op.beparams:
3939       objects.UpgradeBeParams(self.op.beparams)
3940       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3941       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3942
3943     if self.op.ndparams:
3944       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3945       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3946
3947       # TODO: we need a more general way to handle resetting
3948       # cluster-level parameters to default values
3949       if self.new_ndparams["oob_program"] == "":
3950         self.new_ndparams["oob_program"] = \
3951             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3952
3953     if self.op.hv_state:
3954       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3955                                             self.cluster.hv_state_static)
3956       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3957                                for hv, values in new_hv_state.items())
3958
3959     if self.op.disk_state:
3960       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3961                                                 self.cluster.disk_state_static)
3962       self.new_disk_state = \
3963         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3964                             for name, values in svalues.items()))
3965              for storage, svalues in new_disk_state.items())
3966
3967     if self.op.ipolicy:
3968       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3969                                             group_policy=False)
3970
3971       all_instances = self.cfg.GetAllInstancesInfo().values()
3972       violations = set()
3973       for group in self.cfg.GetAllNodeGroupsInfo().values():
3974         instances = frozenset([inst for inst in all_instances
3975                                if compat.any(node in group.members
3976                                              for node in inst.all_nodes)])
3977         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3978         ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
3979         new = _ComputeNewInstanceViolations(ipol,
3980                                             new_ipolicy, instances)
3981         if new:
3982           violations.update(new)
3983
3984       if violations:
3985         self.LogWarning("After the ipolicy change the following instances"
3986                         " violate them: %s",
3987                         utils.CommaJoin(utils.NiceSort(violations)))
3988
3989     if self.op.nicparams:
3990       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3991       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3992       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3993       nic_errors = []
3994
3995       # check all instances for consistency
3996       for instance in self.cfg.GetAllInstancesInfo().values():
3997         for nic_idx, nic in enumerate(instance.nics):
3998           params_copy = copy.deepcopy(nic.nicparams)
3999           params_filled = objects.FillDict(self.new_nicparams, params_copy)
4000
4001           # check parameter syntax
4002           try:
4003             objects.NIC.CheckParameterSyntax(params_filled)
4004           except errors.ConfigurationError, err:
4005             nic_errors.append("Instance %s, nic/%d: %s" %
4006                               (instance.name, nic_idx, err))
4007
4008           # if we're moving instances to routed, check that they have an ip
4009           target_mode = params_filled[constants.NIC_MODE]
4010           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4011             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4012                               " address" % (instance.name, nic_idx))
4013       if nic_errors:
4014         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4015                                    "\n".join(nic_errors), errors.ECODE_INVAL)
4016
4017     # hypervisor list/parameters
4018     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4019     if self.op.hvparams:
4020       for hv_name, hv_dict in self.op.hvparams.items():
4021         if hv_name not in self.new_hvparams:
4022           self.new_hvparams[hv_name] = hv_dict
4023         else:
4024           self.new_hvparams[hv_name].update(hv_dict)
4025
4026     # disk template parameters
4027     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4028     if self.op.diskparams:
4029       for dt_name, dt_params in self.op.diskparams.items():
4030         if dt_name not in self.op.diskparams:
4031           self.new_diskparams[dt_name] = dt_params
4032         else:
4033           self.new_diskparams[dt_name].update(dt_params)
4034
4035     # os hypervisor parameters
4036     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4037     if self.op.os_hvp:
4038       for os_name, hvs in self.op.os_hvp.items():
4039         if os_name not in self.new_os_hvp:
4040           self.new_os_hvp[os_name] = hvs
4041         else:
4042           for hv_name, hv_dict in hvs.items():
4043             if hv_name not in self.new_os_hvp[os_name]:
4044               self.new_os_hvp[os_name][hv_name] = hv_dict
4045             else:
4046               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4047
4048     # os parameters
4049     self.new_osp = objects.FillDict(cluster.osparams, {})
4050     if self.op.osparams:
4051       for os_name, osp in self.op.osparams.items():
4052         if os_name not in self.new_osp:
4053           self.new_osp[os_name] = {}
4054
4055         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4056                                                   use_none=True)
4057
4058         if not self.new_osp[os_name]:
4059           # we removed all parameters
4060           del self.new_osp[os_name]
4061         else:
4062           # check the parameter validity (remote check)
4063           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4064                          os_name, self.new_osp[os_name])
4065
4066     # changes to the hypervisor list
4067     if self.op.enabled_hypervisors is not None:
4068       self.hv_list = self.op.enabled_hypervisors
4069       for hv in self.hv_list:
4070         # if the hypervisor doesn't already exist in the cluster
4071         # hvparams, we initialize it to empty, and then (in both
4072         # cases) we make sure to fill the defaults, as we might not
4073         # have a complete defaults list if the hypervisor wasn't
4074         # enabled before
4075         if hv not in new_hvp:
4076           new_hvp[hv] = {}
4077         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4078         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4079     else:
4080       self.hv_list = cluster.enabled_hypervisors
4081
4082     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4083       # either the enabled list has changed, or the parameters have, validate
4084       for hv_name, hv_params in self.new_hvparams.items():
4085         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4086             (self.op.enabled_hypervisors and
4087              hv_name in self.op.enabled_hypervisors)):
4088           # either this is a new hypervisor, or its parameters have changed
4089           hv_class = hypervisor.GetHypervisor(hv_name)
4090           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4091           hv_class.CheckParameterSyntax(hv_params)
4092           _CheckHVParams(self, node_list, hv_name, hv_params)
4093
4094     if self.op.os_hvp:
4095       # no need to check any newly-enabled hypervisors, since the
4096       # defaults have already been checked in the above code-block
4097       for os_name, os_hvp in self.new_os_hvp.items():
4098         for hv_name, hv_params in os_hvp.items():
4099           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4100           # we need to fill in the new os_hvp on top of the actual hv_p
4101           cluster_defaults = self.new_hvparams.get(hv_name, {})
4102           new_osp = objects.FillDict(cluster_defaults, hv_params)
4103           hv_class = hypervisor.GetHypervisor(hv_name)
4104           hv_class.CheckParameterSyntax(new_osp)
4105           _CheckHVParams(self, node_list, hv_name, new_osp)
4106
4107     if self.op.default_iallocator:
4108       alloc_script = utils.FindFile(self.op.default_iallocator,
4109                                     constants.IALLOCATOR_SEARCH_PATH,
4110                                     os.path.isfile)
4111       if alloc_script is None:
4112         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4113                                    " specified" % self.op.default_iallocator,
4114                                    errors.ECODE_INVAL)
4115
4116   def Exec(self, feedback_fn):
4117     """Change the parameters of the cluster.
4118
4119     """
4120     if self.op.vg_name is not None:
4121       new_volume = self.op.vg_name
4122       if not new_volume:
4123         new_volume = None
4124       if new_volume != self.cfg.GetVGName():
4125         self.cfg.SetVGName(new_volume)
4126       else:
4127         feedback_fn("Cluster LVM configuration already in desired"
4128                     " state, not changing")
4129     if self.op.drbd_helper is not None:
4130       new_helper = self.op.drbd_helper
4131       if not new_helper:
4132         new_helper = None
4133       if new_helper != self.cfg.GetDRBDHelper():
4134         self.cfg.SetDRBDHelper(new_helper)
4135       else:
4136         feedback_fn("Cluster DRBD helper already in desired state,"
4137                     " not changing")
4138     if self.op.hvparams:
4139       self.cluster.hvparams = self.new_hvparams
4140     if self.op.os_hvp:
4141       self.cluster.os_hvp = self.new_os_hvp
4142     if self.op.enabled_hypervisors is not None:
4143       self.cluster.hvparams = self.new_hvparams
4144       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4145     if self.op.beparams:
4146       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4147     if self.op.nicparams:
4148       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4149     if self.op.ipolicy:
4150       self.cluster.ipolicy = self.new_ipolicy
4151     if self.op.osparams:
4152       self.cluster.osparams = self.new_osp
4153     if self.op.ndparams:
4154       self.cluster.ndparams = self.new_ndparams
4155     if self.op.diskparams:
4156       self.cluster.diskparams = self.new_diskparams
4157     if self.op.hv_state:
4158       self.cluster.hv_state_static = self.new_hv_state
4159     if self.op.disk_state:
4160       self.cluster.disk_state_static = self.new_disk_state
4161
4162     if self.op.candidate_pool_size is not None:
4163       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4164       # we need to update the pool size here, otherwise the save will fail
4165       _AdjustCandidatePool(self, [])
4166
4167     if self.op.maintain_node_health is not None:
4168       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4169         feedback_fn("Note: CONFD was disabled at build time, node health"
4170                     " maintenance is not useful (still enabling it)")
4171       self.cluster.maintain_node_health = self.op.maintain_node_health
4172
4173     if self.op.prealloc_wipe_disks is not None:
4174       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4175
4176     if self.op.add_uids is not None:
4177       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4178
4179     if self.op.remove_uids is not None:
4180       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4181
4182     if self.op.uid_pool is not None:
4183       self.cluster.uid_pool = self.op.uid_pool
4184
4185     if self.op.default_iallocator is not None:
4186       self.cluster.default_iallocator = self.op.default_iallocator
4187
4188     if self.op.reserved_lvs is not None:
4189       self.cluster.reserved_lvs = self.op.reserved_lvs
4190
4191     if self.op.use_external_mip_script is not None:
4192       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4193
4194     def helper_os(aname, mods, desc):
4195       desc += " OS list"
4196       lst = getattr(self.cluster, aname)
4197       for key, val in mods:
4198         if key == constants.DDM_ADD:
4199           if val in lst:
4200             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4201           else:
4202             lst.append(val)
4203         elif key == constants.DDM_REMOVE:
4204           if val in lst:
4205             lst.remove(val)
4206           else:
4207             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4208         else:
4209           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4210
4211     if self.op.hidden_os:
4212       helper_os("hidden_os", self.op.hidden_os, "hidden")
4213
4214     if self.op.blacklisted_os:
4215       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4216
4217     if self.op.master_netdev:
4218       master_params = self.cfg.GetMasterNetworkParameters()
4219       ems = self.cfg.GetUseExternalMipScript()
4220       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4221                   self.cluster.master_netdev)
4222       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4223                                                        master_params, ems)
4224       result.Raise("Could not disable the master ip")
4225       feedback_fn("Changing master_netdev from %s to %s" %
4226                   (master_params.netdev, self.op.master_netdev))
4227       self.cluster.master_netdev = self.op.master_netdev
4228
4229     if self.op.master_netmask:
4230       master_params = self.cfg.GetMasterNetworkParameters()
4231       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4232       result = self.rpc.call_node_change_master_netmask(master_params.name,
4233                                                         master_params.netmask,
4234                                                         self.op.master_netmask,
4235                                                         master_params.ip,
4236                                                         master_params.netdev)
4237       if result.fail_msg:
4238         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4239         feedback_fn(msg)
4240
4241       self.cluster.master_netmask = self.op.master_netmask
4242
4243     self.cfg.Update(self.cluster, feedback_fn)
4244
4245     if self.op.master_netdev:
4246       master_params = self.cfg.GetMasterNetworkParameters()
4247       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4248                   self.op.master_netdev)
4249       ems = self.cfg.GetUseExternalMipScript()
4250       result = self.rpc.call_node_activate_master_ip(master_params.name,
4251                                                      master_params, ems)
4252       if result.fail_msg:
4253         self.LogWarning("Could not re-enable the master ip on"
4254                         " the master, please restart manually: %s",
4255                         result.fail_msg)
4256
4257
4258 def _UploadHelper(lu, nodes, fname):
4259   """Helper for uploading a file and showing warnings.
4260
4261   """
4262   if os.path.exists(fname):
4263     result = lu.rpc.call_upload_file(nodes, fname)
4264     for to_node, to_result in result.items():
4265       msg = to_result.fail_msg
4266       if msg:
4267         msg = ("Copy of file %s to node %s failed: %s" %
4268                (fname, to_node, msg))
4269         lu.proc.LogWarning(msg)
4270
4271
4272 def _ComputeAncillaryFiles(cluster, redist):
4273   """Compute files external to Ganeti which need to be consistent.
4274
4275   @type redist: boolean
4276   @param redist: Whether to include files which need to be redistributed
4277
4278   """
4279   # Compute files for all nodes
4280   files_all = set([
4281     constants.SSH_KNOWN_HOSTS_FILE,
4282     constants.CONFD_HMAC_KEY,
4283     constants.CLUSTER_DOMAIN_SECRET_FILE,
4284     constants.SPICE_CERT_FILE,
4285     constants.SPICE_CACERT_FILE,
4286     constants.RAPI_USERS_FILE,
4287     ])
4288
4289   if not redist:
4290     files_all.update(constants.ALL_CERT_FILES)
4291     files_all.update(ssconf.SimpleStore().GetFileList())
4292   else:
4293     # we need to ship at least the RAPI certificate
4294     files_all.add(constants.RAPI_CERT_FILE)
4295
4296   if cluster.modify_etc_hosts:
4297     files_all.add(constants.ETC_HOSTS)
4298
4299   if cluster.use_external_mip_script:
4300     files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4301
4302   # Files which are optional, these must:
4303   # - be present in one other category as well
4304   # - either exist or not exist on all nodes of that category (mc, vm all)
4305   files_opt = set([
4306     constants.RAPI_USERS_FILE,
4307     ])
4308
4309   # Files which should only be on master candidates
4310   files_mc = set()
4311
4312   if not redist:
4313     files_mc.add(constants.CLUSTER_CONF_FILE)
4314
4315   # Files which should only be on VM-capable nodes
4316   files_vm = set(
4317     filename
4318     for hv_name in cluster.enabled_hypervisors
4319     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4320
4321   files_opt |= set(
4322     filename
4323     for hv_name in cluster.enabled_hypervisors
4324     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4325
4326   # Filenames in each category must be unique
4327   all_files_set = files_all | files_mc | files_vm
4328   assert (len(all_files_set) ==
4329           sum(map(len, [files_all, files_mc, files_vm]))), \
4330          "Found file listed in more than one file list"
4331
4332   # Optional files must be present in one other category
4333   assert all_files_set.issuperset(files_opt), \
4334          "Optional file not in a different required list"
4335
4336   return (files_all, files_opt, files_mc, files_vm)
4337
4338
4339 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4340   """Distribute additional files which are part of the cluster configuration.
4341
4342   ConfigWriter takes care of distributing the config and ssconf files, but
4343   there are more files which should be distributed to all nodes. This function
4344   makes sure those are copied.
4345
4346   @param lu: calling logical unit
4347   @param additional_nodes: list of nodes not in the config to distribute to
4348   @type additional_vm: boolean
4349   @param additional_vm: whether the additional nodes are vm-capable or not
4350
4351   """
4352   # Gather target nodes
4353   cluster = lu.cfg.GetClusterInfo()
4354   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4355
4356   online_nodes = lu.cfg.GetOnlineNodeList()
4357   online_set = frozenset(online_nodes)
4358   vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4359
4360   if additional_nodes is not None:
4361     online_nodes.extend(additional_nodes)
4362     if additional_vm:
4363       vm_nodes.extend(additional_nodes)
4364
4365   # Never distribute to master node
4366   for nodelist in [online_nodes, vm_nodes]:
4367     if master_info.name in nodelist:
4368       nodelist.remove(master_info.name)
4369
4370   # Gather file lists
4371   (files_all, _, files_mc, files_vm) = \
4372     _ComputeAncillaryFiles(cluster, True)
4373
4374   # Never re-distribute configuration file from here
4375   assert not (constants.CLUSTER_CONF_FILE in files_all or
4376               constants.CLUSTER_CONF_FILE in files_vm)
4377   assert not files_mc, "Master candidates not handled in this function"
4378
4379   filemap = [
4380     (online_nodes, files_all),
4381     (vm_nodes, files_vm),
4382     ]
4383
4384   # Upload the files
4385   for (node_list, files) in filemap:
4386     for fname in files:
4387       _UploadHelper(lu, node_list, fname)
4388
4389
4390 class LUClusterRedistConf(NoHooksLU):
4391   """Force the redistribution of cluster configuration.
4392
4393   This is a very simple LU.
4394
4395   """
4396   REQ_BGL = False
4397
4398   def ExpandNames(self):
4399     self.needed_locks = {
4400       locking.LEVEL_NODE: locking.ALL_SET,
4401     }
4402     self.share_locks[locking.LEVEL_NODE] = 1
4403
4404   def Exec(self, feedback_fn):
4405     """Redistribute the configuration.
4406
4407     """
4408     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4409     _RedistributeAncillaryFiles(self)
4410
4411
4412 class LUClusterActivateMasterIp(NoHooksLU):
4413   """Activate the master IP on the master node.
4414
4415   """
4416   def Exec(self, feedback_fn):
4417     """Activate the master IP.
4418
4419     """
4420     master_params = self.cfg.GetMasterNetworkParameters()
4421     ems = self.cfg.GetUseExternalMipScript()
4422     result = self.rpc.call_node_activate_master_ip(master_params.name,
4423                                                    master_params, ems)
4424     result.Raise("Could not activate the master IP")
4425
4426
4427 class LUClusterDeactivateMasterIp(NoHooksLU):
4428   """Deactivate the master IP on the master node.
4429
4430   """
4431   def Exec(self, feedback_fn):
4432     """Deactivate the master IP.
4433
4434     """
4435     master_params = self.cfg.GetMasterNetworkParameters()
4436     ems = self.cfg.GetUseExternalMipScript()
4437     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4438                                                      master_params, ems)
4439     result.Raise("Could not deactivate the master IP")
4440
4441
4442 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4443   """Sleep and poll for an instance's disk to sync.
4444
4445   """
4446   if not instance.disks or disks is not None and not disks:
4447     return True
4448
4449   disks = _ExpandCheckDisks(instance, disks)
4450
4451   if not oneshot:
4452     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4453
4454   node = instance.primary_node
4455
4456   for dev in disks:
4457     lu.cfg.SetDiskID(dev, node)
4458
4459   # TODO: Convert to utils.Retry
4460
4461   retries = 0
4462   degr_retries = 10 # in seconds, as we sleep 1 second each time
4463   while True:
4464     max_time = 0
4465     done = True
4466     cumul_degraded = False
4467     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4468     msg = rstats.fail_msg
4469     if msg:
4470       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4471       retries += 1
4472       if retries >= 10:
4473         raise errors.RemoteError("Can't contact node %s for mirror data,"
4474                                  " aborting." % node)
4475       time.sleep(6)
4476       continue
4477     rstats = rstats.payload
4478     retries = 0
4479     for i, mstat in enumerate(rstats):
4480       if mstat is None:
4481         lu.LogWarning("Can't compute data for node %s/%s",
4482                            node, disks[i].iv_name)
4483         continue
4484
4485       cumul_degraded = (cumul_degraded or
4486                         (mstat.is_degraded and mstat.sync_percent is None))
4487       if mstat.sync_percent is not None:
4488         done = False
4489         if mstat.estimated_time is not None:
4490           rem_time = ("%s remaining (estimated)" %
4491                       utils.FormatSeconds(mstat.estimated_time))
4492           max_time = mstat.estimated_time
4493         else:
4494           rem_time = "no time estimate"
4495         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4496                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4497
4498     # if we're done but degraded, let's do a few small retries, to
4499     # make sure we see a stable and not transient situation; therefore
4500     # we force restart of the loop
4501     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4502       logging.info("Degraded disks found, %d retries left", degr_retries)
4503       degr_retries -= 1
4504       time.sleep(1)
4505       continue
4506
4507     if done or oneshot:
4508       break
4509
4510     time.sleep(min(60, max_time))
4511
4512   if done:
4513     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4514   return not cumul_degraded
4515
4516
4517 def _BlockdevFind(lu, node, dev, instance):
4518   """Wrapper around call_blockdev_find to annotate diskparams.
4519
4520   @param lu: A reference to the lu object
4521   @param node: The node to call out
4522   @param dev: The device to find
4523   @param instance: The instance object the device belongs to
4524   @returns The result of the rpc call
4525
4526   """
4527   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4528   return lu.rpc.call_blockdev_find(node, disk)
4529
4530
4531 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4532   """Wrapper around L{_CheckDiskConsistencyInner}.
4533
4534   """
4535   (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4536   return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4537                                     ldisk=ldisk)
4538
4539
4540 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4541                                ldisk=False):
4542   """Check that mirrors are not degraded.
4543
4544   @attention: The device has to be annotated already.
4545
4546   The ldisk parameter, if True, will change the test from the
4547   is_degraded attribute (which represents overall non-ok status for
4548   the device(s)) to the ldisk (representing the local storage status).
4549
4550   """
4551   lu.cfg.SetDiskID(dev, node)
4552
4553   result = True
4554
4555   if on_primary or dev.AssembleOnSecondary():
4556     rstats = lu.rpc.call_blockdev_find(node, dev)
4557     msg = rstats.fail_msg
4558     if msg:
4559       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4560       result = False
4561     elif not rstats.payload:
4562       lu.LogWarning("Can't find disk on node %s", node)
4563       result = False
4564     else:
4565       if ldisk:
4566         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4567       else:
4568         result = result and not rstats.payload.is_degraded
4569
4570   if dev.children:
4571     for child in dev.children:
4572       result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4573                                                      on_primary)
4574
4575   return result
4576
4577
4578 class LUOobCommand(NoHooksLU):
4579   """Logical unit for OOB handling.
4580
4581   """
4582   REQ_BGL = False
4583   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4584
4585   def ExpandNames(self):
4586     """Gather locks we need.
4587
4588     """
4589     if self.op.node_names:
4590       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4591       lock_names = self.op.node_names
4592     else:
4593       lock_names = locking.ALL_SET
4594
4595     self.needed_locks = {
4596       locking.LEVEL_NODE: lock_names,
4597       }
4598
4599   def CheckPrereq(self):
4600     """Check prerequisites.
4601
4602     This checks:
4603      - the node exists in the configuration
4604      - OOB is supported
4605
4606     Any errors are signaled by raising errors.OpPrereqError.
4607
4608     """
4609     self.nodes = []
4610     self.master_node = self.cfg.GetMasterNode()
4611
4612     assert self.op.power_delay >= 0.0
4613
4614     if self.op.node_names:
4615       if (self.op.command in self._SKIP_MASTER and
4616           self.master_node in self.op.node_names):
4617         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4618         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4619
4620         if master_oob_handler:
4621           additional_text = ("run '%s %s %s' if you want to operate on the"
4622                              " master regardless") % (master_oob_handler,
4623                                                       self.op.command,
4624                                                       self.master_node)
4625         else:
4626           additional_text = "it does not support out-of-band operations"
4627
4628         raise errors.OpPrereqError(("Operating on the master node %s is not"
4629                                     " allowed for %s; %s") %
4630                                    (self.master_node, self.op.command,
4631                                     additional_text), errors.ECODE_INVAL)
4632     else:
4633       self.op.node_names = self.cfg.GetNodeList()
4634       if self.op.command in self._SKIP_MASTER:
4635         self.op.node_names.remove(self.master_node)
4636
4637     if self.op.command in self._SKIP_MASTER:
4638       assert self.master_node not in self.op.node_names
4639
4640     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4641       if node is None:
4642         raise errors.OpPrereqError("Node %s not found" % node_name,
4643                                    errors.ECODE_NOENT)
4644       else:
4645         self.nodes.append(node)
4646
4647       if (not self.op.ignore_status and
4648           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4649         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4650                                     " not marked offline") % node_name,
4651                                    errors.ECODE_STATE)
4652
4653   def Exec(self, feedback_fn):
4654     """Execute OOB and return result if we expect any.
4655
4656     """
4657     master_node = self.master_node
4658     ret = []
4659
4660     for idx, node in enumerate(utils.NiceSort(self.nodes,
4661                                               key=lambda node: node.name)):
4662       node_entry = [(constants.RS_NORMAL, node.name)]
4663       ret.append(node_entry)
4664
4665       oob_program = _SupportsOob(self.cfg, node)
4666
4667       if not oob_program:
4668         node_entry.append((constants.RS_UNAVAIL, None))
4669         continue
4670
4671       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4672                    self.op.command, oob_program, node.name)
4673       result = self.rpc.call_run_oob(master_node, oob_program,
4674                                      self.op.command, node.name,
4675                                      self.op.timeout)
4676
4677       if result.fail_msg:
4678         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4679                         node.name, result.fail_msg)
4680         node_entry.append((constants.RS_NODATA, None))
4681       else:
4682         try:
4683           self._CheckPayload(result)
4684         except errors.OpExecError, err:
4685           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4686                           node.name, err)
4687           node_entry.append((constants.RS_NODATA, None))
4688         else:
4689           if self.op.command == constants.OOB_HEALTH:
4690             # For health we should log important events
4691             for item, status in result.payload:
4692               if status in [constants.OOB_STATUS_WARNING,
4693                             constants.OOB_STATUS_CRITICAL]:
4694                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4695                                 item, node.name, status)
4696
4697           if self.op.command == constants.OOB_POWER_ON:
4698             node.powered = True
4699           elif self.op.command == constants.OOB_POWER_OFF:
4700             node.powered = False
4701           elif self.op.command == constants.OOB_POWER_STATUS:
4702             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4703             if powered != node.powered:
4704               logging.warning(("Recorded power state (%s) of node '%s' does not"
4705                                " match actual power state (%s)"), node.powered,
4706                               node.name, powered)
4707
4708           # For configuration changing commands we should update the node
4709           if self.op.command in (constants.OOB_POWER_ON,
4710                                  constants.OOB_POWER_OFF):
4711             self.cfg.Update(node, feedback_fn)
4712
4713           node_entry.append((constants.RS_NORMAL, result.payload))
4714
4715           if (self.op.command == constants.OOB_POWER_ON and
4716               idx < len(self.nodes) - 1):
4717             time.sleep(self.op.power_delay)
4718
4719     return ret
4720
4721   def _CheckPayload(self, result):
4722     """Checks if the payload is valid.
4723
4724     @param result: RPC result
4725     @raises errors.OpExecError: If payload is not valid
4726
4727     """
4728     errs = []
4729     if self.op.command == constants.OOB_HEALTH:
4730       if not isinstance(result.payload, list):
4731         errs.append("command 'health' is expected to return a list but got %s" %
4732                     type(result.payload))
4733       else:
4734         for item, status in result.payload:
4735           if status not in constants.OOB_STATUSES:
4736             errs.append("health item '%s' has invalid status '%s'" %
4737                         (item, status))
4738
4739     if self.op.command == constants.OOB_POWER_STATUS:
4740       if not isinstance(result.payload, dict):
4741         errs.append("power-status is expected to return a dict but got %s" %
4742                     type(result.payload))
4743
4744     if self.op.command in [
4745       constants.OOB_POWER_ON,
4746       constants.OOB_POWER_OFF,
4747       constants.OOB_POWER_CYCLE,
4748       ]:
4749       if result.payload is not None:
4750         errs.append("%s is expected to not return payload but got '%s'" %
4751                     (self.op.command, result.payload))
4752
4753     if errs:
4754       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4755                                utils.CommaJoin(errs))
4756
4757
4758 class _OsQuery(_QueryBase):
4759   FIELDS = query.OS_FIELDS
4760
4761   def ExpandNames(self, lu):
4762     # Lock all nodes in shared mode
4763     # Temporary removal of locks, should be reverted later
4764     # TODO: reintroduce locks when they are lighter-weight
4765     lu.needed_locks = {}
4766     #self.share_locks[locking.LEVEL_NODE] = 1
4767     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4768
4769     # The following variables interact with _QueryBase._GetNames
4770     if self.names:
4771       self.wanted = self.names
4772     else:
4773       self.wanted = locking.ALL_SET
4774
4775     self.do_locking = self.use_locking
4776
4777   def DeclareLocks(self, lu, level):
4778     pass
4779
4780   @staticmethod
4781   def _DiagnoseByOS(rlist):
4782     """Remaps a per-node return list into an a per-os per-node dictionary
4783
4784     @param rlist: a map with node names as keys and OS objects as values
4785
4786     @rtype: dict
4787     @return: a dictionary with osnames as keys and as value another
4788         map, with nodes as keys and tuples of (path, status, diagnose,
4789         variants, parameters, api_versions) as values, eg::
4790
4791           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4792                                      (/srv/..., False, "invalid api")],
4793                            "node2": [(/srv/..., True, "", [], [])]}
4794           }
4795
4796     """
4797     all_os = {}
4798     # we build here the list of nodes that didn't fail the RPC (at RPC
4799     # level), so that nodes with a non-responding node daemon don't
4800     # make all OSes invalid
4801     good_nodes = [node_name for node_name in rlist
4802                   if not rlist[node_name].fail_msg]
4803     for node_name, nr in rlist.items():
4804       if nr.fail_msg or not nr.payload:
4805         continue
4806       for (name, path, status, diagnose, variants,
4807            params, api_versions) in nr.payload:
4808         if name not in all_os:
4809           # build a list of nodes for this os containing empty lists
4810           # for each node in node_list
4811           all_os[name] = {}
4812           for nname in good_nodes:
4813             all_os[name][nname] = []
4814         # convert params from [name, help] to (name, help)
4815         params = [tuple(v) for v in params]
4816         all_os[name][node_name].append((path, status, diagnose,
4817                                         variants, params, api_versions))
4818     return all_os
4819
4820   def _GetQueryData(self, lu):
4821     """Computes the list of nodes and their attributes.
4822
4823     """
4824     # Locking is not used
4825     assert not (compat.any(lu.glm.is_owned(level)
4826                            for level in locking.LEVELS
4827                            if level != locking.LEVEL_CLUSTER) or
4828                 self.do_locking or self.use_locking)
4829
4830     valid_nodes = [node.name
4831                    for node in lu.cfg.GetAllNodesInfo().values()
4832                    if not node.offline and node.vm_capable]
4833     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4834     cluster = lu.cfg.GetClusterInfo()
4835
4836     data = {}
4837
4838     for (os_name, os_data) in pol.items():
4839       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4840                           hidden=(os_name in cluster.hidden_os),
4841                           blacklisted=(os_name in cluster.blacklisted_os))
4842
4843       variants = set()
4844       parameters = set()
4845       api_versions = set()
4846
4847       for idx, osl in enumerate(os_data.values()):
4848         info.valid = bool(info.valid and osl and osl[0][1])
4849         if not info.valid:
4850           break
4851
4852         (node_variants, node_params, node_api) = osl[0][3:6]
4853         if idx == 0:
4854           # First entry
4855           variants.update(node_variants)
4856           parameters.update(node_params)
4857           api_versions.update(node_api)
4858         else:
4859           # Filter out inconsistent values
4860           variants.intersection_update(node_variants)
4861           parameters.intersection_update(node_params)
4862           api_versions.intersection_update(node_api)
4863
4864       info.variants = list(variants)
4865       info.parameters = list(parameters)
4866       info.api_versions = list(api_versions)
4867
4868       data[os_name] = info
4869
4870     # Prepare data in requested order
4871     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4872             if name in data]
4873
4874
4875 class LUOsDiagnose(NoHooksLU):
4876   """Logical unit for OS diagnose/query.
4877
4878   """
4879   REQ_BGL = False
4880
4881   @staticmethod
4882   def _BuildFilter(fields, names):
4883     """Builds a filter for querying OSes.
4884
4885     """
4886     name_filter = qlang.MakeSimpleFilter("name", names)
4887
4888     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4889     # respective field is not requested
4890     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4891                      for fname in ["hidden", "blacklisted"]
4892                      if fname not in fields]
4893     if "valid" not in fields:
4894       status_filter.append([qlang.OP_TRUE, "valid"])
4895
4896     if status_filter:
4897       status_filter.insert(0, qlang.OP_AND)
4898     else:
4899       status_filter = None
4900
4901     if name_filter and status_filter:
4902       return [qlang.OP_AND, name_filter, status_filter]
4903     elif name_filter:
4904       return name_filter
4905     else:
4906       return status_filter
4907
4908   def CheckArguments(self):
4909     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4910                        self.op.output_fields, False)
4911
4912   def ExpandNames(self):
4913     self.oq.ExpandNames(self)
4914
4915   def Exec(self, feedback_fn):
4916     return self.oq.OldStyleQuery(self)
4917
4918
4919 class LUNodeRemove(LogicalUnit):
4920   """Logical unit for removing a node.
4921
4922   """
4923   HPATH = "node-remove"
4924   HTYPE = constants.HTYPE_NODE
4925
4926   def BuildHooksEnv(self):
4927     """Build hooks env.
4928
4929     """
4930     return {
4931       "OP_TARGET": self.op.node_name,
4932       "NODE_NAME": self.op.node_name,
4933       }
4934
4935   def BuildHooksNodes(self):
4936     """Build hooks nodes.
4937
4938     This doesn't run on the target node in the pre phase as a failed
4939     node would then be impossible to remove.
4940
4941     """
4942     all_nodes = self.cfg.GetNodeList()
4943     try:
4944       all_nodes.remove(self.op.node_name)
4945     except ValueError:
4946       pass
4947     return (all_nodes, all_nodes)
4948
4949   def CheckPrereq(self):
4950     """Check prerequisites.
4951
4952     This checks:
4953      - the node exists in the configuration
4954      - it does not have primary or secondary instances
4955      - it's not the master
4956
4957     Any errors are signaled by raising errors.OpPrereqError.
4958
4959     """
4960     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4961     node = self.cfg.GetNodeInfo(self.op.node_name)
4962     assert node is not None
4963
4964     masternode = self.cfg.GetMasterNode()
4965     if node.name == masternode:
4966       raise errors.OpPrereqError("Node is the master node, failover to another"
4967                                  " node is required", errors.ECODE_INVAL)
4968
4969     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4970       if node.name in instance.all_nodes:
4971         raise errors.OpPrereqError("Instance %s is still running on the node,"
4972                                    " please remove first" % instance_name,
4973                                    errors.ECODE_INVAL)
4974     self.op.node_name = node.name
4975     self.node = node
4976
4977   def Exec(self, feedback_fn):
4978     """Removes the node from the cluster.
4979
4980     """
4981     node = self.node
4982     logging.info("Stopping the node daemon and removing configs from node %s",
4983                  node.name)
4984
4985     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4986
4987     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4988       "Not owning BGL"
4989
4990     # Promote nodes to master candidate as needed
4991     _AdjustCandidatePool(self, exceptions=[node.name])
4992     self.context.RemoveNode(node.name)
4993
4994     # Run post hooks on the node before it's removed
4995     _RunPostHook(self, node.name)
4996
4997     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4998     msg = result.fail_msg
4999     if msg:
5000       self.LogWarning("Errors encountered on the remote node while leaving"
5001                       " the cluster: %s", msg)
5002
5003     # Remove node from our /etc/hosts
5004     if self.cfg.GetClusterInfo().modify_etc_hosts:
5005       master_node = self.cfg.GetMasterNode()
5006       result = self.rpc.call_etc_hosts_modify(master_node,
5007                                               constants.ETC_HOSTS_REMOVE,
5008                                               node.name, None)
5009       result.Raise("Can't update hosts file with new host data")
5010       _RedistributeAncillaryFiles(self)
5011
5012
5013 class _NodeQuery(_QueryBase):
5014   FIELDS = query.NODE_FIELDS
5015
5016   def ExpandNames(self, lu):
5017     lu.needed_locks = {}
5018     lu.share_locks = _ShareAll()
5019
5020     if self.names:
5021       self.wanted = _GetWantedNodes(lu, self.names)
5022     else:
5023       self.wanted = locking.ALL_SET
5024
5025     self.do_locking = (self.use_locking and
5026                        query.NQ_LIVE in self.requested_data)
5027
5028     if self.do_locking:
5029       # If any non-static field is requested we need to lock the nodes
5030       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5031
5032   def DeclareLocks(self, lu, level):
5033     pass
5034
5035   def _GetQueryData(self, lu):
5036     """Computes the list of nodes and their attributes.
5037
5038     """
5039     all_info = lu.cfg.GetAllNodesInfo()
5040
5041     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5042
5043     # Gather data as requested
5044     if query.NQ_LIVE in self.requested_data:
5045       # filter out non-vm_capable nodes
5046       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5047
5048       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5049                                         [lu.cfg.GetHypervisorType()])
5050       live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5051                        for (name, nresult) in node_data.items()
5052                        if not nresult.fail_msg and nresult.payload)
5053     else:
5054       live_data = None
5055
5056     if query.NQ_INST in self.requested_data:
5057       node_to_primary = dict([(name, set()) for name in nodenames])
5058       node_to_secondary = dict([(name, set()) for name in nodenames])
5059
5060       inst_data = lu.cfg.GetAllInstancesInfo()
5061
5062       for inst in inst_data.values():
5063         if inst.primary_node in node_to_primary:
5064           node_to_primary[inst.primary_node].add(inst.name)
5065         for secnode in inst.secondary_nodes:
5066           if secnode in node_to_secondary:
5067             node_to_secondary[secnode].add(inst.name)
5068     else:
5069       node_to_primary = None
5070       node_to_secondary = None
5071
5072     if query.NQ_OOB in self.requested_data:
5073       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5074                          for name, node in all_info.iteritems())
5075     else:
5076       oob_support = None
5077
5078     if query.NQ_GROUP in self.requested_data:
5079       groups = lu.cfg.GetAllNodeGroupsInfo()
5080     else:
5081       groups = {}
5082
5083     return query.NodeQueryData([all_info[name] for name in nodenames],
5084                                live_data, lu.cfg.GetMasterNode(),
5085                                node_to_primary, node_to_secondary, groups,
5086                                oob_support, lu.cfg.GetClusterInfo())
5087
5088
5089 class LUNodeQuery(NoHooksLU):
5090   """Logical unit for querying nodes.
5091
5092   """
5093   # pylint: disable=W0142
5094   REQ_BGL = False
5095
5096   def CheckArguments(self):
5097     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5098                          self.op.output_fields, self.op.use_locking)
5099
5100   def ExpandNames(self):
5101     self.nq.ExpandNames(self)
5102
5103   def DeclareLocks(self, level):
5104     self.nq.DeclareLocks(self, level)
5105
5106   def Exec(self, feedback_fn):
5107     return self.nq.OldStyleQuery(self)
5108
5109
5110 class LUNodeQueryvols(NoHooksLU):
5111   """Logical unit for getting volumes on node(s).
5112
5113   """
5114   REQ_BGL = False
5115   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5116   _FIELDS_STATIC = utils.FieldSet("node")
5117
5118   def CheckArguments(self):
5119     _CheckOutputFields(static=self._FIELDS_STATIC,
5120                        dynamic=self._FIELDS_DYNAMIC,
5121                        selected=self.op.output_fields)
5122
5123   def ExpandNames(self):
5124     self.share_locks = _ShareAll()
5125     self.needed_locks = {}
5126
5127     if not self.op.nodes:
5128       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5129     else:
5130       self.needed_locks[locking.LEVEL_NODE] = \
5131         _GetWantedNodes(self, self.op.nodes)
5132
5133   def Exec(self, feedback_fn):
5134     """Computes the list of nodes and their attributes.
5135
5136     """
5137     nodenames = self.owned_locks(locking.LEVEL_NODE)
5138     volumes = self.rpc.call_node_volumes(nodenames)
5139
5140     ilist = self.cfg.GetAllInstancesInfo()
5141     vol2inst = _MapInstanceDisksToNodes(ilist.values())
5142
5143     output = []
5144     for node in nodenames:
5145       nresult = volumes[node]
5146       if nresult.offline:
5147         continue
5148       msg = nresult.fail_msg
5149       if msg:
5150         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5151         continue
5152
5153       node_vols = sorted(nresult.payload,
5154                          key=operator.itemgetter("dev"))
5155
5156       for vol in node_vols:
5157         node_output = []
5158         for field in self.op.output_fields:
5159           if field == "node":
5160             val = node
5161           elif field == "phys":
5162             val = vol["dev"]
5163           elif field == "vg":
5164             val = vol["vg"]
5165           elif field == "name":
5166             val = vol["name"]
5167           elif field == "size":
5168             val = int(float(vol["size"]))
5169           elif field == "instance":
5170             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5171           else:
5172             raise errors.ParameterError(field)
5173           node_output.append(str(val))
5174
5175         output.append(node_output)
5176
5177     return output
5178
5179
5180 class LUNodeQueryStorage(NoHooksLU):
5181   """Logical unit for getting information on storage units on node(s).
5182
5183   """
5184   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5185   REQ_BGL = False
5186
5187   def CheckArguments(self):
5188     _CheckOutputFields(static=self._FIELDS_STATIC,
5189                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5190                        selected=self.op.output_fields)
5191
5192   def ExpandNames(self):
5193     self.share_locks = _ShareAll()
5194     self.needed_locks = {}
5195
5196     if self.op.nodes:
5197       self.needed_locks[locking.LEVEL_NODE] = \
5198         _GetWantedNodes(self, self.op.nodes)
5199     else:
5200       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5201
5202   def Exec(self, feedback_fn):
5203     """Computes the list of nodes and their attributes.
5204
5205     """
5206     self.nodes = self.owned_locks(locking.LEVEL_NODE)
5207
5208     # Always get name to sort by
5209     if constants.SF_NAME in self.op.output_fields:
5210       fields = self.op.output_fields[:]
5211     else:
5212       fields = [constants.SF_NAME] + self.op.output_fields
5213
5214     # Never ask for node or type as it's only known to the LU
5215     for extra in [constants.SF_NODE, constants.SF_TYPE]:
5216       while extra in fields:
5217         fields.remove(extra)
5218
5219     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5220     name_idx = field_idx[constants.SF_NAME]
5221
5222     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5223     data = self.rpc.call_storage_list(self.nodes,
5224                                       self.op.storage_type, st_args,
5225                                       self.op.name, fields)
5226
5227     result = []
5228
5229     for node in utils.NiceSort(self.nodes):
5230       nresult = data[node]
5231       if nresult.offline:
5232         continue
5233
5234       msg = nresult.fail_msg
5235       if msg:
5236         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5237         continue
5238
5239       rows = dict([(row[name_idx], row) for row in nresult.payload])
5240
5241       for name in utils.NiceSort(rows.keys()):
5242         row = rows[name]
5243
5244         out = []
5245
5246         for field in self.op.output_fields:
5247           if field == constants.SF_NODE:
5248             val = node
5249           elif field == constants.SF_TYPE:
5250             val = self.op.storage_type
5251           elif field in field_idx:
5252             val = row[field_idx[field]]
5253           else:
5254             raise errors.ParameterError(field)
5255
5256           out.append(val)
5257
5258         result.append(out)
5259
5260     return result
5261
5262
5263 class _InstanceQuery(_QueryBase):
5264   FIELDS = query.INSTANCE_FIELDS
5265
5266   def ExpandNames(self, lu):
5267     lu.needed_locks = {}
5268     lu.share_locks = _ShareAll()
5269
5270     if self.names:
5271       self.wanted = _GetWantedInstances(lu, self.names)
5272     else:
5273       self.wanted = locking.ALL_SET
5274
5275     self.do_locking = (self.use_locking and
5276                        query.IQ_LIVE in self.requested_data)
5277     if self.do_locking:
5278       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5279       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5280       lu.needed_locks[locking.LEVEL_NODE] = []
5281       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5282
5283     self.do_grouplocks = (self.do_locking and
5284                           query.IQ_NODES in self.requested_data)
5285
5286   def DeclareLocks(self, lu, level):
5287     if self.do_locking:
5288       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5289         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5290
5291         # Lock all groups used by instances optimistically; this requires going
5292         # via the node before it's locked, requiring verification later on
5293         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5294           set(group_uuid
5295               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5296               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5297       elif level == locking.LEVEL_NODE:
5298         lu._LockInstancesNodes() # pylint: disable=W0212
5299
5300   @staticmethod
5301   def _CheckGroupLocks(lu):
5302     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5303     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5304
5305     # Check if node groups for locked instances are still correct
5306     for instance_name in owned_instances:
5307       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5308
5309   def _GetQueryData(self, lu):
5310     """Computes the list of instances and their attributes.
5311
5312     """
5313     if self.do_grouplocks:
5314       self._CheckGroupLocks(lu)
5315
5316     cluster = lu.cfg.GetClusterInfo()
5317     all_info = lu.cfg.GetAllInstancesInfo()
5318
5319     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5320
5321     instance_list = [all_info[name] for name in instance_names]
5322     nodes = frozenset(itertools.chain(*(inst.all_nodes
5323                                         for inst in instance_list)))
5324     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5325     bad_nodes = []
5326     offline_nodes = []
5327     wrongnode_inst = set()
5328
5329     # Gather data as requested
5330     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5331       live_data = {}
5332       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5333       for name in nodes:
5334         result = node_data[name]
5335         if result.offline:
5336           # offline nodes will be in both lists
5337           assert result.fail_msg
5338           offline_nodes.append(name)
5339         if result.fail_msg:
5340           bad_nodes.append(name)
5341         elif result.payload:
5342           for inst in result.payload:
5343             if inst in all_info:
5344               if all_info[inst].primary_node == name:
5345                 live_data.update(result.payload)
5346               else:
5347                 wrongnode_inst.add(inst)
5348             else:
5349               # orphan instance; we don't list it here as we don't
5350               # handle this case yet in the output of instance listing
5351               logging.warning("Orphan instance '%s' found on node %s",
5352                               inst, name)
5353         # else no instance is alive
5354     else:
5355       live_data = {}
5356
5357     if query.IQ_DISKUSAGE in self.requested_data:
5358       gmi = ganeti.masterd.instance
5359       disk_usage = dict((inst.name,
5360                          gmi.ComputeDiskSize(inst.disk_template,
5361                                              [{constants.IDISK_SIZE: disk.size}
5362                                               for disk in inst.disks]))
5363                         for inst in instance_list)
5364     else:
5365       disk_usage = None
5366
5367     if query.IQ_CONSOLE in self.requested_data:
5368       consinfo = {}
5369       for inst in instance_list:
5370         if inst.name in live_data:
5371           # Instance is running
5372           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5373         else:
5374           consinfo[inst.name] = None
5375       assert set(consinfo.keys()) == set(instance_names)
5376     else:
5377       consinfo = None
5378
5379     if query.IQ_NODES in self.requested_data:
5380       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5381                                             instance_list)))
5382       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5383       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5384                     for uuid in set(map(operator.attrgetter("group"),
5385                                         nodes.values())))
5386     else:
5387       nodes = None
5388       groups = None
5389
5390     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5391                                    disk_usage, offline_nodes, bad_nodes,
5392                                    live_data, wrongnode_inst, consinfo,
5393                                    nodes, groups)
5394
5395
5396 class LUQuery(NoHooksLU):
5397   """Query for resources/items of a certain kind.
5398
5399   """
5400   # pylint: disable=W0142
5401   REQ_BGL = False
5402
5403   def CheckArguments(self):
5404     qcls = _GetQueryImplementation(self.op.what)
5405
5406     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5407
5408   def ExpandNames(self):
5409     self.impl.ExpandNames(self)
5410
5411   def DeclareLocks(self, level):
5412     self.impl.DeclareLocks(self, level)
5413
5414   def Exec(self, feedback_fn):
5415     return self.impl.NewStyleQuery(self)
5416
5417
5418 class LUQueryFields(NoHooksLU):
5419   """Query for resources/items of a certain kind.
5420
5421   """
5422   # pylint: disable=W0142
5423   REQ_BGL = False
5424
5425   def CheckArguments(self):
5426     self.qcls = _GetQueryImplementation(self.op.what)
5427
5428   def ExpandNames(self):
5429     self.needed_locks = {}
5430
5431   def Exec(self, feedback_fn):
5432     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5433
5434
5435 class LUNodeModifyStorage(NoHooksLU):
5436   """Logical unit for modifying a storage volume on a node.
5437
5438   """
5439   REQ_BGL = False
5440
5441   def CheckArguments(self):
5442     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5443
5444     storage_type = self.op.storage_type
5445
5446     try:
5447       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5448     except KeyError:
5449       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5450                                  " modified" % storage_type,
5451                                  errors.ECODE_INVAL)
5452
5453     diff = set(self.op.changes.keys()) - modifiable
5454     if diff:
5455       raise errors.OpPrereqError("The following fields can not be modified for"
5456                                  " storage units of type '%s': %r" %
5457                                  (storage_type, list(diff)),
5458                                  errors.ECODE_INVAL)
5459
5460   def ExpandNames(self):
5461     self.needed_locks = {
5462       locking.LEVEL_NODE: self.op.node_name,
5463       }
5464
5465   def Exec(self, feedback_fn):
5466     """Computes the list of nodes and their attributes.
5467
5468     """
5469     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5470     result = self.rpc.call_storage_modify(self.op.node_name,
5471                                           self.op.storage_type, st_args,
5472                                           self.op.name, self.op.changes)
5473     result.Raise("Failed to modify storage unit '%s' on %s" %
5474                  (self.op.name, self.op.node_name))
5475
5476
5477 class LUNodeAdd(LogicalUnit):
5478   """Logical unit for adding node to the cluster.
5479
5480   """
5481   HPATH = "node-add"
5482   HTYPE = constants.HTYPE_NODE
5483   _NFLAGS = ["master_capable", "vm_capable"]
5484
5485   def CheckArguments(self):
5486     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5487     # validate/normalize the node name
5488     self.hostname = netutils.GetHostname(name=self.op.node_name,
5489                                          family=self.primary_ip_family)
5490     self.op.node_name = self.hostname.name
5491
5492     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5493       raise errors.OpPrereqError("Cannot readd the master node",
5494                                  errors.ECODE_STATE)
5495
5496     if self.op.readd and self.op.group:
5497       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5498                                  " being readded", errors.ECODE_INVAL)
5499
5500   def BuildHooksEnv(self):
5501     """Build hooks env.
5502
5503     This will run on all nodes before, and on all nodes + the new node after.
5504
5505     """
5506     return {
5507       "OP_TARGET": self.op.node_name,
5508       "NODE_NAME": self.op.node_name,
5509       "NODE_PIP": self.op.primary_ip,
5510       "NODE_SIP": self.op.secondary_ip,
5511       "MASTER_CAPABLE": str(self.op.master_capable),
5512       "VM_CAPABLE": str(self.op.vm_capable),
5513       }
5514
5515   def BuildHooksNodes(self):
5516     """Build hooks nodes.
5517
5518     """
5519     # Exclude added node
5520     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5521     post_nodes = pre_nodes + [self.op.node_name, ]
5522
5523     return (pre_nodes, post_nodes)
5524
5525   def CheckPrereq(self):
5526     """Check prerequisites.
5527
5528     This checks:
5529      - the new node is not already in the config
5530      - it is resolvable
5531      - its parameters (single/dual homed) matches the cluster
5532
5533     Any errors are signaled by raising errors.OpPrereqError.
5534
5535     """
5536     cfg = self.cfg
5537     hostname = self.hostname
5538     node = hostname.name
5539     primary_ip = self.op.primary_ip = hostname.ip
5540     if self.op.secondary_ip is None:
5541       if self.primary_ip_family == netutils.IP6Address.family:
5542         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5543                                    " IPv4 address must be given as secondary",
5544                                    errors.ECODE_INVAL)
5545       self.op.secondary_ip = primary_ip
5546
5547     secondary_ip = self.op.secondary_ip
5548     if not netutils.IP4Address.IsValid(secondary_ip):
5549       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5550                                  " address" % secondary_ip, errors.ECODE_INVAL)
5551
5552     node_list = cfg.GetNodeList()
5553     if not self.op.readd and node in node_list:
5554       raise errors.OpPrereqError("Node %s is already in the configuration" %
5555                                  node, errors.ECODE_EXISTS)
5556     elif self.op.readd and node not in node_list:
5557       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5558                                  errors.ECODE_NOENT)
5559
5560     self.changed_primary_ip = False
5561
5562     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5563       if self.op.readd and node == existing_node_name:
5564         if existing_node.secondary_ip != secondary_ip:
5565           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5566                                      " address configuration as before",
5567                                      errors.ECODE_INVAL)
5568         if existing_node.primary_ip != primary_ip:
5569           self.changed_primary_ip = True
5570
5571         continue
5572
5573       if (existing_node.primary_ip == primary_ip or
5574           existing_node.secondary_ip == primary_ip or
5575           existing_node.primary_ip == secondary_ip or
5576           existing_node.secondary_ip == secondary_ip):
5577         raise errors.OpPrereqError("New node ip address(es) conflict with"
5578                                    " existing node %s" % existing_node.name,
5579                                    errors.ECODE_NOTUNIQUE)
5580
5581     # After this 'if' block, None is no longer a valid value for the
5582     # _capable op attributes
5583     if self.op.readd:
5584       old_node = self.cfg.GetNodeInfo(node)
5585       assert old_node is not None, "Can't retrieve locked node %s" % node
5586       for attr in self._NFLAGS:
5587         if getattr(self.op, attr) is None:
5588           setattr(self.op, attr, getattr(old_node, attr))
5589     else:
5590       for attr in self._NFLAGS:
5591         if getattr(self.op, attr) is None:
5592           setattr(self.op, attr, True)
5593
5594     if self.op.readd and not self.op.vm_capable:
5595       pri, sec = cfg.GetNodeInstances(node)
5596       if pri or sec:
5597         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5598                                    " flag set to false, but it already holds"
5599                                    " instances" % node,
5600                                    errors.ECODE_STATE)
5601
5602     # check that the type of the node (single versus dual homed) is the
5603     # same as for the master
5604     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5605     master_singlehomed = myself.secondary_ip == myself.primary_ip
5606     newbie_singlehomed = secondary_ip == primary_ip
5607     if master_singlehomed != newbie_singlehomed:
5608       if master_singlehomed:
5609         raise errors.OpPrereqError("The master has no secondary ip but the"
5610                                    " new node has one",
5611                                    errors.ECODE_INVAL)
5612       else:
5613         raise errors.OpPrereqError("The master has a secondary ip but the"
5614                                    " new node doesn't have one",
5615                                    errors.ECODE_INVAL)
5616
5617     # checks reachability
5618     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5619       raise errors.OpPrereqError("Node not reachable by ping",
5620                                  errors.ECODE_ENVIRON)
5621
5622     if not newbie_singlehomed:
5623       # check reachability from my secondary ip to newbie's secondary ip
5624       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5625                               source=myself.secondary_ip):
5626         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5627                                    " based ping to node daemon port",
5628                                    errors.ECODE_ENVIRON)
5629
5630     if self.op.readd:
5631       exceptions = [node]
5632     else:
5633       exceptions = []
5634
5635     if self.op.master_capable:
5636       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5637     else:
5638       self.master_candidate = False
5639
5640     if self.op.readd:
5641       self.new_node = old_node
5642     else:
5643       node_group = cfg.LookupNodeGroup(self.op.group)
5644       self.new_node = objects.Node(name=node,
5645                                    primary_ip=primary_ip,
5646                                    secondary_ip=secondary_ip,
5647                                    master_candidate=self.master_candidate,
5648                                    offline=False, drained=False,
5649                                    group=node_group)
5650
5651     if self.op.ndparams:
5652       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5653
5654     if self.op.hv_state:
5655       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5656
5657     if self.op.disk_state:
5658       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5659
5660     # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5661     #       it a property on the base class.
5662     result = rpc.DnsOnlyRunner().call_version([node])[node]
5663     result.Raise("Can't get version information from node %s" % node)
5664     if constants.PROTOCOL_VERSION == result.payload:
5665       logging.info("Communication to node %s fine, sw version %s match",
5666                    node, result.payload)
5667     else:
5668       raise errors.OpPrereqError("Version mismatch master version %s,"
5669                                  " node version %s" %
5670                                  (constants.PROTOCOL_VERSION, result.payload),
5671                                  errors.ECODE_ENVIRON)
5672
5673   def Exec(self, feedback_fn):
5674     """Adds the new node to the cluster.
5675
5676     """
5677     new_node = self.new_node
5678     node = new_node.name
5679
5680     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5681       "Not owning BGL"
5682
5683     # We adding a new node so we assume it's powered
5684     new_node.powered = True
5685
5686     # for re-adds, reset the offline/drained/master-candidate flags;
5687     # we need to reset here, otherwise offline would prevent RPC calls
5688     # later in the procedure; this also means that if the re-add
5689     # fails, we are left with a non-offlined, broken node
5690     if self.op.readd:
5691       new_node.drained = new_node.offline = False # pylint: disable=W0201
5692       self.LogInfo("Readding a node, the offline/drained flags were reset")
5693       # if we demote the node, we do cleanup later in the procedure
5694       new_node.master_candidate = self.master_candidate
5695       if self.changed_primary_ip:
5696         new_node.primary_ip = self.op.primary_ip
5697
5698     # copy the master/vm_capable flags
5699     for attr in self._NFLAGS:
5700       setattr(new_node, attr, getattr(self.op, attr))
5701
5702     # notify the user about any possible mc promotion
5703     if new_node.master_candidate:
5704       self.LogInfo("Node will be a master candidate")
5705
5706     if self.op.ndparams:
5707       new_node.ndparams = self.op.ndparams
5708     else:
5709       new_node.ndparams = {}
5710
5711     if self.op.hv_state:
5712       new_node.hv_state_static = self.new_hv_state
5713
5714     if self.op.disk_state:
5715       new_node.disk_state_static = self.new_disk_state
5716
5717     # Add node to our /etc/hosts, and add key to known_hosts
5718     if self.cfg.GetClusterInfo().modify_etc_hosts:
5719       master_node = self.cfg.GetMasterNode()
5720       result = self.rpc.call_etc_hosts_modify(master_node,
5721                                               constants.ETC_HOSTS_ADD,
5722                                               self.hostname.name,
5723                                               self.hostname.ip)
5724       result.Raise("Can't update hosts file with new host data")
5725
5726     if new_node.secondary_ip != new_node.primary_ip:
5727       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5728                                False)
5729
5730     node_verify_list = [self.cfg.GetMasterNode()]
5731     node_verify_param = {
5732       constants.NV_NODELIST: ([node], {}),
5733       # TODO: do a node-net-test as well?
5734     }
5735
5736     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5737                                        self.cfg.GetClusterName())
5738     for verifier in node_verify_list:
5739       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5740       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5741       if nl_payload:
5742         for failed in nl_payload:
5743           feedback_fn("ssh/hostname verification failed"
5744                       " (checking from %s): %s" %
5745                       (verifier, nl_payload[failed]))
5746         raise errors.OpExecError("ssh/hostname verification failed")
5747
5748     if self.op.readd:
5749       _RedistributeAncillaryFiles(self)
5750       self.context.ReaddNode(new_node)
5751       # make sure we redistribute the config
5752       self.cfg.Update(new_node, feedback_fn)
5753       # and make sure the new node will not have old files around
5754       if not new_node.master_candidate:
5755         result = self.rpc.call_node_demote_from_mc(new_node.name)
5756         msg = result.fail_msg
5757         if msg:
5758           self.LogWarning("Node failed to demote itself from master"
5759                           " candidate status: %s" % msg)
5760     else:
5761       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5762                                   additional_vm=self.op.vm_capable)
5763       self.context.AddNode(new_node, self.proc.GetECId())
5764
5765
5766 class LUNodeSetParams(LogicalUnit):
5767   """Modifies the parameters of a node.
5768
5769   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5770       to the node role (as _ROLE_*)
5771   @cvar _R2F: a dictionary from node role to tuples of flags
5772   @cvar _FLAGS: a list of attribute names corresponding to the flags
5773
5774   """
5775   HPATH = "node-modify"
5776   HTYPE = constants.HTYPE_NODE
5777   REQ_BGL = False
5778   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5779   _F2R = {
5780     (True, False, False): _ROLE_CANDIDATE,
5781     (False, True, False): _ROLE_DRAINED,
5782     (False, False, True): _ROLE_OFFLINE,
5783     (False, False, False): _ROLE_REGULAR,
5784     }
5785   _R2F = dict((v, k) for k, v in _F2R.items())
5786   _FLAGS = ["master_candidate", "drained", "offline"]
5787
5788   def CheckArguments(self):
5789     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5790     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5791                 self.op.master_capable, self.op.vm_capable,
5792                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5793                 self.op.disk_state]
5794     if all_mods.count(None) == len(all_mods):
5795       raise errors.OpPrereqError("Please pass at least one modification",
5796                                  errors.ECODE_INVAL)
5797     if all_mods.count(True) > 1:
5798       raise errors.OpPrereqError("Can't set the node into more than one"
5799                                  " state at the same time",
5800                                  errors.ECODE_INVAL)
5801
5802     # Boolean value that tells us whether we might be demoting from MC
5803     self.might_demote = (self.op.master_candidate is False or
5804                          self.op.offline is True or
5805                          self.op.drained is True or
5806                          self.op.master_capable is False)
5807
5808     if self.op.secondary_ip:
5809       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5810         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5811                                    " address" % self.op.secondary_ip,
5812                                    errors.ECODE_INVAL)
5813
5814     self.lock_all = self.op.auto_promote and self.might_demote
5815     self.lock_instances = self.op.secondary_ip is not None
5816
5817   def _InstanceFilter(self, instance):
5818     """Filter for getting affected instances.
5819
5820     """
5821     return (instance.disk_template in constants.DTS_INT_MIRROR and
5822             self.op.node_name in instance.all_nodes)
5823
5824   def ExpandNames(self):
5825     if self.lock_all:
5826       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5827     else:
5828       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5829
5830     # Since modifying a node can have severe effects on currently running
5831     # operations the resource lock is at least acquired in shared mode
5832     self.needed_locks[locking.LEVEL_NODE_RES] = \
5833       self.needed_locks[locking.LEVEL_NODE]
5834
5835     # Get node resource and instance locks in shared mode; they are not used
5836     # for anything but read-only access
5837     self.share_locks[locking.LEVEL_NODE_RES] = 1
5838     self.share_locks[locking.LEVEL_INSTANCE] = 1
5839
5840     if self.lock_instances:
5841       self.needed_locks[locking.LEVEL_INSTANCE] = \
5842         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5843
5844   def BuildHooksEnv(self):
5845     """Build hooks env.
5846
5847     This runs on the master node.
5848
5849     """
5850     return {
5851       "OP_TARGET": self.op.node_name,
5852       "MASTER_CANDIDATE": str(self.op.master_candidate),
5853       "OFFLINE": str(self.op.offline),
5854       "DRAINED": str(self.op.drained),
5855       "MASTER_CAPABLE": str(self.op.master_capable),
5856       "VM_CAPABLE": str(self.op.vm_capable),
5857       }
5858
5859   def BuildHooksNodes(self):
5860     """Build hooks nodes.
5861
5862     """
5863     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5864     return (nl, nl)
5865
5866   def CheckPrereq(self):
5867     """Check prerequisites.
5868
5869     This only checks the instance list against the existing names.
5870
5871     """
5872     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5873
5874     if self.lock_instances:
5875       affected_instances = \
5876         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5877
5878       # Verify instance locks
5879       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5880       wanted_instances = frozenset(affected_instances.keys())
5881       if wanted_instances - owned_instances:
5882         raise errors.OpPrereqError("Instances affected by changing node %s's"
5883                                    " secondary IP address have changed since"
5884                                    " locks were acquired, wanted '%s', have"
5885                                    " '%s'; retry the operation" %
5886                                    (self.op.node_name,
5887                                     utils.CommaJoin(wanted_instances),
5888                                     utils.CommaJoin(owned_instances)),
5889                                    errors.ECODE_STATE)
5890     else:
5891       affected_instances = None
5892
5893     if (self.op.master_candidate is not None or
5894         self.op.drained is not None or
5895         self.op.offline is not None):
5896       # we can't change the master's node flags
5897       if self.op.node_name == self.cfg.GetMasterNode():
5898         raise errors.OpPrereqError("The master role can be changed"
5899                                    " only via master-failover",
5900                                    errors.ECODE_INVAL)
5901
5902     if self.op.master_candidate and not node.master_capable:
5903       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5904                                  " it a master candidate" % node.name,
5905                                  errors.ECODE_STATE)
5906
5907     if self.op.vm_capable is False:
5908       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5909       if ipri or isec:
5910         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5911                                    " the vm_capable flag" % node.name,
5912                                    errors.ECODE_STATE)
5913
5914     if node.master_candidate and self.might_demote and not self.lock_all:
5915       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5916       # check if after removing the current node, we're missing master
5917       # candidates
5918       (mc_remaining, mc_should, _) = \
5919           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5920       if mc_remaining < mc_should:
5921         raise errors.OpPrereqError("Not enough master candidates, please"
5922                                    " pass auto promote option to allow"
5923                                    " promotion (--auto-promote or RAPI"
5924                                    " auto_promote=True)", errors.ECODE_STATE)
5925
5926     self.old_flags = old_flags = (node.master_candidate,
5927                                   node.drained, node.offline)
5928     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5929     self.old_role = old_role = self._F2R[old_flags]
5930
5931     # Check for ineffective changes
5932     for attr in self._FLAGS:
5933       if (getattr(self.op, attr) is False and getattr(node, attr) is False):
5934         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5935         setattr(self.op, attr, None)
5936
5937     # Past this point, any flag change to False means a transition
5938     # away from the respective state, as only real changes are kept
5939
5940     # TODO: We might query the real power state if it supports OOB
5941     if _SupportsOob(self.cfg, node):
5942       if self.op.offline is False and not (node.powered or
5943                                            self.op.powered is True):
5944         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5945                                     " offline status can be reset") %
5946                                    self.op.node_name, errors.ECODE_STATE)
5947     elif self.op.powered is not None:
5948       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5949                                   " as it does not support out-of-band"
5950                                   " handling") % self.op.node_name,
5951                                  errors.ECODE_STATE)
5952
5953     # If we're being deofflined/drained, we'll MC ourself if needed
5954     if (self.op.drained is False or self.op.offline is False or
5955         (self.op.master_capable and not node.master_capable)):
5956       if _DecideSelfPromotion(self):
5957         self.op.master_candidate = True
5958         self.LogInfo("Auto-promoting node to master candidate")
5959
5960     # If we're no longer master capable, we'll demote ourselves from MC
5961     if self.op.master_capable is False and node.master_candidate:
5962       self.LogInfo("Demoting from master candidate")
5963       self.op.master_candidate = False
5964
5965     # Compute new role
5966     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5967     if self.op.master_candidate:
5968       new_role = self._ROLE_CANDIDATE
5969     elif self.op.drained:
5970       new_role = self._ROLE_DRAINED
5971     elif self.op.offline:
5972       new_role = self._ROLE_OFFLINE
5973     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5974       # False is still in new flags, which means we're un-setting (the
5975       # only) True flag
5976       new_role = self._ROLE_REGULAR
5977     else: # no new flags, nothing, keep old role
5978       new_role = old_role
5979
5980     self.new_role = new_role
5981
5982     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5983       # Trying to transition out of offline status
5984       result = self.rpc.call_version([node.name])[node.name]
5985       if result.fail_msg:
5986         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5987                                    " to report its version: %s" %
5988                                    (node.name, result.fail_msg),
5989                                    errors.ECODE_STATE)
5990       else:
5991         self.LogWarning("Transitioning node from offline to online state"
5992                         " without using re-add. Please make sure the node"
5993                         " is healthy!")
5994
5995     # When changing the secondary ip, verify if this is a single-homed to
5996     # multi-homed transition or vice versa, and apply the relevant
5997     # restrictions.
5998     if self.op.secondary_ip:
5999       # Ok even without locking, because this can't be changed by any LU
6000       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6001       master_singlehomed = master.secondary_ip == master.primary_ip
6002       if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6003         if self.op.force and node.name == master.name:
6004           self.LogWarning("Transitioning from single-homed to multi-homed"
6005                           " cluster. All nodes will require a secondary ip.")
6006         else:
6007           raise errors.OpPrereqError("Changing the secondary ip on a"
6008                                      " single-homed cluster requires the"
6009                                      " --force option to be passed, and the"
6010                                      " target node to be the master",
6011                                      errors.ECODE_INVAL)
6012       elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6013         if self.op.force and node.name == master.name:
6014           self.LogWarning("Transitioning from multi-homed to single-homed"
6015                           " cluster. Secondary IPs will have to be removed.")
6016         else:
6017           raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6018                                      " same as the primary IP on a multi-homed"
6019                                      " cluster, unless the --force option is"
6020                                      " passed, and the target node is the"
6021                                      " master", errors.ECODE_INVAL)
6022
6023       assert not (frozenset(affected_instances) -
6024                   self.owned_locks(locking.LEVEL_INSTANCE))
6025
6026       if node.offline:
6027         if affected_instances:
6028           msg = ("Cannot change secondary IP address: offline node has"
6029                  " instances (%s) configured to use it" %
6030                  utils.CommaJoin(affected_instances.keys()))
6031           raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6032       else:
6033         # On online nodes, check that no instances are running, and that
6034         # the node has the new ip and we can reach it.
6035         for instance in affected_instances.values():
6036           _CheckInstanceState(self, instance, INSTANCE_DOWN,
6037                               msg="cannot change secondary ip")
6038
6039         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6040         if master.name != node.name:
6041           # check reachability from master secondary ip to new secondary ip
6042           if not netutils.TcpPing(self.op.secondary_ip,
6043                                   constants.DEFAULT_NODED_PORT,
6044                                   source=master.secondary_ip):
6045             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6046                                        " based ping to node daemon port",
6047                                        errors.ECODE_ENVIRON)
6048
6049     if self.op.ndparams:
6050       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6051       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6052       self.new_ndparams = new_ndparams
6053
6054     if self.op.hv_state:
6055       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6056                                                  self.node.hv_state_static)
6057
6058     if self.op.disk_state:
6059       self.new_disk_state = \
6060         _MergeAndVerifyDiskState(self.op.disk_state,
6061                                  self.node.disk_state_static)
6062
6063   def Exec(self, feedback_fn):
6064     """Modifies a node.
6065
6066     """
6067     node = self.node
6068     old_role = self.old_role
6069     new_role = self.new_role
6070
6071     result = []
6072
6073     if self.op.ndparams:
6074       node.ndparams = self.new_ndparams
6075
6076     if self.op.powered is not None:
6077       node.powered = self.op.powered
6078
6079     if self.op.hv_state:
6080       node.hv_state_static = self.new_hv_state
6081
6082     if self.op.disk_state:
6083       node.disk_state_static = self.new_disk_state
6084
6085     for attr in ["master_capable", "vm_capable"]:
6086       val = getattr(self.op, attr)
6087       if val is not None:
6088         setattr(node, attr, val)
6089         result.append((attr, str(val)))
6090
6091     if new_role != old_role:
6092       # Tell the node to demote itself, if no longer MC and not offline
6093       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6094         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6095         if msg:
6096           self.LogWarning("Node failed to demote itself: %s", msg)
6097
6098       new_flags = self._R2F[new_role]
6099       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6100         if of != nf:
6101           result.append((desc, str(nf)))
6102       (node.master_candidate, node.drained, node.offline) = new_flags
6103
6104       # we locked all nodes, we adjust the CP before updating this node
6105       if self.lock_all:
6106         _AdjustCandidatePool(self, [node.name])
6107
6108     if self.op.secondary_ip:
6109       node.secondary_ip = self.op.secondary_ip
6110       result.append(("secondary_ip", self.op.secondary_ip))
6111
6112     # this will trigger configuration file update, if needed
6113     self.cfg.Update(node, feedback_fn)
6114
6115     # this will trigger job queue propagation or cleanup if the mc
6116     # flag changed
6117     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6118       self.context.ReaddNode(node)
6119
6120     return result
6121
6122
6123 class LUNodePowercycle(NoHooksLU):
6124   """Powercycles a node.
6125
6126   """
6127   REQ_BGL = False
6128
6129   def CheckArguments(self):
6130     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6131     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6132       raise errors.OpPrereqError("The node is the master and the force"
6133                                  " parameter was not set",
6134                                  errors.ECODE_INVAL)
6135
6136   def ExpandNames(self):
6137     """Locking for PowercycleNode.
6138
6139     This is a last-resort option and shouldn't block on other
6140     jobs. Therefore, we grab no locks.
6141
6142     """
6143     self.needed_locks = {}
6144
6145   def Exec(self, feedback_fn):
6146     """Reboots a node.
6147
6148     """
6149     result = self.rpc.call_node_powercycle(self.op.node_name,
6150                                            self.cfg.GetHypervisorType())
6151     result.Raise("Failed to schedule the reboot")
6152     return result.payload
6153
6154
6155 class LUClusterQuery(NoHooksLU):
6156   """Query cluster configuration.
6157
6158   """
6159   REQ_BGL = False
6160
6161   def ExpandNames(self):
6162     self.needed_locks = {}
6163
6164   def Exec(self, feedback_fn):
6165     """Return cluster config.
6166
6167     """
6168     cluster = self.cfg.GetClusterInfo()
6169     os_hvp = {}
6170
6171     # Filter just for enabled hypervisors
6172     for os_name, hv_dict in cluster.os_hvp.items():
6173       os_hvp[os_name] = {}
6174       for hv_name, hv_params in hv_dict.items():
6175         if hv_name in cluster.enabled_hypervisors:
6176           os_hvp[os_name][hv_name] = hv_params
6177
6178     # Convert ip_family to ip_version
6179     primary_ip_version = constants.IP4_VERSION
6180     if cluster.primary_ip_family == netutils.IP6Address.family:
6181       primary_ip_version = constants.IP6_VERSION
6182
6183     result = {
6184       "software_version": constants.RELEASE_VERSION,
6185       "protocol_version": constants.PROTOCOL_VERSION,
6186       "config_version": constants.CONFIG_VERSION,
6187       "os_api_version": max(constants.OS_API_VERSIONS),
6188       "export_version": constants.EXPORT_VERSION,
6189       "architecture": runtime.GetArchInfo(),
6190       "name": cluster.cluster_name,
6191       "master": cluster.master_node,
6192       "default_hypervisor": cluster.primary_hypervisor,
6193       "enabled_hypervisors": cluster.enabled_hypervisors,
6194       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6195                         for hypervisor_name in cluster.enabled_hypervisors]),
6196       "os_hvp": os_hvp,
6197       "beparams": cluster.beparams,
6198       "osparams": cluster.osparams,
6199       "ipolicy": cluster.ipolicy,
6200       "nicparams": cluster.nicparams,
6201       "ndparams": cluster.ndparams,
6202       "diskparams": cluster.diskparams,
6203       "candidate_pool_size": cluster.candidate_pool_size,
6204       "master_netdev": cluster.master_netdev,
6205       "master_netmask": cluster.master_netmask,
6206       "use_external_mip_script": cluster.use_external_mip_script,
6207       "volume_group_name": cluster.volume_group_name,
6208       "drbd_usermode_helper": cluster.drbd_usermode_helper,
6209       "file_storage_dir": cluster.file_storage_dir,
6210       "shared_file_storage_dir": cluster.shared_file_storage_dir,
6211       "maintain_node_health": cluster.maintain_node_health,
6212       "ctime": cluster.ctime,
6213       "mtime": cluster.mtime,
6214       "uuid": cluster.uuid,
6215       "tags": list(cluster.GetTags()),
6216       "uid_pool": cluster.uid_pool,
6217       "default_iallocator": cluster.default_iallocator,
6218       "reserved_lvs": cluster.reserved_lvs,
6219       "primary_ip_version": primary_ip_version,
6220       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6221       "hidden_os": cluster.hidden_os,
6222       "blacklisted_os": cluster.blacklisted_os,
6223       }
6224
6225     return result
6226
6227
6228 class LUClusterConfigQuery(NoHooksLU):
6229   """Return configuration values.
6230
6231   """
6232   REQ_BGL = False
6233
6234   def CheckArguments(self):
6235     self.cq = _ClusterQuery(None, self.op.output_fields, False)
6236
6237   def ExpandNames(self):
6238     self.cq.ExpandNames(self)
6239
6240   def DeclareLocks(self, level):
6241     self.cq.DeclareLocks(self, level)
6242
6243   def Exec(self, feedback_fn):
6244     result = self.cq.OldStyleQuery(self)
6245
6246     assert len(result) == 1
6247
6248     return result[0]
6249
6250
6251 class _ClusterQuery(_QueryBase):
6252   FIELDS = query.CLUSTER_FIELDS
6253
6254   #: Do not sort (there is only one item)
6255   SORT_FIELD = None
6256
6257   def ExpandNames(self, lu):
6258     lu.needed_locks = {}
6259
6260     # The following variables interact with _QueryBase._GetNames
6261     self.wanted = locking.ALL_SET
6262     self.do_locking = self.use_locking
6263
6264     if self.do_locking:
6265       raise errors.OpPrereqError("Can not use locking for cluster queries",
6266                                  errors.ECODE_INVAL)
6267
6268   def DeclareLocks(self, lu, level):
6269     pass
6270
6271   def _GetQueryData(self, lu):
6272     """Computes the list of nodes and their attributes.
6273
6274     """
6275     # Locking is not used
6276     assert not (compat.any(lu.glm.is_owned(level)
6277                            for level in locking.LEVELS
6278                            if level != locking.LEVEL_CLUSTER) or
6279                 self.do_locking or self.use_locking)
6280
6281     if query.CQ_CONFIG in self.requested_data:
6282       cluster = lu.cfg.GetClusterInfo()
6283     else:
6284       cluster = NotImplemented
6285
6286     if query.CQ_QUEUE_DRAINED in self.requested_data:
6287       drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6288     else:
6289       drain_flag = NotImplemented
6290
6291     if query.CQ_WATCHER_PAUSE in self.requested_data:
6292       watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6293     else:
6294       watcher_pause = NotImplemented
6295
6296     return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6297
6298
6299 class LUInstanceActivateDisks(NoHooksLU):
6300   """Bring up an instance's disks.
6301
6302   """
6303   REQ_BGL = False
6304
6305   def ExpandNames(self):
6306     self._ExpandAndLockInstance()
6307     self.needed_locks[locking.LEVEL_NODE] = []
6308     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6309
6310   def DeclareLocks(self, level):
6311     if level == locking.LEVEL_NODE:
6312       self._LockInstancesNodes()
6313
6314   def CheckPrereq(self):
6315     """Check prerequisites.
6316
6317     This checks that the instance is in the cluster.
6318
6319     """
6320     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6321     assert self.instance is not None, \
6322       "Cannot retrieve locked instance %s" % self.op.instance_name
6323     _CheckNodeOnline(self, self.instance.primary_node)
6324
6325   def Exec(self, feedback_fn):
6326     """Activate the disks.
6327
6328     """
6329     disks_ok, disks_info = \
6330               _AssembleInstanceDisks(self, self.instance,
6331                                      ignore_size=self.op.ignore_size)
6332     if not disks_ok:
6333       raise errors.OpExecError("Cannot activate block devices")
6334
6335     if self.op.wait_for_sync:
6336       if not _WaitForSync(self, self.instance):
6337         raise errors.OpExecError("Some disks of the instance are degraded!")
6338
6339     return disks_info
6340
6341
6342 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6343                            ignore_size=False):
6344   """Prepare the block devices for an instance.
6345
6346   This sets up the block devices on all nodes.
6347
6348   @type lu: L{LogicalUnit}
6349   @param lu: the logical unit on whose behalf we execute
6350   @type instance: L{objects.Instance}
6351   @param instance: the instance for whose disks we assemble
6352   @type disks: list of L{objects.Disk} or None
6353   @param disks: which disks to assemble (or all, if None)
6354   @type ignore_secondaries: boolean
6355   @param ignore_secondaries: if true, errors on secondary nodes
6356       won't result in an error return from the function
6357   @type ignore_size: boolean
6358   @param ignore_size: if true, the current known size of the disk
6359       will not be used during the disk activation, useful for cases
6360       when the size is wrong
6361   @return: False if the operation failed, otherwise a list of
6362       (host, instance_visible_name, node_visible_name)
6363       with the mapping from node devices to instance devices
6364
6365   """
6366   device_info = []
6367   disks_ok = True
6368   iname = instance.name
6369   disks = _ExpandCheckDisks(instance, disks)
6370
6371   # With the two passes mechanism we try to reduce the window of
6372   # opportunity for the race condition of switching DRBD to primary
6373   # before handshaking occured, but we do not eliminate it
6374
6375   # The proper fix would be to wait (with some limits) until the
6376   # connection has been made and drbd transitions from WFConnection
6377   # into any other network-connected state (Connected, SyncTarget,
6378   # SyncSource, etc.)
6379
6380   # 1st pass, assemble on all nodes in secondary mode
6381   for idx, inst_disk in enumerate(disks):
6382     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6383       if ignore_size:
6384         node_disk = node_disk.Copy()
6385         node_disk.UnsetSize()
6386       lu.cfg.SetDiskID(node_disk, node)
6387       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6388                                              False, idx)
6389       msg = result.fail_msg
6390       if msg:
6391         is_offline_secondary = (node in instance.secondary_nodes and
6392                                 result.offline)
6393         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6394                            " (is_primary=False, pass=1): %s",
6395                            inst_disk.iv_name, node, msg)
6396         if not (ignore_secondaries or is_offline_secondary):
6397           disks_ok = False
6398
6399   # FIXME: race condition on drbd migration to primary
6400
6401   # 2nd pass, do only the primary node
6402   for idx, inst_disk in enumerate(disks):
6403     dev_path = None
6404
6405     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6406       if node != instance.primary_node:
6407         continue
6408       if ignore_size:
6409         node_disk = node_disk.Copy()
6410         node_disk.UnsetSize()
6411       lu.cfg.SetDiskID(node_disk, node)
6412       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6413                                              True, idx)
6414       msg = result.fail_msg
6415       if msg:
6416         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6417                            " (is_primary=True, pass=2): %s",
6418                            inst_disk.iv_name, node, msg)
6419         disks_ok = False
6420       else:
6421         dev_path = result.payload
6422
6423     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6424
6425   # leave the disks configured for the primary node
6426   # this is a workaround that would be fixed better by
6427   # improving the logical/physical id handling
6428   for disk in disks:
6429     lu.cfg.SetDiskID(disk, instance.primary_node)
6430
6431   return disks_ok, device_info
6432
6433
6434 def _StartInstanceDisks(lu, instance, force):
6435   """Start the disks of an instance.
6436
6437   """
6438   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6439                                            ignore_secondaries=force)
6440   if not disks_ok:
6441     _ShutdownInstanceDisks(lu, instance)
6442     if force is not None and not force:
6443       lu.proc.LogWarning("", hint="If the message above refers to a"
6444                          " secondary node,"
6445                          " you can retry the operation using '--force'.")
6446     raise errors.OpExecError("Disk consistency error")
6447
6448
6449 class LUInstanceDeactivateDisks(NoHooksLU):
6450   """Shutdown an instance's disks.
6451
6452   """
6453   REQ_BGL = False
6454
6455   def ExpandNames(self):
6456     self._ExpandAndLockInstance()
6457     self.needed_locks[locking.LEVEL_NODE] = []
6458     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6459
6460   def DeclareLocks(self, level):
6461     if level == locking.LEVEL_NODE:
6462       self._LockInstancesNodes()
6463
6464   def CheckPrereq(self):
6465     """Check prerequisites.
6466
6467     This checks that the instance is in the cluster.
6468
6469     """
6470     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6471     assert self.instance is not None, \
6472       "Cannot retrieve locked instance %s" % self.op.instance_name
6473
6474   def Exec(self, feedback_fn):
6475     """Deactivate the disks
6476
6477     """
6478     instance = self.instance
6479     if self.op.force:
6480       _ShutdownInstanceDisks(self, instance)
6481     else:
6482       _SafeShutdownInstanceDisks(self, instance)
6483
6484
6485 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6486   """Shutdown block devices of an instance.
6487
6488   This function checks if an instance is running, before calling
6489   _ShutdownInstanceDisks.
6490
6491   """
6492   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6493   _ShutdownInstanceDisks(lu, instance, disks=disks)
6494
6495
6496 def _ExpandCheckDisks(instance, disks):
6497   """Return the instance disks selected by the disks list
6498
6499   @type disks: list of L{objects.Disk} or None
6500   @param disks: selected disks
6501   @rtype: list of L{objects.Disk}
6502   @return: selected instance disks to act on
6503
6504   """
6505   if disks is None:
6506     return instance.disks
6507   else:
6508     if not set(disks).issubset(instance.disks):
6509       raise errors.ProgrammerError("Can only act on disks belonging to the"
6510                                    " target instance")
6511     return disks
6512
6513
6514 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6515   """Shutdown block devices of an instance.
6516
6517   This does the shutdown on all nodes of the instance.
6518
6519   If the ignore_primary is false, errors on the primary node are
6520   ignored.
6521
6522   """
6523   all_result = True
6524   disks = _ExpandCheckDisks(instance, disks)
6525
6526   for disk in disks:
6527     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6528       lu.cfg.SetDiskID(top_disk, node)
6529       result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6530       msg = result.fail_msg
6531       if msg:
6532         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6533                       disk.iv_name, node, msg)
6534         if ((node == instance.primary_node and not ignore_primary) or
6535             (node != instance.primary_node and not result.offline)):
6536           all_result = False
6537   return all_result
6538
6539
6540 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6541   """Checks if a node has enough free memory.
6542
6543   This function check if a given node has the needed amount of free
6544   memory. In case the node has less memory or we cannot get the
6545   information from the node, this function raise an OpPrereqError
6546   exception.
6547
6548   @type lu: C{LogicalUnit}
6549   @param lu: a logical unit from which we get configuration data
6550   @type node: C{str}
6551   @param node: the node to check
6552   @type reason: C{str}
6553   @param reason: string to use in the error message
6554   @type requested: C{int}
6555   @param requested: the amount of memory in MiB to check for
6556   @type hypervisor_name: C{str}
6557   @param hypervisor_name: the hypervisor to ask for memory stats
6558   @rtype: integer
6559   @return: node current free memory
6560   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6561       we cannot check the node
6562
6563   """
6564   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6565   nodeinfo[node].Raise("Can't get data from node %s" % node,
6566                        prereq=True, ecode=errors.ECODE_ENVIRON)
6567   (_, _, (hv_info, )) = nodeinfo[node].payload
6568
6569   free_mem = hv_info.get("memory_free", None)
6570   if not isinstance(free_mem, int):
6571     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6572                                " was '%s'" % (node, free_mem),
6573                                errors.ECODE_ENVIRON)
6574   if requested > free_mem:
6575     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6576                                " needed %s MiB, available %s MiB" %
6577                                (node, reason, requested, free_mem),
6578                                errors.ECODE_NORES)
6579   return free_mem
6580
6581
6582 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6583   """Checks if nodes have enough free disk space in the all VGs.
6584
6585   This function check if all given nodes have the needed amount of
6586   free disk. In case any node has less disk or we cannot get the
6587   information from the node, this function raise an OpPrereqError
6588   exception.
6589
6590   @type lu: C{LogicalUnit}
6591   @param lu: a logical unit from which we get configuration data
6592   @type nodenames: C{list}
6593   @param nodenames: the list of node names to check
6594   @type req_sizes: C{dict}
6595   @param req_sizes: the hash of vg and corresponding amount of disk in
6596       MiB to check for
6597   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6598       or we cannot check the node
6599
6600   """
6601   for vg, req_size in req_sizes.items():
6602     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6603
6604
6605 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6606   """Checks if nodes have enough free disk space in the specified VG.
6607
6608   This function check if all given nodes have the needed amount of
6609   free disk. In case any node has less disk or we cannot get the
6610   information from the node, this function raise an OpPrereqError
6611   exception.
6612
6613   @type lu: C{LogicalUnit}
6614   @param lu: a logical unit from which we get configuration data
6615   @type nodenames: C{list}
6616   @param nodenames: the list of node names to check
6617   @type vg: C{str}
6618   @param vg: the volume group to check
6619   @type requested: C{int}
6620   @param requested: the amount of disk in MiB to check for
6621   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6622       or we cannot check the node
6623
6624   """
6625   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6626   for node in nodenames:
6627     info = nodeinfo[node]
6628     info.Raise("Cannot get current information from node %s" % node,
6629                prereq=True, ecode=errors.ECODE_ENVIRON)
6630     (_, (vg_info, ), _) = info.payload
6631     vg_free = vg_info.get("vg_free", None)
6632     if not isinstance(vg_free, int):
6633       raise errors.OpPrereqError("Can't compute free disk space on node"
6634                                  " %s for vg %s, result was '%s'" %
6635                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6636     if requested > vg_free:
6637       raise errors.OpPrereqError("Not enough disk space on target node %s"
6638                                  " vg %s: required %d MiB, available %d MiB" %
6639                                  (node, vg, requested, vg_free),
6640                                  errors.ECODE_NORES)
6641
6642
6643 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6644   """Checks if nodes have enough physical CPUs
6645
6646   This function checks if all given nodes have the needed number of
6647   physical CPUs. In case any node has less CPUs or we cannot get the
6648   information from the node, this function raises an OpPrereqError
6649   exception.
6650
6651   @type lu: C{LogicalUnit}
6652   @param lu: a logical unit from which we get configuration data
6653   @type nodenames: C{list}
6654   @param nodenames: the list of node names to check
6655   @type requested: C{int}
6656   @param requested: the minimum acceptable number of physical CPUs
6657   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6658       or we cannot check the node
6659
6660   """
6661   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6662   for node in nodenames:
6663     info = nodeinfo[node]
6664     info.Raise("Cannot get current information from node %s" % node,
6665                prereq=True, ecode=errors.ECODE_ENVIRON)
6666     (_, _, (hv_info, )) = info.payload
6667     num_cpus = hv_info.get("cpu_total", None)
6668     if not isinstance(num_cpus, int):
6669       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6670                                  " on node %s, result was '%s'" %
6671                                  (node, num_cpus), errors.ECODE_ENVIRON)
6672     if requested > num_cpus:
6673       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6674                                  "required" % (node, num_cpus, requested),
6675                                  errors.ECODE_NORES)
6676
6677
6678 class LUInstanceStartup(LogicalUnit):
6679   """Starts an instance.
6680
6681   """
6682   HPATH = "instance-start"
6683   HTYPE = constants.HTYPE_INSTANCE
6684   REQ_BGL = False
6685
6686   def CheckArguments(self):
6687     # extra beparams
6688     if self.op.beparams:
6689       # fill the beparams dict
6690       objects.UpgradeBeParams(self.op.beparams)
6691       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6692
6693   def ExpandNames(self):
6694     self._ExpandAndLockInstance()
6695     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6696
6697   def DeclareLocks(self, level):
6698     if level == locking.LEVEL_NODE_RES:
6699       self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6700
6701   def BuildHooksEnv(self):
6702     """Build hooks env.
6703
6704     This runs on master, primary and secondary nodes of the instance.
6705
6706     """
6707     env = {
6708       "FORCE": self.op.force,
6709       }
6710
6711     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6712
6713     return env
6714
6715   def BuildHooksNodes(self):
6716     """Build hooks nodes.
6717
6718     """
6719     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6720     return (nl, nl)
6721
6722   def CheckPrereq(self):
6723     """Check prerequisites.
6724
6725     This checks that the instance is in the cluster.
6726
6727     """
6728     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6729     assert self.instance is not None, \
6730       "Cannot retrieve locked instance %s" % self.op.instance_name
6731
6732     # extra hvparams
6733     if self.op.hvparams:
6734       # check hypervisor parameter syntax (locally)
6735       cluster = self.cfg.GetClusterInfo()
6736       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6737       filled_hvp = cluster.FillHV(instance)
6738       filled_hvp.update(self.op.hvparams)
6739       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6740       hv_type.CheckParameterSyntax(filled_hvp)
6741       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6742
6743     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6744
6745     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6746
6747     if self.primary_offline and self.op.ignore_offline_nodes:
6748       self.proc.LogWarning("Ignoring offline primary node")
6749
6750       if self.op.hvparams or self.op.beparams:
6751         self.proc.LogWarning("Overridden parameters are ignored")
6752     else:
6753       _CheckNodeOnline(self, instance.primary_node)
6754
6755       bep = self.cfg.GetClusterInfo().FillBE(instance)
6756       bep.update(self.op.beparams)
6757
6758       # check bridges existence
6759       _CheckInstanceBridgesExist(self, instance)
6760
6761       remote_info = self.rpc.call_instance_info(instance.primary_node,
6762                                                 instance.name,
6763                                                 instance.hypervisor)
6764       remote_info.Raise("Error checking node %s" % instance.primary_node,
6765                         prereq=True, ecode=errors.ECODE_ENVIRON)
6766       if not remote_info.payload: # not running already
6767         _CheckNodeFreeMemory(self, instance.primary_node,
6768                              "starting instance %s" % instance.name,
6769                              bep[constants.BE_MINMEM], instance.hypervisor)
6770
6771   def Exec(self, feedback_fn):
6772     """Start the instance.
6773
6774     """
6775     instance = self.instance
6776     force = self.op.force
6777
6778     if not self.op.no_remember:
6779       self.cfg.MarkInstanceUp(instance.name)
6780
6781     if self.primary_offline:
6782       assert self.op.ignore_offline_nodes
6783       self.proc.LogInfo("Primary node offline, marked instance as started")
6784     else:
6785       node_current = instance.primary_node
6786
6787       _StartInstanceDisks(self, instance, force)
6788
6789       result = \
6790         self.rpc.call_instance_start(node_current,
6791                                      (instance, self.op.hvparams,
6792                                       self.op.beparams),
6793                                      self.op.startup_paused)
6794       msg = result.fail_msg
6795       if msg:
6796         _ShutdownInstanceDisks(self, instance)
6797         raise errors.OpExecError("Could not start instance: %s" % msg)
6798
6799
6800 class LUInstanceReboot(LogicalUnit):
6801   """Reboot an instance.
6802
6803   """
6804   HPATH = "instance-reboot"
6805   HTYPE = constants.HTYPE_INSTANCE
6806   REQ_BGL = False
6807
6808   def ExpandNames(self):
6809     self._ExpandAndLockInstance()
6810
6811   def BuildHooksEnv(self):
6812     """Build hooks env.
6813
6814     This runs on master, primary and secondary nodes of the instance.
6815
6816     """
6817     env = {
6818       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6819       "REBOOT_TYPE": self.op.reboot_type,
6820       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6821       }
6822
6823     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6824
6825     return env
6826
6827   def BuildHooksNodes(self):
6828     """Build hooks nodes.
6829
6830     """
6831     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6832     return (nl, nl)
6833
6834   def CheckPrereq(self):
6835     """Check prerequisites.
6836
6837     This checks that the instance is in the cluster.
6838
6839     """
6840     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6841     assert self.instance is not None, \
6842       "Cannot retrieve locked instance %s" % self.op.instance_name
6843     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6844     _CheckNodeOnline(self, instance.primary_node)
6845
6846     # check bridges existence
6847     _CheckInstanceBridgesExist(self, instance)
6848
6849   def Exec(self, feedback_fn):
6850     """Reboot the instance.
6851
6852     """
6853     instance = self.instance
6854     ignore_secondaries = self.op.ignore_secondaries
6855     reboot_type = self.op.reboot_type
6856
6857     remote_info = self.rpc.call_instance_info(instance.primary_node,
6858                                               instance.name,
6859                                               instance.hypervisor)
6860     remote_info.Raise("Error checking node %s" % instance.primary_node)
6861     instance_running = bool(remote_info.payload)
6862
6863     node_current = instance.primary_node
6864
6865     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6866                                             constants.INSTANCE_REBOOT_HARD]:
6867       for disk in instance.disks:
6868         self.cfg.SetDiskID(disk, node_current)
6869       result = self.rpc.call_instance_reboot(node_current, instance,
6870                                              reboot_type,
6871                                              self.op.shutdown_timeout)
6872       result.Raise("Could not reboot instance")
6873     else:
6874       if instance_running:
6875         result = self.rpc.call_instance_shutdown(node_current, instance,
6876                                                  self.op.shutdown_timeout)
6877         result.Raise("Could not shutdown instance for full reboot")
6878         _ShutdownInstanceDisks(self, instance)
6879       else:
6880         self.LogInfo("Instance %s was already stopped, starting now",
6881                      instance.name)
6882       _StartInstanceDisks(self, instance, ignore_secondaries)
6883       result = self.rpc.call_instance_start(node_current,
6884                                             (instance, None, None), False)
6885       msg = result.fail_msg
6886       if msg:
6887         _ShutdownInstanceDisks(self, instance)
6888         raise errors.OpExecError("Could not start instance for"
6889                                  " full reboot: %s" % msg)
6890
6891     self.cfg.MarkInstanceUp(instance.name)
6892
6893
6894 class LUInstanceShutdown(LogicalUnit):
6895   """Shutdown an instance.
6896
6897   """
6898   HPATH = "instance-stop"
6899   HTYPE = constants.HTYPE_INSTANCE
6900   REQ_BGL = False
6901
6902   def ExpandNames(self):
6903     self._ExpandAndLockInstance()
6904
6905   def BuildHooksEnv(self):
6906     """Build hooks env.
6907
6908     This runs on master, primary and secondary nodes of the instance.
6909
6910     """
6911     env = _BuildInstanceHookEnvByObject(self, self.instance)
6912     env["TIMEOUT"] = self.op.timeout
6913     return env
6914
6915   def BuildHooksNodes(self):
6916     """Build hooks nodes.
6917
6918     """
6919     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6920     return (nl, nl)
6921
6922   def CheckPrereq(self):
6923     """Check prerequisites.
6924
6925     This checks that the instance is in the cluster.
6926
6927     """
6928     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6929     assert self.instance is not None, \
6930       "Cannot retrieve locked instance %s" % self.op.instance_name
6931
6932     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6933
6934     self.primary_offline = \
6935       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6936
6937     if self.primary_offline and self.op.ignore_offline_nodes:
6938       self.proc.LogWarning("Ignoring offline primary node")
6939     else:
6940       _CheckNodeOnline(self, self.instance.primary_node)
6941
6942   def Exec(self, feedback_fn):
6943     """Shutdown the instance.
6944
6945     """
6946     instance = self.instance
6947     node_current = instance.primary_node
6948     timeout = self.op.timeout
6949
6950     if not self.op.no_remember:
6951       self.cfg.MarkInstanceDown(instance.name)
6952
6953     if self.primary_offline:
6954       assert self.op.ignore_offline_nodes
6955       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6956     else:
6957       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6958       msg = result.fail_msg
6959       if msg:
6960         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6961
6962       _ShutdownInstanceDisks(self, instance)
6963
6964
6965 class LUInstanceReinstall(LogicalUnit):
6966   """Reinstall an instance.
6967
6968   """
6969   HPATH = "instance-reinstall"
6970   HTYPE = constants.HTYPE_INSTANCE
6971   REQ_BGL = False
6972
6973   def ExpandNames(self):
6974     self._ExpandAndLockInstance()
6975
6976   def BuildHooksEnv(self):
6977     """Build hooks env.
6978
6979     This runs on master, primary and secondary nodes of the instance.
6980
6981     """
6982     return _BuildInstanceHookEnvByObject(self, self.instance)
6983
6984   def BuildHooksNodes(self):
6985     """Build hooks nodes.
6986
6987     """
6988     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6989     return (nl, nl)
6990
6991   def CheckPrereq(self):
6992     """Check prerequisites.
6993
6994     This checks that the instance is in the cluster and is not running.
6995
6996     """
6997     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6998     assert instance is not None, \
6999       "Cannot retrieve locked instance %s" % self.op.instance_name
7000     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7001                      " offline, cannot reinstall")
7002
7003     if instance.disk_template == constants.DT_DISKLESS:
7004       raise errors.OpPrereqError("Instance '%s' has no disks" %
7005                                  self.op.instance_name,
7006                                  errors.ECODE_INVAL)
7007     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7008
7009     if self.op.os_type is not None:
7010       # OS verification
7011       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7012       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7013       instance_os = self.op.os_type
7014     else:
7015       instance_os = instance.os
7016
7017     nodelist = list(instance.all_nodes)
7018
7019     if self.op.osparams:
7020       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7021       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7022       self.os_inst = i_osdict # the new dict (without defaults)
7023     else:
7024       self.os_inst = None
7025
7026     self.instance = instance
7027
7028   def Exec(self, feedback_fn):
7029     """Reinstall the instance.
7030
7031     """
7032     inst = self.instance
7033
7034     if self.op.os_type is not None:
7035       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7036       inst.os = self.op.os_type
7037       # Write to configuration
7038       self.cfg.Update(inst, feedback_fn)
7039
7040     _StartInstanceDisks(self, inst, None)
7041     try:
7042       feedback_fn("Running the instance OS create scripts...")
7043       # FIXME: pass debug option from opcode to backend
7044       result = self.rpc.call_instance_os_add(inst.primary_node,
7045                                              (inst, self.os_inst), True,
7046                                              self.op.debug_level)
7047       result.Raise("Could not install OS for instance %s on node %s" %
7048                    (inst.name, inst.primary_node))
7049     finally:
7050       _ShutdownInstanceDisks(self, inst)
7051
7052
7053 class LUInstanceRecreateDisks(LogicalUnit):
7054   """Recreate an instance's missing disks.
7055
7056   """
7057   HPATH = "instance-recreate-disks"
7058   HTYPE = constants.HTYPE_INSTANCE
7059   REQ_BGL = False
7060
7061   _MODIFYABLE = frozenset([
7062     constants.IDISK_SIZE,
7063     constants.IDISK_MODE,
7064     ])
7065
7066   # New or changed disk parameters may have different semantics
7067   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7068     constants.IDISK_ADOPT,
7069
7070     # TODO: Implement support changing VG while recreating
7071     constants.IDISK_VG,
7072     constants.IDISK_METAVG,
7073     ]))
7074
7075   def _RunAllocator(self):
7076     """Run the allocator based on input opcode.
7077
7078     """
7079     be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7080
7081     # FIXME
7082     # The allocator should actually run in "relocate" mode, but current
7083     # allocators don't support relocating all the nodes of an instance at
7084     # the same time. As a workaround we use "allocate" mode, but this is
7085     # suboptimal for two reasons:
7086     # - The instance name passed to the allocator is present in the list of
7087     #   existing instances, so there could be a conflict within the
7088     #   internal structures of the allocator. This doesn't happen with the
7089     #   current allocators, but it's a liability.
7090     # - The allocator counts the resources used by the instance twice: once
7091     #   because the instance exists already, and once because it tries to
7092     #   allocate a new instance.
7093     # The allocator could choose some of the nodes on which the instance is
7094     # running, but that's not a problem. If the instance nodes are broken,
7095     # they should be already be marked as drained or offline, and hence
7096     # skipped by the allocator. If instance disks have been lost for other
7097     # reasons, then recreating the disks on the same nodes should be fine.
7098     disk_template = self.instance.disk_template
7099     spindle_use = be_full[constants.BE_SPINDLE_USE]
7100     req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7101                                         disk_template=disk_template,
7102                                         tags=list(self.instance.GetTags()),
7103                                         os=self.instance.os,
7104                                         nics=[{}],
7105                                         vcpus=be_full[constants.BE_VCPUS],
7106                                         memory=be_full[constants.BE_MAXMEM],
7107                                         spindle_use=spindle_use,
7108                                         disks=[{constants.IDISK_SIZE: d.size,
7109                                                 constants.IDISK_MODE: d.mode}
7110                                                 for d in self.instance.disks],
7111                                         hypervisor=self.instance.hypervisor)
7112     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7113
7114     ial.Run(self.op.iallocator)
7115
7116     assert ial.required_nodes == len(self.instance.all_nodes)
7117
7118     if not ial.success:
7119       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7120                                  " %s" % (self.op.iallocator, ial.info),
7121                                  errors.ECODE_NORES)
7122
7123     if len(ial.result) != ial.required_nodes:
7124       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7125                                  " of nodes (%s), required %s" %
7126                                  (self.op.iallocator, len(ial.result),
7127                                   ial.required_nodes), errors.ECODE_FAULT)
7128
7129     self.op.nodes = ial.result
7130     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7131                  self.op.instance_name, self.op.iallocator,
7132                  utils.CommaJoin(ial.result))
7133
7134   def CheckArguments(self):
7135     if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7136       # Normalize and convert deprecated list of disk indices
7137       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7138
7139     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7140     if duplicates:
7141       raise errors.OpPrereqError("Some disks have been specified more than"
7142                                  " once: %s" % utils.CommaJoin(duplicates),
7143                                  errors.ECODE_INVAL)
7144
7145     if self.op.iallocator and self.op.nodes:
7146       raise errors.OpPrereqError("Give either the iallocator or the new"
7147                                  " nodes, not both", errors.ECODE_INVAL)
7148
7149     for (idx, params) in self.op.disks:
7150       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7151       unsupported = frozenset(params.keys()) - self._MODIFYABLE
7152       if unsupported:
7153         raise errors.OpPrereqError("Parameters for disk %s try to change"
7154                                    " unmodifyable parameter(s): %s" %
7155                                    (idx, utils.CommaJoin(unsupported)),
7156                                    errors.ECODE_INVAL)
7157
7158   def ExpandNames(self):
7159     self._ExpandAndLockInstance()
7160     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7161     if self.op.nodes:
7162       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7163       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7164     else:
7165       self.needed_locks[locking.LEVEL_NODE] = []
7166       if self.op.iallocator:
7167         # iallocator will select a new node in the same group
7168         self.needed_locks[locking.LEVEL_NODEGROUP] = []
7169     self.needed_locks[locking.LEVEL_NODE_RES] = []
7170
7171   def DeclareLocks(self, level):
7172     if level == locking.LEVEL_NODEGROUP:
7173       assert self.op.iallocator is not None
7174       assert not self.op.nodes
7175       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7176       self.share_locks[locking.LEVEL_NODEGROUP] = 1
7177       # Lock the primary group used by the instance optimistically; this
7178       # requires going via the node before it's locked, requiring
7179       # verification later on
7180       self.needed_locks[locking.LEVEL_NODEGROUP] = \
7181         self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7182
7183     elif level == locking.LEVEL_NODE:
7184       # If an allocator is used, then we lock all the nodes in the current
7185       # instance group, as we don't know yet which ones will be selected;
7186       # if we replace the nodes without using an allocator, we only need to
7187       # lock the old primary for doing RPCs (FIXME: we don't lock nodes for
7188       # RPC anymore), otherwise we need to lock all the instance nodes for
7189       # disk re-creation
7190       if self.op.iallocator:
7191         assert not self.op.nodes
7192         assert not self.needed_locks[locking.LEVEL_NODE]
7193         assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7194
7195         # Lock member nodes of the group of the primary node
7196         for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7197           self.needed_locks[locking.LEVEL_NODE].extend(
7198             self.cfg.GetNodeGroup(group_uuid).members)
7199       else:
7200         primary_only = bool(self.op.nodes)
7201         self._LockInstancesNodes(primary_only=primary_only)
7202     elif level == locking.LEVEL_NODE_RES:
7203       # Copy node locks
7204       self.needed_locks[locking.LEVEL_NODE_RES] = \
7205         self.needed_locks[locking.LEVEL_NODE][:]
7206
7207   def BuildHooksEnv(self):
7208     """Build hooks env.
7209
7210     This runs on master, primary and secondary nodes of the instance.
7211
7212     """
7213     return _BuildInstanceHookEnvByObject(self, self.instance)
7214
7215   def BuildHooksNodes(self):
7216     """Build hooks nodes.
7217
7218     """
7219     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7220     return (nl, nl)
7221
7222   def CheckPrereq(self):
7223     """Check prerequisites.
7224
7225     This checks that the instance is in the cluster and is not running.
7226
7227     """
7228     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7229     assert instance is not None, \
7230       "Cannot retrieve locked instance %s" % self.op.instance_name
7231     if self.op.nodes:
7232       if len(self.op.nodes) != len(instance.all_nodes):
7233         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7234                                    " %d replacement nodes were specified" %
7235                                    (instance.name, len(instance.all_nodes),
7236                                     len(self.op.nodes)),
7237                                    errors.ECODE_INVAL)
7238       assert instance.disk_template != constants.DT_DRBD8 or \
7239           len(self.op.nodes) == 2
7240       assert instance.disk_template != constants.DT_PLAIN or \
7241           len(self.op.nodes) == 1
7242       primary_node = self.op.nodes[0]
7243     else:
7244       primary_node = instance.primary_node
7245     if not self.op.iallocator:
7246       _CheckNodeOnline(self, primary_node)
7247
7248     if instance.disk_template == constants.DT_DISKLESS:
7249       raise errors.OpPrereqError("Instance '%s' has no disks" %
7250                                  self.op.instance_name, errors.ECODE_INVAL)
7251
7252     # Verify if node group locks are still correct
7253     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7254     if owned_groups:
7255       # Node group locks are acquired only for the primary node (and only
7256       # when the allocator is used)
7257       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7258                                primary_only=True)
7259
7260     # if we replace nodes *and* the old primary is offline, we don't
7261     # check
7262     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7263     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7264     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7265     if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7266       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7267                           msg="cannot recreate disks")
7268
7269     if self.op.disks:
7270       self.disks = dict(self.op.disks)
7271     else:
7272       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7273
7274     maxidx = max(self.disks.keys())
7275     if maxidx >= len(instance.disks):
7276       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7277                                  errors.ECODE_INVAL)
7278
7279     if ((self.op.nodes or self.op.iallocator) and
7280         sorted(self.disks.keys()) != range(len(instance.disks))):
7281       raise errors.OpPrereqError("Can't recreate disks partially and"
7282                                  " change the nodes at the same time",
7283                                  errors.ECODE_INVAL)
7284
7285     self.instance = instance
7286
7287     if self.op.iallocator:
7288       self._RunAllocator()
7289
7290     # Release unneeded node and node resource locks
7291     _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7292     _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7293
7294   def Exec(self, feedback_fn):
7295     """Recreate the disks.
7296
7297     """
7298     instance = self.instance
7299
7300     assert (self.owned_locks(locking.LEVEL_NODE) ==
7301             self.owned_locks(locking.LEVEL_NODE_RES))
7302
7303     to_skip = []
7304     mods = [] # keeps track of needed changes
7305
7306     for idx, disk in enumerate(instance.disks):
7307       try:
7308         changes = self.disks[idx]
7309       except KeyError:
7310         # Disk should not be recreated
7311         to_skip.append(idx)
7312         continue
7313
7314       # update secondaries for disks, if needed
7315       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7316         # need to update the nodes and minors
7317         assert len(self.op.nodes) == 2
7318         assert len(disk.logical_id) == 6 # otherwise disk internals
7319                                          # have changed
7320         (_, _, old_port, _, _, old_secret) = disk.logical_id
7321         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7322         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7323                   new_minors[0], new_minors[1], old_secret)
7324         assert len(disk.logical_id) == len(new_id)
7325       else:
7326         new_id = None
7327
7328       mods.append((idx, new_id, changes))
7329
7330     # now that we have passed all asserts above, we can apply the mods
7331     # in a single run (to avoid partial changes)
7332     for idx, new_id, changes in mods:
7333       disk = instance.disks[idx]
7334       if new_id is not None:
7335         assert disk.dev_type == constants.LD_DRBD8
7336         disk.logical_id = new_id
7337       if changes:
7338         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7339                     mode=changes.get(constants.IDISK_MODE, None))
7340
7341     # change primary node, if needed
7342     if self.op.nodes:
7343       instance.primary_node = self.op.nodes[0]
7344       self.LogWarning("Changing the instance's nodes, you will have to"
7345                       " remove any disks left on the older nodes manually")
7346
7347     if self.op.nodes:
7348       self.cfg.Update(instance, feedback_fn)
7349
7350     _CreateDisks(self, instance, to_skip=to_skip)
7351
7352
7353 class LUInstanceRename(LogicalUnit):
7354   """Rename an instance.
7355
7356   """
7357   HPATH = "instance-rename"
7358   HTYPE = constants.HTYPE_INSTANCE
7359
7360   def CheckArguments(self):
7361     """Check arguments.
7362
7363     """
7364     if self.op.ip_check and not self.op.name_check:
7365       # TODO: make the ip check more flexible and not depend on the name check
7366       raise errors.OpPrereqError("IP address check requires a name check",
7367                                  errors.ECODE_INVAL)
7368
7369   def BuildHooksEnv(self):
7370     """Build hooks env.
7371
7372     This runs on master, primary and secondary nodes of the instance.
7373
7374     """
7375     env = _BuildInstanceHookEnvByObject(self, self.instance)
7376     env["INSTANCE_NEW_NAME"] = self.op.new_name
7377     return env
7378
7379   def BuildHooksNodes(self):
7380     """Build hooks nodes.
7381
7382     """
7383     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7384     return (nl, nl)
7385
7386   def CheckPrereq(self):
7387     """Check prerequisites.
7388
7389     This checks that the instance is in the cluster and is not running.
7390
7391     """
7392     self.op.instance_name = _ExpandInstanceName(self.cfg,
7393                                                 self.op.instance_name)
7394     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7395     assert instance is not None
7396     _CheckNodeOnline(self, instance.primary_node)
7397     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7398                         msg="cannot rename")
7399     self.instance = instance
7400
7401     new_name = self.op.new_name
7402     if self.op.name_check:
7403       hostname = netutils.GetHostname(name=new_name)
7404       if hostname.name != new_name:
7405         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7406                      hostname.name)
7407       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7408         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7409                                     " same as given hostname '%s'") %
7410                                     (hostname.name, self.op.new_name),
7411                                     errors.ECODE_INVAL)
7412       new_name = self.op.new_name = hostname.name
7413       if (self.op.ip_check and
7414           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7415         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7416                                    (hostname.ip, new_name),
7417                                    errors.ECODE_NOTUNIQUE)
7418
7419     instance_list = self.cfg.GetInstanceList()
7420     if new_name in instance_list and new_name != instance.name:
7421       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7422                                  new_name, errors.ECODE_EXISTS)
7423
7424   def Exec(self, feedback_fn):
7425     """Rename the instance.
7426
7427     """
7428     inst = self.instance
7429     old_name = inst.name
7430
7431     rename_file_storage = False
7432     if (inst.disk_template in constants.DTS_FILEBASED and
7433         self.op.new_name != inst.name):
7434       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7435       rename_file_storage = True
7436
7437     self.cfg.RenameInstance(inst.name, self.op.new_name)
7438     # Change the instance lock. This is definitely safe while we hold the BGL.
7439     # Otherwise the new lock would have to be added in acquired mode.
7440     assert self.REQ_BGL
7441     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7442     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7443
7444     # re-read the instance from the configuration after rename
7445     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7446
7447     if rename_file_storage:
7448       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7449       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7450                                                      old_file_storage_dir,
7451                                                      new_file_storage_dir)
7452       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7453                    " (but the instance has been renamed in Ganeti)" %
7454                    (inst.primary_node, old_file_storage_dir,
7455                     new_file_storage_dir))
7456
7457     _StartInstanceDisks(self, inst, None)
7458     try:
7459       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7460                                                  old_name, self.op.debug_level)
7461       msg = result.fail_msg
7462       if msg:
7463         msg = ("Could not run OS rename script for instance %s on node %s"
7464                " (but the instance has been renamed in Ganeti): %s" %
7465                (inst.name, inst.primary_node, msg))
7466         self.proc.LogWarning(msg)
7467     finally:
7468       _ShutdownInstanceDisks(self, inst)
7469
7470     return inst.name
7471
7472
7473 class LUInstanceRemove(LogicalUnit):
7474   """Remove an instance.
7475
7476   """
7477   HPATH = "instance-remove"
7478   HTYPE = constants.HTYPE_INSTANCE
7479   REQ_BGL = False
7480
7481   def ExpandNames(self):
7482     self._ExpandAndLockInstance()
7483     self.needed_locks[locking.LEVEL_NODE] = []
7484     self.needed_locks[locking.LEVEL_NODE_RES] = []
7485     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7486
7487   def DeclareLocks(self, level):
7488     if level == locking.LEVEL_NODE:
7489       self._LockInstancesNodes()
7490     elif level == locking.LEVEL_NODE_RES:
7491       # Copy node locks
7492       self.needed_locks[locking.LEVEL_NODE_RES] = \
7493         self.needed_locks[locking.LEVEL_NODE][:]
7494
7495   def BuildHooksEnv(self):
7496     """Build hooks env.
7497
7498     This runs on master, primary and secondary nodes of the instance.
7499
7500     """
7501     env = _BuildInstanceHookEnvByObject(self, self.instance)
7502     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7503     return env
7504
7505   def BuildHooksNodes(self):
7506     """Build hooks nodes.
7507
7508     """
7509     nl = [self.cfg.GetMasterNode()]
7510     nl_post = list(self.instance.all_nodes) + nl
7511     return (nl, nl_post)
7512
7513   def CheckPrereq(self):
7514     """Check prerequisites.
7515
7516     This checks that the instance is in the cluster.
7517
7518     """
7519     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7520     assert self.instance is not None, \
7521       "Cannot retrieve locked instance %s" % self.op.instance_name
7522
7523   def Exec(self, feedback_fn):
7524     """Remove the instance.
7525
7526     """
7527     instance = self.instance
7528     logging.info("Shutting down instance %s on node %s",
7529                  instance.name, instance.primary_node)
7530
7531     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7532                                              self.op.shutdown_timeout)
7533     msg = result.fail_msg
7534     if msg:
7535       if self.op.ignore_failures:
7536         feedback_fn("Warning: can't shutdown instance: %s" % msg)
7537       else:
7538         raise errors.OpExecError("Could not shutdown instance %s on"
7539                                  " node %s: %s" %
7540                                  (instance.name, instance.primary_node, msg))
7541
7542     assert (self.owned_locks(locking.LEVEL_NODE) ==
7543             self.owned_locks(locking.LEVEL_NODE_RES))
7544     assert not (set(instance.all_nodes) -
7545                 self.owned_locks(locking.LEVEL_NODE)), \
7546       "Not owning correct locks"
7547
7548     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7549
7550
7551 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7552   """Utility function to remove an instance.
7553
7554   """
7555   logging.info("Removing block devices for instance %s", instance.name)
7556
7557   if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7558     if not ignore_failures:
7559       raise errors.OpExecError("Can't remove instance's disks")
7560     feedback_fn("Warning: can't remove instance's disks")
7561
7562   logging.info("Removing instance %s out of cluster config", instance.name)
7563
7564   lu.cfg.RemoveInstance(instance.name)
7565
7566   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7567     "Instance lock removal conflict"
7568
7569   # Remove lock for the instance
7570   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7571
7572
7573 class LUInstanceQuery(NoHooksLU):
7574   """Logical unit for querying instances.
7575
7576   """
7577   # pylint: disable=W0142
7578   REQ_BGL = False
7579
7580   def CheckArguments(self):
7581     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7582                              self.op.output_fields, self.op.use_locking)
7583
7584   def ExpandNames(self):
7585     self.iq.ExpandNames(self)
7586
7587   def DeclareLocks(self, level):
7588     self.iq.DeclareLocks(self, level)
7589
7590   def Exec(self, feedback_fn):
7591     return self.iq.OldStyleQuery(self)
7592
7593
7594 class LUInstanceFailover(LogicalUnit):
7595   """Failover an instance.
7596
7597   """
7598   HPATH = "instance-failover"
7599   HTYPE = constants.HTYPE_INSTANCE
7600   REQ_BGL = False
7601
7602   def CheckArguments(self):
7603     """Check the arguments.
7604
7605     """
7606     self.iallocator = getattr(self.op, "iallocator", None)
7607     self.target_node = getattr(self.op, "target_node", None)
7608
7609   def ExpandNames(self):
7610     self._ExpandAndLockInstance()
7611
7612     if self.op.target_node is not None:
7613       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7614
7615     self.needed_locks[locking.LEVEL_NODE] = []
7616     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7617
7618     self.needed_locks[locking.LEVEL_NODE_RES] = []
7619     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7620
7621     ignore_consistency = self.op.ignore_consistency
7622     shutdown_timeout = self.op.shutdown_timeout
7623     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7624                                        cleanup=False,
7625                                        failover=True,
7626                                        ignore_consistency=ignore_consistency,
7627                                        shutdown_timeout=shutdown_timeout,
7628                                        ignore_ipolicy=self.op.ignore_ipolicy)
7629     self.tasklets = [self._migrater]
7630
7631   def DeclareLocks(self, level):
7632     if level == locking.LEVEL_NODE:
7633       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7634       if instance.disk_template in constants.DTS_EXT_MIRROR:
7635         if self.op.target_node is None:
7636           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7637         else:
7638           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7639                                                    self.op.target_node]
7640         del self.recalculate_locks[locking.LEVEL_NODE]
7641       else:
7642         self._LockInstancesNodes()
7643     elif level == locking.LEVEL_NODE_RES:
7644       # Copy node locks
7645       self.needed_locks[locking.LEVEL_NODE_RES] = \
7646         self.needed_locks[locking.LEVEL_NODE][:]
7647
7648   def BuildHooksEnv(self):
7649     """Build hooks env.
7650
7651     This runs on master, primary and secondary nodes of the instance.
7652
7653     """
7654     instance = self._migrater.instance
7655     source_node = instance.primary_node
7656     target_node = self.op.target_node
7657     env = {
7658       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7659       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7660       "OLD_PRIMARY": source_node,
7661       "NEW_PRIMARY": target_node,
7662       }
7663
7664     if instance.disk_template in constants.DTS_INT_MIRROR:
7665       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7666       env["NEW_SECONDARY"] = source_node
7667     else:
7668       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7669
7670     env.update(_BuildInstanceHookEnvByObject(self, instance))
7671
7672     return env
7673
7674   def BuildHooksNodes(self):
7675     """Build hooks nodes.
7676
7677     """
7678     instance = self._migrater.instance
7679     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7680     return (nl, nl + [instance.primary_node])
7681
7682
7683 class LUInstanceMigrate(LogicalUnit):
7684   """Migrate an instance.
7685
7686   This is migration without shutting down, compared to the failover,
7687   which is done with shutdown.
7688
7689   """
7690   HPATH = "instance-migrate"
7691   HTYPE = constants.HTYPE_INSTANCE
7692   REQ_BGL = False
7693
7694   def ExpandNames(self):
7695     self._ExpandAndLockInstance()
7696
7697     if self.op.target_node is not None:
7698       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7699
7700     self.needed_locks[locking.LEVEL_NODE] = []
7701     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7702
7703     self.needed_locks[locking.LEVEL_NODE] = []
7704     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7705
7706     self._migrater = \
7707       TLMigrateInstance(self, self.op.instance_name,
7708                         cleanup=self.op.cleanup,
7709                         failover=False,
7710                         fallback=self.op.allow_failover,
7711                         allow_runtime_changes=self.op.allow_runtime_changes,
7712                         ignore_ipolicy=self.op.ignore_ipolicy)
7713     self.tasklets = [self._migrater]
7714
7715   def DeclareLocks(self, level):
7716     if level == locking.LEVEL_NODE:
7717       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7718       if instance.disk_template in constants.DTS_EXT_MIRROR:
7719         if self.op.target_node is None:
7720           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7721         else:
7722           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7723                                                    self.op.target_node]
7724         del self.recalculate_locks[locking.LEVEL_NODE]
7725       else:
7726         self._LockInstancesNodes()
7727     elif level == locking.LEVEL_NODE_RES:
7728       # Copy node locks
7729       self.needed_locks[locking.LEVEL_NODE_RES] = \
7730         self.needed_locks[locking.LEVEL_NODE][:]
7731
7732   def BuildHooksEnv(self):
7733     """Build hooks env.
7734
7735     This runs on master, primary and secondary nodes of the instance.
7736
7737     """
7738     instance = self._migrater.instance
7739     source_node = instance.primary_node
7740     target_node = self.op.target_node
7741     env = _BuildInstanceHookEnvByObject(self, instance)
7742     env.update({
7743       "MIGRATE_LIVE": self._migrater.live,
7744       "MIGRATE_CLEANUP": self.op.cleanup,
7745       "OLD_PRIMARY": source_node,
7746       "NEW_PRIMARY": target_node,
7747       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7748       })
7749
7750     if instance.disk_template in constants.DTS_INT_MIRROR:
7751       env["OLD_SECONDARY"] = target_node
7752       env["NEW_SECONDARY"] = source_node
7753     else:
7754       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7755
7756     return env
7757
7758   def BuildHooksNodes(self):
7759     """Build hooks nodes.
7760
7761     """
7762     instance = self._migrater.instance
7763     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7764     return (nl, nl + [instance.primary_node])
7765
7766
7767 class LUInstanceMove(LogicalUnit):
7768   """Move an instance by data-copying.
7769
7770   """
7771   HPATH = "instance-move"
7772   HTYPE = constants.HTYPE_INSTANCE
7773   REQ_BGL = False
7774
7775   def ExpandNames(self):
7776     self._ExpandAndLockInstance()
7777     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7778     self.op.target_node = target_node
7779     self.needed_locks[locking.LEVEL_NODE] = [target_node]
7780     self.needed_locks[locking.LEVEL_NODE_RES] = []
7781     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7782
7783   def DeclareLocks(self, level):
7784     if level == locking.LEVEL_NODE:
7785       self._LockInstancesNodes(primary_only=True)
7786     elif level == locking.LEVEL_NODE_RES:
7787       # Copy node locks
7788       self.needed_locks[locking.LEVEL_NODE_RES] = \
7789         self.needed_locks[locking.LEVEL_NODE][:]
7790
7791   def BuildHooksEnv(self):
7792     """Build hooks env.
7793
7794     This runs on master, primary and secondary nodes of the instance.
7795
7796     """
7797     env = {
7798       "TARGET_NODE": self.op.target_node,
7799       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7800       }
7801     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7802     return env
7803
7804   def BuildHooksNodes(self):
7805     """Build hooks nodes.
7806
7807     """
7808     nl = [
7809       self.cfg.GetMasterNode(),
7810       self.instance.primary_node,
7811       self.op.target_node,
7812       ]
7813     return (nl, nl)
7814
7815   def CheckPrereq(self):
7816     """Check prerequisites.
7817
7818     This checks that the instance is in the cluster.
7819
7820     """
7821     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7822     assert self.instance is not None, \
7823       "Cannot retrieve locked instance %s" % self.op.instance_name
7824
7825     node = self.cfg.GetNodeInfo(self.op.target_node)
7826     assert node is not None, \
7827       "Cannot retrieve locked node %s" % self.op.target_node
7828
7829     self.target_node = target_node = node.name
7830
7831     if target_node == instance.primary_node:
7832       raise errors.OpPrereqError("Instance %s is already on the node %s" %
7833                                  (instance.name, target_node),
7834                                  errors.ECODE_STATE)
7835
7836     bep = self.cfg.GetClusterInfo().FillBE(instance)
7837
7838     for idx, dsk in enumerate(instance.disks):
7839       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7840         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7841                                    " cannot copy" % idx, errors.ECODE_STATE)
7842
7843     _CheckNodeOnline(self, target_node)
7844     _CheckNodeNotDrained(self, target_node)
7845     _CheckNodeVmCapable(self, target_node)
7846     cluster = self.cfg.GetClusterInfo()
7847     group_info = self.cfg.GetNodeGroup(node.group)
7848     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7849     _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7850                             ignore=self.op.ignore_ipolicy)
7851
7852     if instance.admin_state == constants.ADMINST_UP:
7853       # check memory requirements on the secondary node
7854       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7855                            instance.name, bep[constants.BE_MAXMEM],
7856                            instance.hypervisor)
7857     else:
7858       self.LogInfo("Not checking memory on the secondary node as"
7859                    " instance will not be started")
7860
7861     # check bridge existance
7862     _CheckInstanceBridgesExist(self, instance, node=target_node)
7863
7864   def Exec(self, feedback_fn):
7865     """Move an instance.
7866
7867     The move is done by shutting it down on its present node, copying
7868     the data over (slow) and starting it on the new node.
7869
7870     """
7871     instance = self.instance
7872
7873     source_node = instance.primary_node
7874     target_node = self.target_node
7875
7876     self.LogInfo("Shutting down instance %s on source node %s",
7877                  instance.name, source_node)
7878
7879     assert (self.owned_locks(locking.LEVEL_NODE) ==
7880             self.owned_locks(locking.LEVEL_NODE_RES))
7881
7882     result = self.rpc.call_instance_shutdown(source_node, instance,
7883                                              self.op.shutdown_timeout)
7884     msg = result.fail_msg
7885     if msg:
7886       if self.op.ignore_consistency:
7887         self.proc.LogWarning("Could not shutdown instance %s on node %s."
7888                              " Proceeding anyway. Please make sure node"
7889                              " %s is down. Error details: %s",
7890                              instance.name, source_node, source_node, msg)
7891       else:
7892         raise errors.OpExecError("Could not shutdown instance %s on"
7893                                  " node %s: %s" %
7894                                  (instance.name, source_node, msg))
7895
7896     # create the target disks
7897     try:
7898       _CreateDisks(self, instance, target_node=target_node)
7899     except errors.OpExecError:
7900       self.LogWarning("Device creation failed, reverting...")
7901       try:
7902         _RemoveDisks(self, instance, target_node=target_node)
7903       finally:
7904         self.cfg.ReleaseDRBDMinors(instance.name)
7905         raise
7906
7907     cluster_name = self.cfg.GetClusterInfo().cluster_name
7908
7909     errs = []
7910     # activate, get path, copy the data over
7911     for idx, disk in enumerate(instance.disks):
7912       self.LogInfo("Copying data for disk %d", idx)
7913       result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7914                                                instance.name, True, idx)
7915       if result.fail_msg:
7916         self.LogWarning("Can't assemble newly created disk %d: %s",
7917                         idx, result.fail_msg)
7918         errs.append(result.fail_msg)
7919         break
7920       dev_path = result.payload
7921       result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7922                                              target_node, dev_path,
7923                                              cluster_name)
7924       if result.fail_msg:
7925         self.LogWarning("Can't copy data over for disk %d: %s",
7926                         idx, result.fail_msg)
7927         errs.append(result.fail_msg)
7928         break
7929
7930     if errs:
7931       self.LogWarning("Some disks failed to copy, aborting")
7932       try:
7933         _RemoveDisks(self, instance, target_node=target_node)
7934       finally:
7935         self.cfg.ReleaseDRBDMinors(instance.name)
7936         raise errors.OpExecError("Errors during disk copy: %s" %
7937                                  (",".join(errs),))
7938
7939     instance.primary_node = target_node
7940     self.cfg.Update(instance, feedback_fn)
7941
7942     self.LogInfo("Removing the disks on the original node")
7943     _RemoveDisks(self, instance, target_node=source_node)
7944
7945     # Only start the instance if it's marked as up
7946     if instance.admin_state == constants.ADMINST_UP:
7947       self.LogInfo("Starting instance %s on node %s",
7948                    instance.name, target_node)
7949
7950       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7951                                            ignore_secondaries=True)
7952       if not disks_ok:
7953         _ShutdownInstanceDisks(self, instance)
7954         raise errors.OpExecError("Can't activate the instance's disks")
7955
7956       result = self.rpc.call_instance_start(target_node,
7957                                             (instance, None, None), False)
7958       msg = result.fail_msg
7959       if msg:
7960         _ShutdownInstanceDisks(self, instance)
7961         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7962                                  (instance.name, target_node, msg))
7963
7964
7965 class LUNodeMigrate(LogicalUnit):
7966   """Migrate all instances from a node.
7967
7968   """
7969   HPATH = "node-migrate"
7970   HTYPE = constants.HTYPE_NODE
7971   REQ_BGL = False
7972
7973   def CheckArguments(self):
7974     pass
7975
7976   def ExpandNames(self):
7977     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7978
7979     self.share_locks = _ShareAll()
7980     self.needed_locks = {
7981       locking.LEVEL_NODE: [self.op.node_name],
7982       }
7983
7984   def BuildHooksEnv(self):
7985     """Build hooks env.
7986
7987     This runs on the master, the primary and all the secondaries.
7988
7989     """
7990     return {
7991       "NODE_NAME": self.op.node_name,
7992       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7993       }
7994
7995   def BuildHooksNodes(self):
7996     """Build hooks nodes.
7997
7998     """
7999     nl = [self.cfg.GetMasterNode()]
8000     return (nl, nl)
8001
8002   def CheckPrereq(self):
8003     pass
8004
8005   def Exec(self, feedback_fn):
8006     # Prepare jobs for migration instances
8007     allow_runtime_changes = self.op.allow_runtime_changes
8008     jobs = [
8009       [opcodes.OpInstanceMigrate(instance_name=inst.name,
8010                                  mode=self.op.mode,
8011                                  live=self.op.live,
8012                                  iallocator=self.op.iallocator,
8013                                  target_node=self.op.target_node,
8014                                  allow_runtime_changes=allow_runtime_changes,
8015                                  ignore_ipolicy=self.op.ignore_ipolicy)]
8016       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8017       ]
8018
8019     # TODO: Run iallocator in this opcode and pass correct placement options to
8020     # OpInstanceMigrate. Since other jobs can modify the cluster between
8021     # running the iallocator and the actual migration, a good consistency model
8022     # will have to be found.
8023
8024     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8025             frozenset([self.op.node_name]))
8026
8027     return ResultWithJobs(jobs)
8028
8029
8030 class TLMigrateInstance(Tasklet):
8031   """Tasklet class for instance migration.
8032
8033   @type live: boolean
8034   @ivar live: whether the migration will be done live or non-live;
8035       this variable is initalized only after CheckPrereq has run
8036   @type cleanup: boolean
8037   @ivar cleanup: Wheater we cleanup from a failed migration
8038   @type iallocator: string
8039   @ivar iallocator: The iallocator used to determine target_node
8040   @type target_node: string
8041   @ivar target_node: If given, the target_node to reallocate the instance to
8042   @type failover: boolean
8043   @ivar failover: Whether operation results in failover or migration
8044   @type fallback: boolean
8045   @ivar fallback: Whether fallback to failover is allowed if migration not
8046                   possible
8047   @type ignore_consistency: boolean
8048   @ivar ignore_consistency: Wheter we should ignore consistency between source
8049                             and target node
8050   @type shutdown_timeout: int
8051   @ivar shutdown_timeout: In case of failover timeout of the shutdown
8052   @type ignore_ipolicy: bool
8053   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8054
8055   """
8056
8057   # Constants
8058   _MIGRATION_POLL_INTERVAL = 1      # seconds
8059   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8060
8061   def __init__(self, lu, instance_name, cleanup=False,
8062                failover=False, fallback=False,
8063                ignore_consistency=False,
8064                allow_runtime_changes=True,
8065                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8066                ignore_ipolicy=False):
8067     """Initializes this class.
8068
8069     """
8070     Tasklet.__init__(self, lu)
8071
8072     # Parameters
8073     self.instance_name = instance_name
8074     self.cleanup = cleanup
8075     self.live = False # will be overridden later
8076     self.failover = failover
8077     self.fallback = fallback
8078     self.ignore_consistency = ignore_consistency
8079     self.shutdown_timeout = shutdown_timeout
8080     self.ignore_ipolicy = ignore_ipolicy
8081     self.allow_runtime_changes = allow_runtime_changes
8082
8083   def CheckPrereq(self):
8084     """Check prerequisites.
8085
8086     This checks that the instance is in the cluster.
8087
8088     """
8089     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8090     instance = self.cfg.GetInstanceInfo(instance_name)
8091     assert instance is not None
8092     self.instance = instance
8093     cluster = self.cfg.GetClusterInfo()
8094
8095     if (not self.cleanup and
8096         not instance.admin_state == constants.ADMINST_UP and
8097         not self.failover and self.fallback):
8098       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8099                       " switching to failover")
8100       self.failover = True
8101
8102     if instance.disk_template not in constants.DTS_MIRRORED:
8103       if self.failover:
8104         text = "failovers"
8105       else:
8106         text = "migrations"
8107       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8108                                  " %s" % (instance.disk_template, text),
8109                                  errors.ECODE_STATE)
8110
8111     if instance.disk_template in constants.DTS_EXT_MIRROR:
8112       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8113
8114       if self.lu.op.iallocator:
8115         self._RunAllocator()
8116       else:
8117         # We set set self.target_node as it is required by
8118         # BuildHooksEnv
8119         self.target_node = self.lu.op.target_node
8120
8121       # Check that the target node is correct in terms of instance policy
8122       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8123       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8124       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8125                                                               group_info)
8126       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8127                               ignore=self.ignore_ipolicy)
8128
8129       # self.target_node is already populated, either directly or by the
8130       # iallocator run
8131       target_node = self.target_node
8132       if self.target_node == instance.primary_node:
8133         raise errors.OpPrereqError("Cannot migrate instance %s"
8134                                    " to its primary (%s)" %
8135                                    (instance.name, instance.primary_node),
8136                                    errors.ECODE_STATE)
8137
8138       if len(self.lu.tasklets) == 1:
8139         # It is safe to release locks only when we're the only tasklet
8140         # in the LU
8141         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8142                       keep=[instance.primary_node, self.target_node])
8143
8144     else:
8145       secondary_nodes = instance.secondary_nodes
8146       if not secondary_nodes:
8147         raise errors.ConfigurationError("No secondary node but using"
8148                                         " %s disk template" %
8149                                         instance.disk_template)
8150       target_node = secondary_nodes[0]
8151       if self.lu.op.iallocator or (self.lu.op.target_node and
8152                                    self.lu.op.target_node != target_node):
8153         if self.failover:
8154           text = "failed over"
8155         else:
8156           text = "migrated"
8157         raise errors.OpPrereqError("Instances with disk template %s cannot"
8158                                    " be %s to arbitrary nodes"
8159                                    " (neither an iallocator nor a target"
8160                                    " node can be passed)" %
8161                                    (instance.disk_template, text),
8162                                    errors.ECODE_INVAL)
8163       nodeinfo = self.cfg.GetNodeInfo(target_node)
8164       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8165       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8166                                                               group_info)
8167       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8168                               ignore=self.ignore_ipolicy)
8169
8170     i_be = cluster.FillBE(instance)
8171
8172     # check memory requirements on the secondary node
8173     if (not self.cleanup and
8174          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8175       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8176                                                "migrating instance %s" %
8177                                                instance.name,
8178                                                i_be[constants.BE_MINMEM],
8179                                                instance.hypervisor)
8180     else:
8181       self.lu.LogInfo("Not checking memory on the secondary node as"
8182                       " instance will not be started")
8183
8184     # check if failover must be forced instead of migration
8185     if (not self.cleanup and not self.failover and
8186         i_be[constants.BE_ALWAYS_FAILOVER]):
8187       if self.fallback:
8188         self.lu.LogInfo("Instance configured to always failover; fallback"
8189                         " to failover")
8190         self.failover = True
8191       else:
8192         raise errors.OpPrereqError("This instance has been configured to"
8193                                    " always failover, please allow failover",
8194                                    errors.ECODE_STATE)
8195
8196     # check bridge existance
8197     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8198
8199     if not self.cleanup:
8200       _CheckNodeNotDrained(self.lu, target_node)
8201       if not self.failover:
8202         result = self.rpc.call_instance_migratable(instance.primary_node,
8203                                                    instance)
8204         if result.fail_msg and self.fallback:
8205           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8206                           " failover")
8207           self.failover = True
8208         else:
8209           result.Raise("Can't migrate, please use failover",
8210                        prereq=True, ecode=errors.ECODE_STATE)
8211
8212     assert not (self.failover and self.cleanup)
8213
8214     if not self.failover:
8215       if self.lu.op.live is not None and self.lu.op.mode is not None:
8216         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8217                                    " parameters are accepted",
8218                                    errors.ECODE_INVAL)
8219       if self.lu.op.live is not None:
8220         if self.lu.op.live:
8221           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8222         else:
8223           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8224         # reset the 'live' parameter to None so that repeated
8225         # invocations of CheckPrereq do not raise an exception
8226         self.lu.op.live = None
8227       elif self.lu.op.mode is None:
8228         # read the default value from the hypervisor
8229         i_hv = cluster.FillHV(self.instance, skip_globals=False)
8230         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8231
8232       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8233     else:
8234       # Failover is never live
8235       self.live = False
8236
8237     if not (self.failover or self.cleanup):
8238       remote_info = self.rpc.call_instance_info(instance.primary_node,
8239                                                 instance.name,
8240                                                 instance.hypervisor)
8241       remote_info.Raise("Error checking instance on node %s" %
8242                         instance.primary_node)
8243       instance_running = bool(remote_info.payload)
8244       if instance_running:
8245         self.current_mem = int(remote_info.payload["memory"])
8246
8247   def _RunAllocator(self):
8248     """Run the allocator based on input opcode.
8249
8250     """
8251     # FIXME: add a self.ignore_ipolicy option
8252     req = iallocator.IAReqRelocate(name=self.instance_name,
8253                                    relocate_from=[self.instance.primary_node])
8254     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8255
8256     ial.Run(self.lu.op.iallocator)
8257
8258     if not ial.success:
8259       raise errors.OpPrereqError("Can't compute nodes using"
8260                                  " iallocator '%s': %s" %
8261                                  (self.lu.op.iallocator, ial.info),
8262                                  errors.ECODE_NORES)
8263     if len(ial.result) != ial.required_nodes:
8264       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8265                                  " of nodes (%s), required %s" %
8266                                  (self.lu.op.iallocator, len(ial.result),
8267                                   ial.required_nodes), errors.ECODE_FAULT)
8268     self.target_node = ial.result[0]
8269     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8270                     self.instance_name, self.lu.op.iallocator,
8271                     utils.CommaJoin(ial.result))
8272
8273   def _WaitUntilSync(self):
8274     """Poll with custom rpc for disk sync.
8275
8276     This uses our own step-based rpc call.
8277
8278     """
8279     self.feedback_fn("* wait until resync is done")
8280     all_done = False
8281     while not all_done:
8282       all_done = True
8283       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8284                                             self.nodes_ip,
8285                                             (self.instance.disks,
8286                                              self.instance))
8287       min_percent = 100
8288       for node, nres in result.items():
8289         nres.Raise("Cannot resync disks on node %s" % node)
8290         node_done, node_percent = nres.payload
8291         all_done = all_done and node_done
8292         if node_percent is not None:
8293           min_percent = min(min_percent, node_percent)
8294       if not all_done:
8295         if min_percent < 100:
8296           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8297         time.sleep(2)
8298
8299   def _EnsureSecondary(self, node):
8300     """Demote a node to secondary.
8301
8302     """
8303     self.feedback_fn("* switching node %s to secondary mode" % node)
8304
8305     for dev in self.instance.disks:
8306       self.cfg.SetDiskID(dev, node)
8307
8308     result = self.rpc.call_blockdev_close(node, self.instance.name,
8309                                           self.instance.disks)
8310     result.Raise("Cannot change disk to secondary on node %s" % node)
8311
8312   def _GoStandalone(self):
8313     """Disconnect from the network.
8314
8315     """
8316     self.feedback_fn("* changing into standalone mode")
8317     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8318                                                self.instance.disks)
8319     for node, nres in result.items():
8320       nres.Raise("Cannot disconnect disks node %s" % node)
8321
8322   def _GoReconnect(self, multimaster):
8323     """Reconnect to the network.
8324
8325     """
8326     if multimaster:
8327       msg = "dual-master"
8328     else:
8329       msg = "single-master"
8330     self.feedback_fn("* changing disks into %s mode" % msg)
8331     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8332                                            (self.instance.disks, self.instance),
8333                                            self.instance.name, multimaster)
8334     for node, nres in result.items():
8335       nres.Raise("Cannot change disks config on node %s" % node)
8336
8337   def _ExecCleanup(self):
8338     """Try to cleanup after a failed migration.
8339
8340     The cleanup is done by:
8341       - check that the instance is running only on one node
8342         (and update the config if needed)
8343       - change disks on its secondary node to secondary
8344       - wait until disks are fully synchronized
8345       - disconnect from the network
8346       - change disks into single-master mode
8347       - wait again until disks are fully synchronized
8348
8349     """
8350     instance = self.instance
8351     target_node = self.target_node
8352     source_node = self.source_node
8353
8354     # check running on only one node
8355     self.feedback_fn("* checking where the instance actually runs"
8356                      " (if this hangs, the hypervisor might be in"
8357                      " a bad state)")
8358     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8359     for node, result in ins_l.items():
8360       result.Raise("Can't contact node %s" % node)
8361
8362     runningon_source = instance.name in ins_l[source_node].payload
8363     runningon_target = instance.name in ins_l[target_node].payload
8364
8365     if runningon_source and runningon_target:
8366       raise errors.OpExecError("Instance seems to be running on two nodes,"
8367                                " or the hypervisor is confused; you will have"
8368                                " to ensure manually that it runs only on one"
8369                                " and restart this operation")
8370
8371     if not (runningon_source or runningon_target):
8372       raise errors.OpExecError("Instance does not seem to be running at all;"
8373                                " in this case it's safer to repair by"
8374                                " running 'gnt-instance stop' to ensure disk"
8375                                " shutdown, and then restarting it")
8376
8377     if runningon_target:
8378       # the migration has actually succeeded, we need to update the config
8379       self.feedback_fn("* instance running on secondary node (%s),"
8380                        " updating config" % target_node)
8381       instance.primary_node = target_node
8382       self.cfg.Update(instance, self.feedback_fn)
8383       demoted_node = source_node
8384     else:
8385       self.feedback_fn("* instance confirmed to be running on its"
8386                        " primary node (%s)" % source_node)
8387       demoted_node = target_node
8388
8389     if instance.disk_template in constants.DTS_INT_MIRROR:
8390       self._EnsureSecondary(demoted_node)
8391       try:
8392         self._WaitUntilSync()
8393       except errors.OpExecError:
8394         # we ignore here errors, since if the device is standalone, it
8395         # won't be able to sync
8396         pass
8397       self._GoStandalone()
8398       self._GoReconnect(False)
8399       self._WaitUntilSync()
8400
8401     self.feedback_fn("* done")
8402
8403   def _RevertDiskStatus(self):
8404     """Try to revert the disk status after a failed migration.
8405
8406     """
8407     target_node = self.target_node
8408     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8409       return
8410
8411     try:
8412       self._EnsureSecondary(target_node)
8413       self._GoStandalone()
8414       self._GoReconnect(False)
8415       self._WaitUntilSync()
8416     except errors.OpExecError, err:
8417       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8418                          " please try to recover the instance manually;"
8419                          " error '%s'" % str(err))
8420
8421   def _AbortMigration(self):
8422     """Call the hypervisor code to abort a started migration.
8423
8424     """
8425     instance = self.instance
8426     target_node = self.target_node
8427     source_node = self.source_node
8428     migration_info = self.migration_info
8429
8430     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8431                                                                  instance,
8432                                                                  migration_info,
8433                                                                  False)
8434     abort_msg = abort_result.fail_msg
8435     if abort_msg:
8436       logging.error("Aborting migration failed on target node %s: %s",
8437                     target_node, abort_msg)
8438       # Don't raise an exception here, as we stil have to try to revert the
8439       # disk status, even if this step failed.
8440
8441     abort_result = self.rpc.call_instance_finalize_migration_src(
8442       source_node, instance, False, self.live)
8443     abort_msg = abort_result.fail_msg
8444     if abort_msg:
8445       logging.error("Aborting migration failed on source node %s: %s",
8446                     source_node, abort_msg)
8447
8448   def _ExecMigration(self):
8449     """Migrate an instance.
8450
8451     The migrate is done by:
8452       - change the disks into dual-master mode
8453       - wait until disks are fully synchronized again
8454       - migrate the instance
8455       - change disks on the new secondary node (the old primary) to secondary
8456       - wait until disks are fully synchronized
8457       - change disks into single-master mode
8458
8459     """
8460     instance = self.instance
8461     target_node = self.target_node
8462     source_node = self.source_node
8463
8464     # Check for hypervisor version mismatch and warn the user.
8465     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8466                                        None, [self.instance.hypervisor])
8467     for ninfo in nodeinfo.values():
8468       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8469                   ninfo.node)
8470     (_, _, (src_info, )) = nodeinfo[source_node].payload
8471     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8472
8473     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8474         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8475       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8476       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8477       if src_version != dst_version:
8478         self.feedback_fn("* warning: hypervisor version mismatch between"
8479                          " source (%s) and target (%s) node" %
8480                          (src_version, dst_version))
8481
8482     self.feedback_fn("* checking disk consistency between source and target")
8483     for (idx, dev) in enumerate(instance.disks):
8484       if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8485         raise errors.OpExecError("Disk %s is degraded or not fully"
8486                                  " synchronized on target node,"
8487                                  " aborting migration" % idx)
8488
8489     if self.current_mem > self.tgt_free_mem:
8490       if not self.allow_runtime_changes:
8491         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8492                                  " free memory to fit instance %s on target"
8493                                  " node %s (have %dMB, need %dMB)" %
8494                                  (instance.name, target_node,
8495                                   self.tgt_free_mem, self.current_mem))
8496       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8497       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8498                                                      instance,
8499                                                      self.tgt_free_mem)
8500       rpcres.Raise("Cannot modify instance runtime memory")
8501
8502     # First get the migration information from the remote node
8503     result = self.rpc.call_migration_info(source_node, instance)
8504     msg = result.fail_msg
8505     if msg:
8506       log_err = ("Failed fetching source migration information from %s: %s" %
8507                  (source_node, msg))
8508       logging.error(log_err)
8509       raise errors.OpExecError(log_err)
8510
8511     self.migration_info = migration_info = result.payload
8512
8513     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8514       # Then switch the disks to master/master mode
8515       self._EnsureSecondary(target_node)
8516       self._GoStandalone()
8517       self._GoReconnect(True)
8518       self._WaitUntilSync()
8519
8520     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8521     result = self.rpc.call_accept_instance(target_node,
8522                                            instance,
8523                                            migration_info,
8524                                            self.nodes_ip[target_node])
8525
8526     msg = result.fail_msg
8527     if msg:
8528       logging.error("Instance pre-migration failed, trying to revert"
8529                     " disk status: %s", msg)
8530       self.feedback_fn("Pre-migration failed, aborting")
8531       self._AbortMigration()
8532       self._RevertDiskStatus()
8533       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8534                                (instance.name, msg))
8535
8536     self.feedback_fn("* migrating instance to %s" % target_node)
8537     result = self.rpc.call_instance_migrate(source_node, instance,
8538                                             self.nodes_ip[target_node],
8539                                             self.live)
8540     msg = result.fail_msg
8541     if msg:
8542       logging.error("Instance migration failed, trying to revert"
8543                     " disk status: %s", msg)
8544       self.feedback_fn("Migration failed, aborting")
8545       self._AbortMigration()
8546       self._RevertDiskStatus()
8547       raise errors.OpExecError("Could not migrate instance %s: %s" %
8548                                (instance.name, msg))
8549
8550     self.feedback_fn("* starting memory transfer")
8551     last_feedback = time.time()
8552     while True:
8553       result = self.rpc.call_instance_get_migration_status(source_node,
8554                                                            instance)
8555       msg = result.fail_msg
8556       ms = result.payload   # MigrationStatus instance
8557       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8558         logging.error("Instance migration failed, trying to revert"
8559                       " disk status: %s", msg)
8560         self.feedback_fn("Migration failed, aborting")
8561         self._AbortMigration()
8562         self._RevertDiskStatus()
8563         raise errors.OpExecError("Could not migrate instance %s: %s" %
8564                                  (instance.name, msg))
8565
8566       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8567         self.feedback_fn("* memory transfer complete")
8568         break
8569
8570       if (utils.TimeoutExpired(last_feedback,
8571                                self._MIGRATION_FEEDBACK_INTERVAL) and
8572           ms.transferred_ram is not None):
8573         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8574         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8575         last_feedback = time.time()
8576
8577       time.sleep(self._MIGRATION_POLL_INTERVAL)
8578
8579     result = self.rpc.call_instance_finalize_migration_src(source_node,
8580                                                            instance,
8581                                                            True,
8582                                                            self.live)
8583     msg = result.fail_msg
8584     if msg:
8585       logging.error("Instance migration succeeded, but finalization failed"
8586                     " on the source node: %s", msg)
8587       raise errors.OpExecError("Could not finalize instance migration: %s" %
8588                                msg)
8589
8590     instance.primary_node = target_node
8591
8592     # distribute new instance config to the other nodes
8593     self.cfg.Update(instance, self.feedback_fn)
8594
8595     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8596                                                            instance,
8597                                                            migration_info,
8598                                                            True)
8599     msg = result.fail_msg
8600     if msg:
8601       logging.error("Instance migration succeeded, but finalization failed"
8602                     " on the target node: %s", msg)
8603       raise errors.OpExecError("Could not finalize instance migration: %s" %
8604                                msg)
8605
8606     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8607       self._EnsureSecondary(source_node)
8608       self._WaitUntilSync()
8609       self._GoStandalone()
8610       self._GoReconnect(False)
8611       self._WaitUntilSync()
8612
8613     # If the instance's disk template is `rbd' and there was a successful
8614     # migration, unmap the device from the source node.
8615     if self.instance.disk_template == constants.DT_RBD:
8616       disks = _ExpandCheckDisks(instance, instance.disks)
8617       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8618       for disk in disks:
8619         result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8620         msg = result.fail_msg
8621         if msg:
8622           logging.error("Migration was successful, but couldn't unmap the"
8623                         " block device %s on source node %s: %s",
8624                         disk.iv_name, source_node, msg)
8625           logging.error("You need to unmap the device %s manually on %s",
8626                         disk.iv_name, source_node)
8627
8628     self.feedback_fn("* done")
8629
8630   def _ExecFailover(self):
8631     """Failover an instance.
8632
8633     The failover is done by shutting it down on its present node and
8634     starting it on the secondary.
8635
8636     """
8637     instance = self.instance
8638     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8639
8640     source_node = instance.primary_node
8641     target_node = self.target_node
8642
8643     if instance.admin_state == constants.ADMINST_UP:
8644       self.feedback_fn("* checking disk consistency between source and target")
8645       for (idx, dev) in enumerate(instance.disks):
8646         # for drbd, these are drbd over lvm
8647         if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8648                                      False):
8649           if primary_node.offline:
8650             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8651                              " target node %s" %
8652                              (primary_node.name, idx, target_node))
8653           elif not self.ignore_consistency:
8654             raise errors.OpExecError("Disk %s is degraded on target node,"
8655                                      " aborting failover" % idx)
8656     else:
8657       self.feedback_fn("* not checking disk consistency as instance is not"
8658                        " running")
8659
8660     self.feedback_fn("* shutting down instance on source node")
8661     logging.info("Shutting down instance %s on node %s",
8662                  instance.name, source_node)
8663
8664     result = self.rpc.call_instance_shutdown(source_node, instance,
8665                                              self.shutdown_timeout)
8666     msg = result.fail_msg
8667     if msg:
8668       if self.ignore_consistency or primary_node.offline:
8669         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8670                            " proceeding anyway; please make sure node"
8671                            " %s is down; error details: %s",
8672                            instance.name, source_node, source_node, msg)
8673       else:
8674         raise errors.OpExecError("Could not shutdown instance %s on"
8675                                  " node %s: %s" %
8676                                  (instance.name, source_node, msg))
8677
8678     self.feedback_fn("* deactivating the instance's disks on source node")
8679     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8680       raise errors.OpExecError("Can't shut down the instance's disks")
8681
8682     instance.primary_node = target_node
8683     # distribute new instance config to the other nodes
8684     self.cfg.Update(instance, self.feedback_fn)
8685
8686     # Only start the instance if it's marked as up
8687     if instance.admin_state == constants.ADMINST_UP:
8688       self.feedback_fn("* activating the instance's disks on target node %s" %
8689                        target_node)
8690       logging.info("Starting instance %s on node %s",
8691                    instance.name, target_node)
8692
8693       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8694                                            ignore_secondaries=True)
8695       if not disks_ok:
8696         _ShutdownInstanceDisks(self.lu, instance)
8697         raise errors.OpExecError("Can't activate the instance's disks")
8698
8699       self.feedback_fn("* starting the instance on the target node %s" %
8700                        target_node)
8701       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8702                                             False)
8703       msg = result.fail_msg
8704       if msg:
8705         _ShutdownInstanceDisks(self.lu, instance)
8706         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8707                                  (instance.name, target_node, msg))
8708
8709   def Exec(self, feedback_fn):
8710     """Perform the migration.
8711
8712     """
8713     self.feedback_fn = feedback_fn
8714     self.source_node = self.instance.primary_node
8715
8716     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8717     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8718       self.target_node = self.instance.secondary_nodes[0]
8719       # Otherwise self.target_node has been populated either
8720       # directly, or through an iallocator.
8721
8722     self.all_nodes = [self.source_node, self.target_node]
8723     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8724                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8725
8726     if self.failover:
8727       feedback_fn("Failover instance %s" % self.instance.name)
8728       self._ExecFailover()
8729     else:
8730       feedback_fn("Migrating instance %s" % self.instance.name)
8731
8732       if self.cleanup:
8733         return self._ExecCleanup()
8734       else:
8735         return self._ExecMigration()
8736
8737
8738 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8739                     force_open):
8740   """Wrapper around L{_CreateBlockDevInner}.
8741
8742   This method annotates the root device first.
8743
8744   """
8745   (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8746   return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8747                               force_open)
8748
8749
8750 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8751                          info, force_open):
8752   """Create a tree of block devices on a given node.
8753
8754   If this device type has to be created on secondaries, create it and
8755   all its children.
8756
8757   If not, just recurse to children keeping the same 'force' value.
8758
8759   @attention: The device has to be annotated already.
8760
8761   @param lu: the lu on whose behalf we execute
8762   @param node: the node on which to create the device
8763   @type instance: L{objects.Instance}
8764   @param instance: the instance which owns the device
8765   @type device: L{objects.Disk}
8766   @param device: the device to create
8767   @type force_create: boolean
8768   @param force_create: whether to force creation of this device; this
8769       will be change to True whenever we find a device which has
8770       CreateOnSecondary() attribute
8771   @param info: the extra 'metadata' we should attach to the device
8772       (this will be represented as a LVM tag)
8773   @type force_open: boolean
8774   @param force_open: this parameter will be passes to the
8775       L{backend.BlockdevCreate} function where it specifies
8776       whether we run on primary or not, and it affects both
8777       the child assembly and the device own Open() execution
8778
8779   """
8780   if device.CreateOnSecondary():
8781     force_create = True
8782
8783   if device.children:
8784     for child in device.children:
8785       _CreateBlockDevInner(lu, node, instance, child, force_create,
8786                            info, force_open)
8787
8788   if not force_create:
8789     return
8790
8791   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8792
8793
8794 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8795   """Create a single block device on a given node.
8796
8797   This will not recurse over children of the device, so they must be
8798   created in advance.
8799
8800   @param lu: the lu on whose behalf we execute
8801   @param node: the node on which to create the device
8802   @type instance: L{objects.Instance}
8803   @param instance: the instance which owns the device
8804   @type device: L{objects.Disk}
8805   @param device: the device to create
8806   @param info: the extra 'metadata' we should attach to the device
8807       (this will be represented as a LVM tag)
8808   @type force_open: boolean
8809   @param force_open: this parameter will be passes to the
8810       L{backend.BlockdevCreate} function where it specifies
8811       whether we run on primary or not, and it affects both
8812       the child assembly and the device own Open() execution
8813
8814   """
8815   lu.cfg.SetDiskID(device, node)
8816   result = lu.rpc.call_blockdev_create(node, device, device.size,
8817                                        instance.name, force_open, info)
8818   result.Raise("Can't create block device %s on"
8819                " node %s for instance %s" % (device, node, instance.name))
8820   if device.physical_id is None:
8821     device.physical_id = result.payload
8822
8823
8824 def _GenerateUniqueNames(lu, exts):
8825   """Generate a suitable LV name.
8826
8827   This will generate a logical volume name for the given instance.
8828
8829   """
8830   results = []
8831   for val in exts:
8832     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8833     results.append("%s%s" % (new_id, val))
8834   return results
8835
8836
8837 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8838                          iv_name, p_minor, s_minor):
8839   """Generate a drbd8 device complete with its children.
8840
8841   """
8842   assert len(vgnames) == len(names) == 2
8843   port = lu.cfg.AllocatePort()
8844   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8845
8846   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8847                           logical_id=(vgnames[0], names[0]),
8848                           params={})
8849   dev_meta = objects.Disk(dev_type=constants.LD_LV,
8850                           size=constants.DRBD_META_SIZE,
8851                           logical_id=(vgnames[1], names[1]),
8852                           params={})
8853   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8854                           logical_id=(primary, secondary, port,
8855                                       p_minor, s_minor,
8856                                       shared_secret),
8857                           children=[dev_data, dev_meta],
8858                           iv_name=iv_name, params={})
8859   return drbd_dev
8860
8861
8862 _DISK_TEMPLATE_NAME_PREFIX = {
8863   constants.DT_PLAIN: "",
8864   constants.DT_RBD: ".rbd",
8865   }
8866
8867
8868 _DISK_TEMPLATE_DEVICE_TYPE = {
8869   constants.DT_PLAIN: constants.LD_LV,
8870   constants.DT_FILE: constants.LD_FILE,
8871   constants.DT_SHARED_FILE: constants.LD_FILE,
8872   constants.DT_BLOCK: constants.LD_BLOCKDEV,
8873   constants.DT_RBD: constants.LD_RBD,
8874   }
8875
8876
8877 def _GenerateDiskTemplate(
8878   lu, template_name, instance_name, primary_node, secondary_nodes,
8879   disk_info, file_storage_dir, file_driver, base_index,
8880   feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8881   _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8882   """Generate the entire disk layout for a given template type.
8883
8884   """
8885   #TODO: compute space requirements
8886
8887   vgname = lu.cfg.GetVGName()
8888   disk_count = len(disk_info)
8889   disks = []
8890
8891   if template_name == constants.DT_DISKLESS:
8892     pass
8893   elif template_name == constants.DT_DRBD8:
8894     if len(secondary_nodes) != 1:
8895       raise errors.ProgrammerError("Wrong template configuration")
8896     remote_node = secondary_nodes[0]
8897     minors = lu.cfg.AllocateDRBDMinor(
8898       [primary_node, remote_node] * len(disk_info), instance_name)
8899
8900     (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8901                                                        full_disk_params)
8902     drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8903
8904     names = []
8905     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8906                                                for i in range(disk_count)]):
8907       names.append(lv_prefix + "_data")
8908       names.append(lv_prefix + "_meta")
8909     for idx, disk in enumerate(disk_info):
8910       disk_index = idx + base_index
8911       data_vg = disk.get(constants.IDISK_VG, vgname)
8912       meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8913       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8914                                       disk[constants.IDISK_SIZE],
8915                                       [data_vg, meta_vg],
8916                                       names[idx * 2:idx * 2 + 2],
8917                                       "disk/%d" % disk_index,
8918                                       minors[idx * 2], minors[idx * 2 + 1])
8919       disk_dev.mode = disk[constants.IDISK_MODE]
8920       disks.append(disk_dev)
8921   else:
8922     if secondary_nodes:
8923       raise errors.ProgrammerError("Wrong template configuration")
8924
8925     if template_name == constants.DT_FILE:
8926       _req_file_storage()
8927     elif template_name == constants.DT_SHARED_FILE:
8928       _req_shr_file_storage()
8929
8930     name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8931     if name_prefix is None:
8932       names = None
8933     else:
8934       names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8935                                         (name_prefix, base_index + i)
8936                                         for i in range(disk_count)])
8937
8938     if template_name == constants.DT_PLAIN:
8939       def logical_id_fn(idx, _, disk):
8940         vg = disk.get(constants.IDISK_VG, vgname)
8941         return (vg, names[idx])
8942     elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8943       logical_id_fn = \
8944         lambda _, disk_index, disk: (file_driver,
8945                                      "%s/disk%d" % (file_storage_dir,
8946                                                     disk_index))
8947     elif template_name == constants.DT_BLOCK:
8948       logical_id_fn = \
8949         lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8950                                        disk[constants.IDISK_ADOPT])
8951     elif template_name == constants.DT_RBD:
8952       logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8953     else:
8954       raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8955
8956     dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8957
8958     for idx, disk in enumerate(disk_info):
8959       disk_index = idx + base_index
8960       size = disk[constants.IDISK_SIZE]
8961       feedback_fn("* disk %s, size %s" %
8962                   (disk_index, utils.FormatUnit(size, "h")))
8963       disks.append(objects.Disk(dev_type=dev_type, size=size,
8964                                 logical_id=logical_id_fn(idx, disk_index, disk),
8965                                 iv_name="disk/%d" % disk_index,
8966                                 mode=disk[constants.IDISK_MODE],
8967                                 params={}))
8968
8969   return disks
8970
8971
8972 def _GetInstanceInfoText(instance):
8973   """Compute that text that should be added to the disk's metadata.
8974
8975   """
8976   return "originstname+%s" % instance.name
8977
8978
8979 def _CalcEta(time_taken, written, total_size):
8980   """Calculates the ETA based on size written and total size.
8981
8982   @param time_taken: The time taken so far
8983   @param written: amount written so far
8984   @param total_size: The total size of data to be written
8985   @return: The remaining time in seconds
8986
8987   """
8988   avg_time = time_taken / float(written)
8989   return (total_size - written) * avg_time
8990
8991
8992 def _WipeDisks(lu, instance):
8993   """Wipes instance disks.
8994
8995   @type lu: L{LogicalUnit}
8996   @param lu: the logical unit on whose behalf we execute
8997   @type instance: L{objects.Instance}
8998   @param instance: the instance whose disks we should create
8999   @return: the success of the wipe
9000
9001   """
9002   node = instance.primary_node
9003
9004   for device in instance.disks:
9005     lu.cfg.SetDiskID(device, node)
9006
9007   logging.info("Pause sync of instance %s disks", instance.name)
9008   result = lu.rpc.call_blockdev_pause_resume_sync(node,
9009                                                   (instance.disks, instance),
9010                                                   True)
9011   result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
9012
9013   for idx, success in enumerate(result.payload):
9014     if not success:
9015       logging.warn("pause-sync of instance %s for disks %d failed",
9016                    instance.name, idx)
9017
9018   try:
9019     for idx, device in enumerate(instance.disks):
9020       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9021       # MAX_WIPE_CHUNK at max
9022       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
9023                             constants.MIN_WIPE_CHUNK_PERCENT)
9024       # we _must_ make this an int, otherwise rounding errors will
9025       # occur
9026       wipe_chunk_size = int(wipe_chunk_size)
9027
9028       lu.LogInfo("* Wiping disk %d", idx)
9029       logging.info("Wiping disk %d for instance %s, node %s using"
9030                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9031
9032       offset = 0
9033       size = device.size
9034       last_output = 0
9035       start_time = time.time()
9036
9037       while offset < size:
9038         wipe_size = min(wipe_chunk_size, size - offset)
9039         logging.debug("Wiping disk %d, offset %s, chunk %s",
9040                       idx, offset, wipe_size)
9041         result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9042                                            wipe_size)
9043         result.Raise("Could not wipe disk %d at offset %d for size %d" %
9044                      (idx, offset, wipe_size))
9045         now = time.time()
9046         offset += wipe_size
9047         if now - last_output >= 60:
9048           eta = _CalcEta(now - start_time, offset, size)
9049           lu.LogInfo(" - done: %.1f%% ETA: %s" %
9050                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
9051           last_output = now
9052   finally:
9053     logging.info("Resume sync of instance %s disks", instance.name)
9054
9055     result = lu.rpc.call_blockdev_pause_resume_sync(node,
9056                                                     (instance.disks, instance),
9057                                                     False)
9058
9059     if result.fail_msg:
9060       lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
9061                     " please have a look at the status and troubleshoot"
9062                     " the issue: %s", node, result.fail_msg)
9063     else:
9064       for idx, success in enumerate(result.payload):
9065         if not success:
9066           lu.LogWarning("Resume sync of disk %d failed, please have a"
9067                         " look at the status and troubleshoot the issue", idx)
9068           logging.warn("resume-sync of instance %s for disks %d failed",
9069                        instance.name, idx)
9070
9071
9072 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9073   """Create all disks for an instance.
9074
9075   This abstracts away some work from AddInstance.
9076
9077   @type lu: L{LogicalUnit}
9078   @param lu: the logical unit on whose behalf we execute
9079   @type instance: L{objects.Instance}
9080   @param instance: the instance whose disks we should create
9081   @type to_skip: list
9082   @param to_skip: list of indices to skip
9083   @type target_node: string
9084   @param target_node: if passed, overrides the target node for creation
9085   @rtype: boolean
9086   @return: the success of the creation
9087
9088   """
9089   info = _GetInstanceInfoText(instance)
9090   if target_node is None:
9091     pnode = instance.primary_node
9092     all_nodes = instance.all_nodes
9093   else:
9094     pnode = target_node
9095     all_nodes = [pnode]
9096
9097   if instance.disk_template in constants.DTS_FILEBASED:
9098     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9099     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9100
9101     result.Raise("Failed to create directory '%s' on"
9102                  " node %s" % (file_storage_dir, pnode))
9103
9104   # Note: this needs to be kept in sync with adding of disks in
9105   # LUInstanceSetParams
9106   for idx, device in enumerate(instance.disks):
9107     if to_skip and idx in to_skip:
9108       continue
9109     logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9110     #HARDCODE
9111     for node in all_nodes:
9112       f_create = node == pnode
9113       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9114
9115
9116 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9117   """Remove all disks for an instance.
9118
9119   This abstracts away some work from `AddInstance()` and
9120   `RemoveInstance()`. Note that in case some of the devices couldn't
9121   be removed, the removal will continue with the other ones (compare
9122   with `_CreateDisks()`).
9123
9124   @type lu: L{LogicalUnit}
9125   @param lu: the logical unit on whose behalf we execute
9126   @type instance: L{objects.Instance}
9127   @param instance: the instance whose disks we should remove
9128   @type target_node: string
9129   @param target_node: used to override the node on which to remove the disks
9130   @rtype: boolean
9131   @return: the success of the removal
9132
9133   """
9134   logging.info("Removing block devices for instance %s", instance.name)
9135
9136   all_result = True
9137   ports_to_release = set()
9138   anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9139   for (idx, device) in enumerate(anno_disks):
9140     if target_node:
9141       edata = [(target_node, device)]
9142     else:
9143       edata = device.ComputeNodeTree(instance.primary_node)
9144     for node, disk in edata:
9145       lu.cfg.SetDiskID(disk, node)
9146       result = lu.rpc.call_blockdev_remove(node, disk)
9147       if result.fail_msg:
9148         lu.LogWarning("Could not remove disk %s on node %s,"
9149                       " continuing anyway: %s", idx, node, result.fail_msg)
9150         if not (result.offline and node != instance.primary_node):
9151           all_result = False
9152
9153     # if this is a DRBD disk, return its port to the pool
9154     if device.dev_type in constants.LDS_DRBD:
9155       ports_to_release.add(device.logical_id[2])
9156
9157   if all_result or ignore_failures:
9158     for port in ports_to_release:
9159       lu.cfg.AddTcpUdpPort(port)
9160
9161   if instance.disk_template == constants.DT_FILE:
9162     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9163     if target_node:
9164       tgt = target_node
9165     else:
9166       tgt = instance.primary_node
9167     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9168     if result.fail_msg:
9169       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9170                     file_storage_dir, instance.primary_node, result.fail_msg)
9171       all_result = False
9172
9173   return all_result
9174
9175
9176 def _ComputeDiskSizePerVG(disk_template, disks):
9177   """Compute disk size requirements in the volume group
9178
9179   """
9180   def _compute(disks, payload):
9181     """Universal algorithm.
9182
9183     """
9184     vgs = {}
9185     for disk in disks:
9186       vgs[disk[constants.IDISK_VG]] = \
9187         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9188
9189     return vgs
9190
9191   # Required free disk space as a function of disk and swap space
9192   req_size_dict = {
9193     constants.DT_DISKLESS: {},
9194     constants.DT_PLAIN: _compute(disks, 0),
9195     # 128 MB are added for drbd metadata for each disk
9196     constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9197     constants.DT_FILE: {},
9198     constants.DT_SHARED_FILE: {},
9199   }
9200
9201   if disk_template not in req_size_dict:
9202     raise errors.ProgrammerError("Disk template '%s' size requirement"
9203                                  " is unknown" % disk_template)
9204
9205   return req_size_dict[disk_template]
9206
9207
9208 def _FilterVmNodes(lu, nodenames):
9209   """Filters out non-vm_capable nodes from a list.
9210
9211   @type lu: L{LogicalUnit}
9212   @param lu: the logical unit for which we check
9213   @type nodenames: list
9214   @param nodenames: the list of nodes on which we should check
9215   @rtype: list
9216   @return: the list of vm-capable nodes
9217
9218   """
9219   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9220   return [name for name in nodenames if name not in vm_nodes]
9221
9222
9223 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9224   """Hypervisor parameter validation.
9225
9226   This function abstract the hypervisor parameter validation to be
9227   used in both instance create and instance modify.
9228
9229   @type lu: L{LogicalUnit}
9230   @param lu: the logical unit for which we check
9231   @type nodenames: list
9232   @param nodenames: the list of nodes on which we should check
9233   @type hvname: string
9234   @param hvname: the name of the hypervisor we should use
9235   @type hvparams: dict
9236   @param hvparams: the parameters which we need to check
9237   @raise errors.OpPrereqError: if the parameters are not valid
9238
9239   """
9240   nodenames = _FilterVmNodes(lu, nodenames)
9241
9242   cluster = lu.cfg.GetClusterInfo()
9243   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9244
9245   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9246   for node in nodenames:
9247     info = hvinfo[node]
9248     if info.offline:
9249       continue
9250     info.Raise("Hypervisor parameter validation failed on node %s" % node)
9251
9252
9253 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9254   """OS parameters validation.
9255
9256   @type lu: L{LogicalUnit}
9257   @param lu: the logical unit for which we check
9258   @type required: boolean
9259   @param required: whether the validation should fail if the OS is not
9260       found
9261   @type nodenames: list
9262   @param nodenames: the list of nodes on which we should check
9263   @type osname: string
9264   @param osname: the name of the hypervisor we should use
9265   @type osparams: dict
9266   @param osparams: the parameters which we need to check
9267   @raise errors.OpPrereqError: if the parameters are not valid
9268
9269   """
9270   nodenames = _FilterVmNodes(lu, nodenames)
9271   result = lu.rpc.call_os_validate(nodenames, required, osname,
9272                                    [constants.OS_VALIDATE_PARAMETERS],
9273                                    osparams)
9274   for node, nres in result.items():
9275     # we don't check for offline cases since this should be run only
9276     # against the master node and/or an instance's nodes
9277     nres.Raise("OS Parameters validation failed on node %s" % node)
9278     if not nres.payload:
9279       lu.LogInfo("OS %s not found on node %s, validation skipped",
9280                  osname, node)
9281
9282
9283 class LUInstanceCreate(LogicalUnit):
9284   """Create an instance.
9285
9286   """
9287   HPATH = "instance-add"
9288   HTYPE = constants.HTYPE_INSTANCE
9289   REQ_BGL = False
9290
9291   def CheckArguments(self):
9292     """Check arguments.
9293
9294     """
9295     # do not require name_check to ease forward/backward compatibility
9296     # for tools
9297     if self.op.no_install and self.op.start:
9298       self.LogInfo("No-installation mode selected, disabling startup")
9299       self.op.start = False
9300     # validate/normalize the instance name
9301     self.op.instance_name = \
9302       netutils.Hostname.GetNormalizedName(self.op.instance_name)
9303
9304     if self.op.ip_check and not self.op.name_check:
9305       # TODO: make the ip check more flexible and not depend on the name check
9306       raise errors.OpPrereqError("Cannot do IP address check without a name"
9307                                  " check", errors.ECODE_INVAL)
9308
9309     # check nics' parameter names
9310     for nic in self.op.nics:
9311       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9312
9313     # check disks. parameter names and consistent adopt/no-adopt strategy
9314     has_adopt = has_no_adopt = False
9315     for disk in self.op.disks:
9316       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9317       if constants.IDISK_ADOPT in disk:
9318         has_adopt = True
9319       else:
9320         has_no_adopt = True
9321     if has_adopt and has_no_adopt:
9322       raise errors.OpPrereqError("Either all disks are adopted or none is",
9323                                  errors.ECODE_INVAL)
9324     if has_adopt:
9325       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9326         raise errors.OpPrereqError("Disk adoption is not supported for the"
9327                                    " '%s' disk template" %
9328                                    self.op.disk_template,
9329                                    errors.ECODE_INVAL)
9330       if self.op.iallocator is not None:
9331         raise errors.OpPrereqError("Disk adoption not allowed with an"
9332                                    " iallocator script", errors.ECODE_INVAL)
9333       if self.op.mode == constants.INSTANCE_IMPORT:
9334         raise errors.OpPrereqError("Disk adoption not allowed for"
9335                                    " instance import", errors.ECODE_INVAL)
9336     else:
9337       if self.op.disk_template in constants.DTS_MUST_ADOPT:
9338         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9339                                    " but no 'adopt' parameter given" %
9340                                    self.op.disk_template,
9341                                    errors.ECODE_INVAL)
9342
9343     self.adopt_disks = has_adopt
9344
9345     # instance name verification
9346     if self.op.name_check:
9347       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9348       self.op.instance_name = self.hostname1.name
9349       # used in CheckPrereq for ip ping check
9350       self.check_ip = self.hostname1.ip
9351     else:
9352       self.check_ip = None
9353
9354     # file storage checks
9355     if (self.op.file_driver and
9356         not self.op.file_driver in constants.FILE_DRIVER):
9357       raise errors.OpPrereqError("Invalid file driver name '%s'" %
9358                                  self.op.file_driver, errors.ECODE_INVAL)
9359
9360     if self.op.disk_template == constants.DT_FILE:
9361       opcodes.RequireFileStorage()
9362     elif self.op.disk_template == constants.DT_SHARED_FILE:
9363       opcodes.RequireSharedFileStorage()
9364
9365     ### Node/iallocator related checks
9366     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9367
9368     if self.op.pnode is not None:
9369       if self.op.disk_template in constants.DTS_INT_MIRROR:
9370         if self.op.snode is None:
9371           raise errors.OpPrereqError("The networked disk templates need"
9372                                      " a mirror node", errors.ECODE_INVAL)
9373       elif self.op.snode:
9374         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9375                         " template")
9376         self.op.snode = None
9377
9378     self._cds = _GetClusterDomainSecret()
9379
9380     if self.op.mode == constants.INSTANCE_IMPORT:
9381       # On import force_variant must be True, because if we forced it at
9382       # initial install, our only chance when importing it back is that it
9383       # works again!
9384       self.op.force_variant = True
9385
9386       if self.op.no_install:
9387         self.LogInfo("No-installation mode has no effect during import")
9388
9389     elif self.op.mode == constants.INSTANCE_CREATE:
9390       if self.op.os_type is None:
9391         raise errors.OpPrereqError("No guest OS specified",
9392                                    errors.ECODE_INVAL)
9393       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9394         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9395                                    " installation" % self.op.os_type,
9396                                    errors.ECODE_STATE)
9397       if self.op.disk_template is None:
9398         raise errors.OpPrereqError("No disk template specified",
9399                                    errors.ECODE_INVAL)
9400
9401     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9402       # Check handshake to ensure both clusters have the same domain secret
9403       src_handshake = self.op.source_handshake
9404       if not src_handshake:
9405         raise errors.OpPrereqError("Missing source handshake",
9406                                    errors.ECODE_INVAL)
9407
9408       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9409                                                            src_handshake)
9410       if errmsg:
9411         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9412                                    errors.ECODE_INVAL)
9413
9414       # Load and check source CA
9415       self.source_x509_ca_pem = self.op.source_x509_ca
9416       if not self.source_x509_ca_pem:
9417         raise errors.OpPrereqError("Missing source X509 CA",
9418                                    errors.ECODE_INVAL)
9419
9420       try:
9421         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9422                                                     self._cds)
9423       except OpenSSL.crypto.Error, err:
9424         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9425                                    (err, ), errors.ECODE_INVAL)
9426
9427       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9428       if errcode is not None:
9429         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9430                                    errors.ECODE_INVAL)
9431
9432       self.source_x509_ca = cert
9433
9434       src_instance_name = self.op.source_instance_name
9435       if not src_instance_name:
9436         raise errors.OpPrereqError("Missing source instance name",
9437                                    errors.ECODE_INVAL)
9438
9439       self.source_instance_name = \
9440           netutils.GetHostname(name=src_instance_name).name
9441
9442     else:
9443       raise errors.OpPrereqError("Invalid instance creation mode %r" %
9444                                  self.op.mode, errors.ECODE_INVAL)
9445
9446   def ExpandNames(self):
9447     """ExpandNames for CreateInstance.
9448
9449     Figure out the right locks for instance creation.
9450
9451     """
9452     self.needed_locks = {}
9453
9454     instance_name = self.op.instance_name
9455     # this is just a preventive check, but someone might still add this
9456     # instance in the meantime, and creation will fail at lock-add time
9457     if instance_name in self.cfg.GetInstanceList():
9458       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9459                                  instance_name, errors.ECODE_EXISTS)
9460
9461     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9462
9463     if self.op.iallocator:
9464       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9465       # specifying a group on instance creation and then selecting nodes from
9466       # that group
9467       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9468       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9469     else:
9470       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9471       nodelist = [self.op.pnode]
9472       if self.op.snode is not None:
9473         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9474         nodelist.append(self.op.snode)
9475       self.needed_locks[locking.LEVEL_NODE] = nodelist
9476       # Lock resources of instance's primary and secondary nodes (copy to
9477       # prevent accidential modification)
9478       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9479
9480     # in case of import lock the source node too
9481     if self.op.mode == constants.INSTANCE_IMPORT:
9482       src_node = self.op.src_node
9483       src_path = self.op.src_path
9484
9485       if src_path is None:
9486         self.op.src_path = src_path = self.op.instance_name
9487
9488       if src_node is None:
9489         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9490         self.op.src_node = None
9491         if os.path.isabs(src_path):
9492           raise errors.OpPrereqError("Importing an instance from a path"
9493                                      " requires a source node option",
9494                                      errors.ECODE_INVAL)
9495       else:
9496         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9497         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9498           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9499         if not os.path.isabs(src_path):
9500           self.op.src_path = src_path = \
9501             utils.PathJoin(constants.EXPORT_DIR, src_path)
9502
9503   def _RunAllocator(self):
9504     """Run the allocator based on input opcode.
9505
9506     """
9507     nics = [n.ToDict() for n in self.nics]
9508     memory = self.be_full[constants.BE_MAXMEM]
9509     spindle_use = self.be_full[constants.BE_SPINDLE_USE]
9510     req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
9511                                         disk_template=self.op.disk_template,
9512                                         tags=self.op.tags,
9513                                         os=self.op.os_type,
9514                                         vcpus=self.be_full[constants.BE_VCPUS],
9515                                         memory=memory,
9516                                         spindle_use=spindle_use,
9517                                         disks=self.disks,
9518                                         nics=nics,
9519                                         hypervisor=self.op.hypervisor)
9520     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9521
9522     ial.Run(self.op.iallocator)
9523
9524     if not ial.success:
9525       raise errors.OpPrereqError("Can't compute nodes using"
9526                                  " iallocator '%s': %s" %
9527                                  (self.op.iallocator, ial.info),
9528                                  errors.ECODE_NORES)
9529     if len(ial.result) != ial.required_nodes:
9530       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9531                                  " of nodes (%s), required %s" %
9532                                  (self.op.iallocator, len(ial.result),
9533                                   ial.required_nodes), errors.ECODE_FAULT)
9534     self.op.pnode = ial.result[0]
9535     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9536                  self.op.instance_name, self.op.iallocator,
9537                  utils.CommaJoin(ial.result))
9538     if ial.required_nodes == 2:
9539       self.op.snode = ial.result[1]
9540
9541   def BuildHooksEnv(self):
9542     """Build hooks env.
9543
9544     This runs on master, primary and secondary nodes of the instance.
9545
9546     """
9547     env = {
9548       "ADD_MODE": self.op.mode,
9549       }
9550     if self.op.mode == constants.INSTANCE_IMPORT:
9551       env["SRC_NODE"] = self.op.src_node
9552       env["SRC_PATH"] = self.op.src_path
9553       env["SRC_IMAGES"] = self.src_images
9554
9555     env.update(_BuildInstanceHookEnv(
9556       name=self.op.instance_name,
9557       primary_node=self.op.pnode,
9558       secondary_nodes=self.secondaries,
9559       status=self.op.start,
9560       os_type=self.op.os_type,
9561       minmem=self.be_full[constants.BE_MINMEM],
9562       maxmem=self.be_full[constants.BE_MAXMEM],
9563       vcpus=self.be_full[constants.BE_VCPUS],
9564       nics=_NICListToTuple(self, self.nics),
9565       disk_template=self.op.disk_template,
9566       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9567              for d in self.disks],
9568       bep=self.be_full,
9569       hvp=self.hv_full,
9570       hypervisor_name=self.op.hypervisor,
9571       tags=self.op.tags,
9572     ))
9573
9574     return env
9575
9576   def BuildHooksNodes(self):
9577     """Build hooks nodes.
9578
9579     """
9580     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9581     return nl, nl
9582
9583   def _ReadExportInfo(self):
9584     """Reads the export information from disk.
9585
9586     It will override the opcode source node and path with the actual
9587     information, if these two were not specified before.
9588
9589     @return: the export information
9590
9591     """
9592     assert self.op.mode == constants.INSTANCE_IMPORT
9593
9594     src_node = self.op.src_node
9595     src_path = self.op.src_path
9596
9597     if src_node is None:
9598       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9599       exp_list = self.rpc.call_export_list(locked_nodes)
9600       found = False
9601       for node in exp_list:
9602         if exp_list[node].fail_msg:
9603           continue
9604         if src_path in exp_list[node].payload:
9605           found = True
9606           self.op.src_node = src_node = node
9607           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9608                                                        src_path)
9609           break
9610       if not found:
9611         raise errors.OpPrereqError("No export found for relative path %s" %
9612                                     src_path, errors.ECODE_INVAL)
9613
9614     _CheckNodeOnline(self, src_node)
9615     result = self.rpc.call_export_info(src_node, src_path)
9616     result.Raise("No export or invalid export found in dir %s" % src_path)
9617
9618     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9619     if not export_info.has_section(constants.INISECT_EXP):
9620       raise errors.ProgrammerError("Corrupted export config",
9621                                    errors.ECODE_ENVIRON)
9622
9623     ei_version = export_info.get(constants.INISECT_EXP, "version")
9624     if (int(ei_version) != constants.EXPORT_VERSION):
9625       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9626                                  (ei_version, constants.EXPORT_VERSION),
9627                                  errors.ECODE_ENVIRON)
9628     return export_info
9629
9630   def _ReadExportParams(self, einfo):
9631     """Use export parameters as defaults.
9632
9633     In case the opcode doesn't specify (as in override) some instance
9634     parameters, then try to use them from the export information, if
9635     that declares them.
9636
9637     """
9638     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9639
9640     if self.op.disk_template is None:
9641       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9642         self.op.disk_template = einfo.get(constants.INISECT_INS,
9643                                           "disk_template")
9644         if self.op.disk_template not in constants.DISK_TEMPLATES:
9645           raise errors.OpPrereqError("Disk template specified in configuration"
9646                                      " file is not one of the allowed values:"
9647                                      " %s" %
9648                                      " ".join(constants.DISK_TEMPLATES),
9649                                      errors.ECODE_INVAL)
9650       else:
9651         raise errors.OpPrereqError("No disk template specified and the export"
9652                                    " is missing the disk_template information",
9653                                    errors.ECODE_INVAL)
9654
9655     if not self.op.disks:
9656       disks = []
9657       # TODO: import the disk iv_name too
9658       for idx in range(constants.MAX_DISKS):
9659         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9660           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9661           disks.append({constants.IDISK_SIZE: disk_sz})
9662       self.op.disks = disks
9663       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9664         raise errors.OpPrereqError("No disk info specified and the export"
9665                                    " is missing the disk information",
9666                                    errors.ECODE_INVAL)
9667
9668     if not self.op.nics:
9669       nics = []
9670       for idx in range(constants.MAX_NICS):
9671         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9672           ndict = {}
9673           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9674             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9675             ndict[name] = v
9676           nics.append(ndict)
9677         else:
9678           break
9679       self.op.nics = nics
9680
9681     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9682       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9683
9684     if (self.op.hypervisor is None and
9685         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9686       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9687
9688     if einfo.has_section(constants.INISECT_HYP):
9689       # use the export parameters but do not override the ones
9690       # specified by the user
9691       for name, value in einfo.items(constants.INISECT_HYP):
9692         if name not in self.op.hvparams:
9693           self.op.hvparams[name] = value
9694
9695     if einfo.has_section(constants.INISECT_BEP):
9696       # use the parameters, without overriding
9697       for name, value in einfo.items(constants.INISECT_BEP):
9698         if name not in self.op.beparams:
9699           self.op.beparams[name] = value
9700         # Compatibility for the old "memory" be param
9701         if name == constants.BE_MEMORY:
9702           if constants.BE_MAXMEM not in self.op.beparams:
9703             self.op.beparams[constants.BE_MAXMEM] = value
9704           if constants.BE_MINMEM not in self.op.beparams:
9705             self.op.beparams[constants.BE_MINMEM] = value
9706     else:
9707       # try to read the parameters old style, from the main section
9708       for name in constants.BES_PARAMETERS:
9709         if (name not in self.op.beparams and
9710             einfo.has_option(constants.INISECT_INS, name)):
9711           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9712
9713     if einfo.has_section(constants.INISECT_OSP):
9714       # use the parameters, without overriding
9715       for name, value in einfo.items(constants.INISECT_OSP):
9716         if name not in self.op.osparams:
9717           self.op.osparams[name] = value
9718
9719   def _RevertToDefaults(self, cluster):
9720     """Revert the instance parameters to the default values.
9721
9722     """
9723     # hvparams
9724     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9725     for name in self.op.hvparams.keys():
9726       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9727         del self.op.hvparams[name]
9728     # beparams
9729     be_defs = cluster.SimpleFillBE({})
9730     for name in self.op.beparams.keys():
9731       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9732         del self.op.beparams[name]
9733     # nic params
9734     nic_defs = cluster.SimpleFillNIC({})
9735     for nic in self.op.nics:
9736       for name in constants.NICS_PARAMETERS:
9737         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9738           del nic[name]
9739     # osparams
9740     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9741     for name in self.op.osparams.keys():
9742       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9743         del self.op.osparams[name]
9744
9745   def _CalculateFileStorageDir(self):
9746     """Calculate final instance file storage dir.
9747
9748     """
9749     # file storage dir calculation/check
9750     self.instance_file_storage_dir = None
9751     if self.op.disk_template in constants.DTS_FILEBASED:
9752       # build the full file storage dir path
9753       joinargs = []
9754
9755       if self.op.disk_template == constants.DT_SHARED_FILE:
9756         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9757       else:
9758         get_fsd_fn = self.cfg.GetFileStorageDir
9759
9760       cfg_storagedir = get_fsd_fn()
9761       if not cfg_storagedir:
9762         raise errors.OpPrereqError("Cluster file storage dir not defined",
9763                                    errors.ECODE_STATE)
9764       joinargs.append(cfg_storagedir)
9765
9766       if self.op.file_storage_dir is not None:
9767         joinargs.append(self.op.file_storage_dir)
9768
9769       joinargs.append(self.op.instance_name)
9770
9771       # pylint: disable=W0142
9772       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9773
9774   def CheckPrereq(self): # pylint: disable=R0914
9775     """Check prerequisites.
9776
9777     """
9778     self._CalculateFileStorageDir()
9779
9780     if self.op.mode == constants.INSTANCE_IMPORT:
9781       export_info = self._ReadExportInfo()
9782       self._ReadExportParams(export_info)
9783       self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9784     else:
9785       self._old_instance_name = None
9786
9787     if (not self.cfg.GetVGName() and
9788         self.op.disk_template not in constants.DTS_NOT_LVM):
9789       raise errors.OpPrereqError("Cluster does not support lvm-based"
9790                                  " instances", errors.ECODE_STATE)
9791
9792     if (self.op.hypervisor is None or
9793         self.op.hypervisor == constants.VALUE_AUTO):
9794       self.op.hypervisor = self.cfg.GetHypervisorType()
9795
9796     cluster = self.cfg.GetClusterInfo()
9797     enabled_hvs = cluster.enabled_hypervisors
9798     if self.op.hypervisor not in enabled_hvs:
9799       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9800                                  " cluster (%s)" %
9801                                  (self.op.hypervisor, ",".join(enabled_hvs)),
9802                                  errors.ECODE_STATE)
9803
9804     # Check tag validity
9805     for tag in self.op.tags:
9806       objects.TaggableObject.ValidateTag(tag)
9807
9808     # check hypervisor parameter syntax (locally)
9809     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9810     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9811                                       self.op.hvparams)
9812     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9813     hv_type.CheckParameterSyntax(filled_hvp)
9814     self.hv_full = filled_hvp
9815     # check that we don't specify global parameters on an instance
9816     _CheckGlobalHvParams(self.op.hvparams)
9817
9818     # fill and remember the beparams dict
9819     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9820     for param, value in self.op.beparams.iteritems():
9821       if value == constants.VALUE_AUTO:
9822         self.op.beparams[param] = default_beparams[param]
9823     objects.UpgradeBeParams(self.op.beparams)
9824     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9825     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9826
9827     # build os parameters
9828     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9829
9830     # now that hvp/bep are in final format, let's reset to defaults,
9831     # if told to do so
9832     if self.op.identify_defaults:
9833       self._RevertToDefaults(cluster)
9834
9835     # NIC buildup
9836     self.nics = []
9837     for idx, nic in enumerate(self.op.nics):
9838       nic_mode_req = nic.get(constants.INIC_MODE, None)
9839       nic_mode = nic_mode_req
9840       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9841         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9842
9843       # in routed mode, for the first nic, the default ip is 'auto'
9844       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9845         default_ip_mode = constants.VALUE_AUTO
9846       else:
9847         default_ip_mode = constants.VALUE_NONE
9848
9849       # ip validity checks
9850       ip = nic.get(constants.INIC_IP, default_ip_mode)
9851       if ip is None or ip.lower() == constants.VALUE_NONE:
9852         nic_ip = None
9853       elif ip.lower() == constants.VALUE_AUTO:
9854         if not self.op.name_check:
9855           raise errors.OpPrereqError("IP address set to auto but name checks"
9856                                      " have been skipped",
9857                                      errors.ECODE_INVAL)
9858         nic_ip = self.hostname1.ip
9859       else:
9860         if not netutils.IPAddress.IsValid(ip):
9861           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9862                                      errors.ECODE_INVAL)
9863         nic_ip = ip
9864
9865       # TODO: check the ip address for uniqueness
9866       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9867         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9868                                    errors.ECODE_INVAL)
9869
9870       # MAC address verification
9871       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9872       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9873         mac = utils.NormalizeAndValidateMac(mac)
9874
9875         try:
9876           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9877         except errors.ReservationError:
9878           raise errors.OpPrereqError("MAC address %s already in use"
9879                                      " in cluster" % mac,
9880                                      errors.ECODE_NOTUNIQUE)
9881
9882       #  Build nic parameters
9883       link = nic.get(constants.INIC_LINK, None)
9884       if link == constants.VALUE_AUTO:
9885         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9886       nicparams = {}
9887       if nic_mode_req:
9888         nicparams[constants.NIC_MODE] = nic_mode
9889       if link:
9890         nicparams[constants.NIC_LINK] = link
9891
9892       check_params = cluster.SimpleFillNIC(nicparams)
9893       objects.NIC.CheckParameterSyntax(check_params)
9894       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9895
9896     # disk checks/pre-build
9897     default_vg = self.cfg.GetVGName()
9898     self.disks = []
9899     for disk in self.op.disks:
9900       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9901       if mode not in constants.DISK_ACCESS_SET:
9902         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9903                                    mode, errors.ECODE_INVAL)
9904       size = disk.get(constants.IDISK_SIZE, None)
9905       if size is None:
9906         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9907       try:
9908         size = int(size)
9909       except (TypeError, ValueError):
9910         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9911                                    errors.ECODE_INVAL)
9912
9913       data_vg = disk.get(constants.IDISK_VG, default_vg)
9914       new_disk = {
9915         constants.IDISK_SIZE: size,
9916         constants.IDISK_MODE: mode,
9917         constants.IDISK_VG: data_vg,
9918         }
9919       if constants.IDISK_METAVG in disk:
9920         new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9921       if constants.IDISK_ADOPT in disk:
9922         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9923       self.disks.append(new_disk)
9924
9925     if self.op.mode == constants.INSTANCE_IMPORT:
9926       disk_images = []
9927       for idx in range(len(self.disks)):
9928         option = "disk%d_dump" % idx
9929         if export_info.has_option(constants.INISECT_INS, option):
9930           # FIXME: are the old os-es, disk sizes, etc. useful?
9931           export_name = export_info.get(constants.INISECT_INS, option)
9932           image = utils.PathJoin(self.op.src_path, export_name)
9933           disk_images.append(image)
9934         else:
9935           disk_images.append(False)
9936
9937       self.src_images = disk_images
9938
9939       if self.op.instance_name == self._old_instance_name:
9940         for idx, nic in enumerate(self.nics):
9941           if nic.mac == constants.VALUE_AUTO:
9942             nic_mac_ini = "nic%d_mac" % idx
9943             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9944
9945     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9946
9947     # ip ping checks (we use the same ip that was resolved in ExpandNames)
9948     if self.op.ip_check:
9949       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9950         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9951                                    (self.check_ip, self.op.instance_name),
9952                                    errors.ECODE_NOTUNIQUE)
9953
9954     #### mac address generation
9955     # By generating here the mac address both the allocator and the hooks get
9956     # the real final mac address rather than the 'auto' or 'generate' value.
9957     # There is a race condition between the generation and the instance object
9958     # creation, which means that we know the mac is valid now, but we're not
9959     # sure it will be when we actually add the instance. If things go bad
9960     # adding the instance will abort because of a duplicate mac, and the
9961     # creation job will fail.
9962     for nic in self.nics:
9963       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9964         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9965
9966     #### allocator run
9967
9968     if self.op.iallocator is not None:
9969       self._RunAllocator()
9970
9971     # Release all unneeded node locks
9972     _ReleaseLocks(self, locking.LEVEL_NODE,
9973                   keep=filter(None, [self.op.pnode, self.op.snode,
9974                                      self.op.src_node]))
9975     _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9976                   keep=filter(None, [self.op.pnode, self.op.snode,
9977                                      self.op.src_node]))
9978
9979     #### node related checks
9980
9981     # check primary node
9982     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9983     assert self.pnode is not None, \
9984       "Cannot retrieve locked node %s" % self.op.pnode
9985     if pnode.offline:
9986       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9987                                  pnode.name, errors.ECODE_STATE)
9988     if pnode.drained:
9989       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9990                                  pnode.name, errors.ECODE_STATE)
9991     if not pnode.vm_capable:
9992       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9993                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9994
9995     self.secondaries = []
9996
9997     # mirror node verification
9998     if self.op.disk_template in constants.DTS_INT_MIRROR:
9999       if self.op.snode == pnode.name:
10000         raise errors.OpPrereqError("The secondary node cannot be the"
10001                                    " primary node", errors.ECODE_INVAL)
10002       _CheckNodeOnline(self, self.op.snode)
10003       _CheckNodeNotDrained(self, self.op.snode)
10004       _CheckNodeVmCapable(self, self.op.snode)
10005       self.secondaries.append(self.op.snode)
10006
10007       snode = self.cfg.GetNodeInfo(self.op.snode)
10008       if pnode.group != snode.group:
10009         self.LogWarning("The primary and secondary nodes are in two"
10010                         " different node groups; the disk parameters"
10011                         " from the first disk's node group will be"
10012                         " used")
10013
10014     nodenames = [pnode.name] + self.secondaries
10015
10016     # Verify instance specs
10017     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10018     ispec = {
10019       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10020       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10021       constants.ISPEC_DISK_COUNT: len(self.disks),
10022       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10023       constants.ISPEC_NIC_COUNT: len(self.nics),
10024       constants.ISPEC_SPINDLE_USE: spindle_use,
10025       }
10026
10027     group_info = self.cfg.GetNodeGroup(pnode.group)
10028     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10029     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10030     if not self.op.ignore_ipolicy and res:
10031       raise errors.OpPrereqError(("Instance allocation to group %s violates"
10032                                   " policy: %s") % (pnode.group,
10033                                                     utils.CommaJoin(res)),
10034                                   errors.ECODE_INVAL)
10035
10036     if not self.adopt_disks:
10037       if self.op.disk_template == constants.DT_RBD:
10038         # _CheckRADOSFreeSpace() is just a placeholder.
10039         # Any function that checks prerequisites can be placed here.
10040         # Check if there is enough space on the RADOS cluster.
10041         _CheckRADOSFreeSpace()
10042       else:
10043         # Check lv size requirements, if not adopting
10044         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10045         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10046
10047     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10048       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10049                                 disk[constants.IDISK_ADOPT])
10050                      for disk in self.disks])
10051       if len(all_lvs) != len(self.disks):
10052         raise errors.OpPrereqError("Duplicate volume names given for adoption",
10053                                    errors.ECODE_INVAL)
10054       for lv_name in all_lvs:
10055         try:
10056           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10057           # to ReserveLV uses the same syntax
10058           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10059         except errors.ReservationError:
10060           raise errors.OpPrereqError("LV named %s used by another instance" %
10061                                      lv_name, errors.ECODE_NOTUNIQUE)
10062
10063       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10064       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10065
10066       node_lvs = self.rpc.call_lv_list([pnode.name],
10067                                        vg_names.payload.keys())[pnode.name]
10068       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10069       node_lvs = node_lvs.payload
10070
10071       delta = all_lvs.difference(node_lvs.keys())
10072       if delta:
10073         raise errors.OpPrereqError("Missing logical volume(s): %s" %
10074                                    utils.CommaJoin(delta),
10075                                    errors.ECODE_INVAL)
10076       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10077       if online_lvs:
10078         raise errors.OpPrereqError("Online logical volumes found, cannot"
10079                                    " adopt: %s" % utils.CommaJoin(online_lvs),
10080                                    errors.ECODE_STATE)
10081       # update the size of disk based on what is found
10082       for dsk in self.disks:
10083         dsk[constants.IDISK_SIZE] = \
10084           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10085                                         dsk[constants.IDISK_ADOPT])][0]))
10086
10087     elif self.op.disk_template == constants.DT_BLOCK:
10088       # Normalize and de-duplicate device paths
10089       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10090                        for disk in self.disks])
10091       if len(all_disks) != len(self.disks):
10092         raise errors.OpPrereqError("Duplicate disk names given for adoption",
10093                                    errors.ECODE_INVAL)
10094       baddisks = [d for d in all_disks
10095                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10096       if baddisks:
10097         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10098                                    " cannot be adopted" %
10099                                    (", ".join(baddisks),
10100                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
10101                                    errors.ECODE_INVAL)
10102
10103       node_disks = self.rpc.call_bdev_sizes([pnode.name],
10104                                             list(all_disks))[pnode.name]
10105       node_disks.Raise("Cannot get block device information from node %s" %
10106                        pnode.name)
10107       node_disks = node_disks.payload
10108       delta = all_disks.difference(node_disks.keys())
10109       if delta:
10110         raise errors.OpPrereqError("Missing block device(s): %s" %
10111                                    utils.CommaJoin(delta),
10112                                    errors.ECODE_INVAL)
10113       for dsk in self.disks:
10114         dsk[constants.IDISK_SIZE] = \
10115           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10116
10117     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10118
10119     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10120     # check OS parameters (remotely)
10121     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10122
10123     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10124
10125     # memory check on primary node
10126     #TODO(dynmem): use MINMEM for checking
10127     if self.op.start:
10128       _CheckNodeFreeMemory(self, self.pnode.name,
10129                            "creating instance %s" % self.op.instance_name,
10130                            self.be_full[constants.BE_MAXMEM],
10131                            self.op.hypervisor)
10132
10133     self.dry_run_result = list(nodenames)
10134
10135   def Exec(self, feedback_fn):
10136     """Create and add the instance to the cluster.
10137
10138     """
10139     instance = self.op.instance_name
10140     pnode_name = self.pnode.name
10141
10142     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10143                 self.owned_locks(locking.LEVEL_NODE)), \
10144       "Node locks differ from node resource locks"
10145
10146     ht_kind = self.op.hypervisor
10147     if ht_kind in constants.HTS_REQ_PORT:
10148       network_port = self.cfg.AllocatePort()
10149     else:
10150       network_port = None
10151
10152     # This is ugly but we got a chicken-egg problem here
10153     # We can only take the group disk parameters, as the instance
10154     # has no disks yet (we are generating them right here).
10155     node = self.cfg.GetNodeInfo(pnode_name)
10156     nodegroup = self.cfg.GetNodeGroup(node.group)
10157     disks = _GenerateDiskTemplate(self,
10158                                   self.op.disk_template,
10159                                   instance, pnode_name,
10160                                   self.secondaries,
10161                                   self.disks,
10162                                   self.instance_file_storage_dir,
10163                                   self.op.file_driver,
10164                                   0,
10165                                   feedback_fn,
10166                                   self.cfg.GetGroupDiskParams(nodegroup))
10167
10168     iobj = objects.Instance(name=instance, os=self.op.os_type,
10169                             primary_node=pnode_name,
10170                             nics=self.nics, disks=disks,
10171                             disk_template=self.op.disk_template,
10172                             admin_state=constants.ADMINST_DOWN,
10173                             network_port=network_port,
10174                             beparams=self.op.beparams,
10175                             hvparams=self.op.hvparams,
10176                             hypervisor=self.op.hypervisor,
10177                             osparams=self.op.osparams,
10178                             )
10179
10180     if self.op.tags:
10181       for tag in self.op.tags:
10182         iobj.AddTag(tag)
10183
10184     if self.adopt_disks:
10185       if self.op.disk_template == constants.DT_PLAIN:
10186         # rename LVs to the newly-generated names; we need to construct
10187         # 'fake' LV disks with the old data, plus the new unique_id
10188         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10189         rename_to = []
10190         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10191           rename_to.append(t_dsk.logical_id)
10192           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10193           self.cfg.SetDiskID(t_dsk, pnode_name)
10194         result = self.rpc.call_blockdev_rename(pnode_name,
10195                                                zip(tmp_disks, rename_to))
10196         result.Raise("Failed to rename adoped LVs")
10197     else:
10198       feedback_fn("* creating instance disks...")
10199       try:
10200         _CreateDisks(self, iobj)
10201       except errors.OpExecError:
10202         self.LogWarning("Device creation failed, reverting...")
10203         try:
10204           _RemoveDisks(self, iobj)
10205         finally:
10206           self.cfg.ReleaseDRBDMinors(instance)
10207           raise
10208
10209     feedback_fn("adding instance %s to cluster config" % instance)
10210
10211     self.cfg.AddInstance(iobj, self.proc.GetECId())
10212
10213     # Declare that we don't want to remove the instance lock anymore, as we've
10214     # added the instance to the config
10215     del self.remove_locks[locking.LEVEL_INSTANCE]
10216
10217     if self.op.mode == constants.INSTANCE_IMPORT:
10218       # Release unused nodes
10219       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10220     else:
10221       # Release all nodes
10222       _ReleaseLocks(self, locking.LEVEL_NODE)
10223
10224     disk_abort = False
10225     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10226       feedback_fn("* wiping instance disks...")
10227       try:
10228         _WipeDisks(self, iobj)
10229       except errors.OpExecError, err:
10230         logging.exception("Wiping disks failed")
10231         self.LogWarning("Wiping instance disks failed (%s)", err)
10232         disk_abort = True
10233
10234     if disk_abort:
10235       # Something is already wrong with the disks, don't do anything else
10236       pass
10237     elif self.op.wait_for_sync:
10238       disk_abort = not _WaitForSync(self, iobj)
10239     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10240       # make sure the disks are not degraded (still sync-ing is ok)
10241       feedback_fn("* checking mirrors status")
10242       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10243     else:
10244       disk_abort = False
10245
10246     if disk_abort:
10247       _RemoveDisks(self, iobj)
10248       self.cfg.RemoveInstance(iobj.name)
10249       # Make sure the instance lock gets removed
10250       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10251       raise errors.OpExecError("There are some degraded disks for"
10252                                " this instance")
10253
10254     # Release all node resource locks
10255     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10256
10257     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10258       # we need to set the disks ID to the primary node, since the
10259       # preceding code might or might have not done it, depending on
10260       # disk template and other options
10261       for disk in iobj.disks:
10262         self.cfg.SetDiskID(disk, pnode_name)
10263       if self.op.mode == constants.INSTANCE_CREATE:
10264         if not self.op.no_install:
10265           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10266                         not self.op.wait_for_sync)
10267           if pause_sync:
10268             feedback_fn("* pausing disk sync to install instance OS")
10269             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10270                                                               (iobj.disks,
10271                                                                iobj), True)
10272             for idx, success in enumerate(result.payload):
10273               if not success:
10274                 logging.warn("pause-sync of instance %s for disk %d failed",
10275                              instance, idx)
10276
10277           feedback_fn("* running the instance OS create scripts...")
10278           # FIXME: pass debug option from opcode to backend
10279           os_add_result = \
10280             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10281                                           self.op.debug_level)
10282           if pause_sync:
10283             feedback_fn("* resuming disk sync")
10284             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10285                                                               (iobj.disks,
10286                                                                iobj), False)
10287             for idx, success in enumerate(result.payload):
10288               if not success:
10289                 logging.warn("resume-sync of instance %s for disk %d failed",
10290                              instance, idx)
10291
10292           os_add_result.Raise("Could not add os for instance %s"
10293                               " on node %s" % (instance, pnode_name))
10294
10295       else:
10296         if self.op.mode == constants.INSTANCE_IMPORT:
10297           feedback_fn("* running the instance OS import scripts...")
10298
10299           transfers = []
10300
10301           for idx, image in enumerate(self.src_images):
10302             if not image:
10303               continue
10304
10305             # FIXME: pass debug option from opcode to backend
10306             dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10307                                                constants.IEIO_FILE, (image, ),
10308                                                constants.IEIO_SCRIPT,
10309                                                (iobj.disks[idx], idx),
10310                                                None)
10311             transfers.append(dt)
10312
10313           import_result = \
10314             masterd.instance.TransferInstanceData(self, feedback_fn,
10315                                                   self.op.src_node, pnode_name,
10316                                                   self.pnode.secondary_ip,
10317                                                   iobj, transfers)
10318           if not compat.all(import_result):
10319             self.LogWarning("Some disks for instance %s on node %s were not"
10320                             " imported successfully" % (instance, pnode_name))
10321
10322           rename_from = self._old_instance_name
10323
10324         elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10325           feedback_fn("* preparing remote import...")
10326           # The source cluster will stop the instance before attempting to make
10327           # a connection. In some cases stopping an instance can take a long
10328           # time, hence the shutdown timeout is added to the connection
10329           # timeout.
10330           connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10331                              self.op.source_shutdown_timeout)
10332           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10333
10334           assert iobj.primary_node == self.pnode.name
10335           disk_results = \
10336             masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10337                                           self.source_x509_ca,
10338                                           self._cds, timeouts)
10339           if not compat.all(disk_results):
10340             # TODO: Should the instance still be started, even if some disks
10341             # failed to import (valid for local imports, too)?
10342             self.LogWarning("Some disks for instance %s on node %s were not"
10343                             " imported successfully" % (instance, pnode_name))
10344
10345           rename_from = self.source_instance_name
10346
10347         else:
10348           # also checked in the prereq part
10349           raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10350                                        % self.op.mode)
10351
10352         # Run rename script on newly imported instance
10353         assert iobj.name == instance
10354         feedback_fn("Running rename script for %s" % instance)
10355         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10356                                                    rename_from,
10357                                                    self.op.debug_level)
10358         if result.fail_msg:
10359           self.LogWarning("Failed to run rename script for %s on node"
10360                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10361
10362     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10363
10364     if self.op.start:
10365       iobj.admin_state = constants.ADMINST_UP
10366       self.cfg.Update(iobj, feedback_fn)
10367       logging.info("Starting instance %s on node %s", instance, pnode_name)
10368       feedback_fn("* starting instance...")
10369       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10370                                             False)
10371       result.Raise("Could not start instance")
10372
10373     return list(iobj.all_nodes)
10374
10375
10376 def _CheckRADOSFreeSpace():
10377   """Compute disk size requirements inside the RADOS cluster.
10378
10379   """
10380   # For the RADOS cluster we assume there is always enough space.
10381   pass
10382
10383
10384 class LUInstanceConsole(NoHooksLU):
10385   """Connect to an instance's console.
10386
10387   This is somewhat special in that it returns the command line that
10388   you need to run on the master node in order to connect to the
10389   console.
10390
10391   """
10392   REQ_BGL = False
10393
10394   def ExpandNames(self):
10395     self.share_locks = _ShareAll()
10396     self._ExpandAndLockInstance()
10397
10398   def CheckPrereq(self):
10399     """Check prerequisites.
10400
10401     This checks that the instance is in the cluster.
10402
10403     """
10404     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10405     assert self.instance is not None, \
10406       "Cannot retrieve locked instance %s" % self.op.instance_name
10407     _CheckNodeOnline(self, self.instance.primary_node)
10408
10409   def Exec(self, feedback_fn):
10410     """Connect to the console of an instance
10411
10412     """
10413     instance = self.instance
10414     node = instance.primary_node
10415
10416     node_insts = self.rpc.call_instance_list([node],
10417                                              [instance.hypervisor])[node]
10418     node_insts.Raise("Can't get node information from %s" % node)
10419
10420     if instance.name not in node_insts.payload:
10421       if instance.admin_state == constants.ADMINST_UP:
10422         state = constants.INSTST_ERRORDOWN
10423       elif instance.admin_state == constants.ADMINST_DOWN:
10424         state = constants.INSTST_ADMINDOWN
10425       else:
10426         state = constants.INSTST_ADMINOFFLINE
10427       raise errors.OpExecError("Instance %s is not running (state %s)" %
10428                                (instance.name, state))
10429
10430     logging.debug("Connecting to console of %s on %s", instance.name, node)
10431
10432     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10433
10434
10435 def _GetInstanceConsole(cluster, instance):
10436   """Returns console information for an instance.
10437
10438   @type cluster: L{objects.Cluster}
10439   @type instance: L{objects.Instance}
10440   @rtype: dict
10441
10442   """
10443   hyper = hypervisor.GetHypervisor(instance.hypervisor)
10444   # beparams and hvparams are passed separately, to avoid editing the
10445   # instance and then saving the defaults in the instance itself.
10446   hvparams = cluster.FillHV(instance)
10447   beparams = cluster.FillBE(instance)
10448   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10449
10450   assert console.instance == instance.name
10451   assert console.Validate()
10452
10453   return console.ToDict()
10454
10455
10456 class LUInstanceReplaceDisks(LogicalUnit):
10457   """Replace the disks of an instance.
10458
10459   """
10460   HPATH = "mirrors-replace"
10461   HTYPE = constants.HTYPE_INSTANCE
10462   REQ_BGL = False
10463
10464   def CheckArguments(self):
10465     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10466                                   self.op.iallocator)
10467
10468   def ExpandNames(self):
10469     self._ExpandAndLockInstance()
10470
10471     assert locking.LEVEL_NODE not in self.needed_locks
10472     assert locking.LEVEL_NODE_RES not in self.needed_locks
10473     assert locking.LEVEL_NODEGROUP not in self.needed_locks
10474
10475     assert self.op.iallocator is None or self.op.remote_node is None, \
10476       "Conflicting options"
10477
10478     if self.op.remote_node is not None:
10479       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10480
10481       # Warning: do not remove the locking of the new secondary here
10482       # unless DRBD8.AddChildren is changed to work in parallel;
10483       # currently it doesn't since parallel invocations of
10484       # FindUnusedMinor will conflict
10485       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10486       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10487     else:
10488       self.needed_locks[locking.LEVEL_NODE] = []
10489       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10490
10491       if self.op.iallocator is not None:
10492         # iallocator will select a new node in the same group
10493         self.needed_locks[locking.LEVEL_NODEGROUP] = []
10494
10495     self.needed_locks[locking.LEVEL_NODE_RES] = []
10496
10497     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10498                                    self.op.iallocator, self.op.remote_node,
10499                                    self.op.disks, False, self.op.early_release,
10500                                    self.op.ignore_ipolicy)
10501
10502     self.tasklets = [self.replacer]
10503
10504   def DeclareLocks(self, level):
10505     if level == locking.LEVEL_NODEGROUP:
10506       assert self.op.remote_node is None
10507       assert self.op.iallocator is not None
10508       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10509
10510       self.share_locks[locking.LEVEL_NODEGROUP] = 1
10511       # Lock all groups used by instance optimistically; this requires going
10512       # via the node before it's locked, requiring verification later on
10513       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10514         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10515
10516     elif level == locking.LEVEL_NODE:
10517       if self.op.iallocator is not None:
10518         assert self.op.remote_node is None
10519         assert not self.needed_locks[locking.LEVEL_NODE]
10520
10521         # Lock member nodes of all locked groups
10522         self.needed_locks[locking.LEVEL_NODE] = \
10523             [node_name
10524              for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10525              for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10526       else:
10527         self._LockInstancesNodes()
10528     elif level == locking.LEVEL_NODE_RES:
10529       # Reuse node locks
10530       self.needed_locks[locking.LEVEL_NODE_RES] = \
10531         self.needed_locks[locking.LEVEL_NODE]
10532
10533   def BuildHooksEnv(self):
10534     """Build hooks env.
10535
10536     This runs on the master, the primary and all the secondaries.
10537
10538     """
10539     instance = self.replacer.instance
10540     env = {
10541       "MODE": self.op.mode,
10542       "NEW_SECONDARY": self.op.remote_node,
10543       "OLD_SECONDARY": instance.secondary_nodes[0],
10544       }
10545     env.update(_BuildInstanceHookEnvByObject(self, instance))
10546     return env
10547
10548   def BuildHooksNodes(self):
10549     """Build hooks nodes.
10550
10551     """
10552     instance = self.replacer.instance
10553     nl = [
10554       self.cfg.GetMasterNode(),
10555       instance.primary_node,
10556       ]
10557     if self.op.remote_node is not None:
10558       nl.append(self.op.remote_node)
10559     return nl, nl
10560
10561   def CheckPrereq(self):
10562     """Check prerequisites.
10563
10564     """
10565     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10566             self.op.iallocator is None)
10567
10568     # Verify if node group locks are still correct
10569     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10570     if owned_groups:
10571       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10572
10573     return LogicalUnit.CheckPrereq(self)
10574
10575
10576 class TLReplaceDisks(Tasklet):
10577   """Replaces disks for an instance.
10578
10579   Note: Locking is not within the scope of this class.
10580
10581   """
10582   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10583                disks, delay_iallocator, early_release, ignore_ipolicy):
10584     """Initializes this class.
10585
10586     """
10587     Tasklet.__init__(self, lu)
10588
10589     # Parameters
10590     self.instance_name = instance_name
10591     self.mode = mode
10592     self.iallocator_name = iallocator_name
10593     self.remote_node = remote_node
10594     self.disks = disks
10595     self.delay_iallocator = delay_iallocator
10596     self.early_release = early_release
10597     self.ignore_ipolicy = ignore_ipolicy
10598
10599     # Runtime data
10600     self.instance = None
10601     self.new_node = None
10602     self.target_node = None
10603     self.other_node = None
10604     self.remote_node_info = None
10605     self.node_secondary_ip = None
10606
10607   @staticmethod
10608   def CheckArguments(mode, remote_node, ialloc):
10609     """Helper function for users of this class.
10610
10611     """
10612     # check for valid parameter combination
10613     if mode == constants.REPLACE_DISK_CHG:
10614       if remote_node is None and ialloc is None:
10615         raise errors.OpPrereqError("When changing the secondary either an"
10616                                    " iallocator script must be used or the"
10617                                    " new node given", errors.ECODE_INVAL)
10618
10619       if remote_node is not None and ialloc is not None:
10620         raise errors.OpPrereqError("Give either the iallocator or the new"
10621                                    " secondary, not both", errors.ECODE_INVAL)
10622
10623     elif remote_node is not None or ialloc is not None:
10624       # Not replacing the secondary
10625       raise errors.OpPrereqError("The iallocator and new node options can"
10626                                  " only be used when changing the"
10627                                  " secondary node", errors.ECODE_INVAL)
10628
10629   @staticmethod
10630   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10631     """Compute a new secondary node using an IAllocator.
10632
10633     """
10634     req = iallocator.IAReqRelocate(name=instance_name,
10635                                    relocate_from=list(relocate_from))
10636     ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10637
10638     ial.Run(iallocator_name)
10639
10640     if not ial.success:
10641       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10642                                  " %s" % (iallocator_name, ial.info),
10643                                  errors.ECODE_NORES)
10644
10645     if len(ial.result) != ial.required_nodes:
10646       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10647                                  " of nodes (%s), required %s" %
10648                                  (iallocator_name,
10649                                   len(ial.result), ial.required_nodes),
10650                                  errors.ECODE_FAULT)
10651
10652     remote_node_name = ial.result[0]
10653
10654     lu.LogInfo("Selected new secondary for instance '%s': %s",
10655                instance_name, remote_node_name)
10656
10657     return remote_node_name
10658
10659   def _FindFaultyDisks(self, node_name):
10660     """Wrapper for L{_FindFaultyInstanceDisks}.
10661
10662     """
10663     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10664                                     node_name, True)
10665
10666   def _CheckDisksActivated(self, instance):
10667     """Checks if the instance disks are activated.
10668
10669     @param instance: The instance to check disks
10670     @return: True if they are activated, False otherwise
10671
10672     """
10673     nodes = instance.all_nodes
10674
10675     for idx, dev in enumerate(instance.disks):
10676       for node in nodes:
10677         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10678         self.cfg.SetDiskID(dev, node)
10679
10680         result = _BlockdevFind(self, node, dev, instance)
10681
10682         if result.offline:
10683           continue
10684         elif result.fail_msg or not result.payload:
10685           return False
10686
10687     return True
10688
10689   def CheckPrereq(self):
10690     """Check prerequisites.
10691
10692     This checks that the instance is in the cluster.
10693
10694     """
10695     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10696     assert instance is not None, \
10697       "Cannot retrieve locked instance %s" % self.instance_name
10698
10699     if instance.disk_template != constants.DT_DRBD8:
10700       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10701                                  " instances", errors.ECODE_INVAL)
10702
10703     if len(instance.secondary_nodes) != 1:
10704       raise errors.OpPrereqError("The instance has a strange layout,"
10705                                  " expected one secondary but found %d" %
10706                                  len(instance.secondary_nodes),
10707                                  errors.ECODE_FAULT)
10708
10709     if not self.delay_iallocator:
10710       self._CheckPrereq2()
10711
10712   def _CheckPrereq2(self):
10713     """Check prerequisites, second part.
10714
10715     This function should always be part of CheckPrereq. It was separated and is
10716     now called from Exec because during node evacuation iallocator was only
10717     called with an unmodified cluster model, not taking planned changes into
10718     account.
10719
10720     """
10721     instance = self.instance
10722     secondary_node = instance.secondary_nodes[0]
10723
10724     if self.iallocator_name is None:
10725       remote_node = self.remote_node
10726     else:
10727       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10728                                        instance.name, instance.secondary_nodes)
10729
10730     if remote_node is None:
10731       self.remote_node_info = None
10732     else:
10733       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10734              "Remote node '%s' is not locked" % remote_node
10735
10736       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10737       assert self.remote_node_info is not None, \
10738         "Cannot retrieve locked node %s" % remote_node
10739
10740     if remote_node == self.instance.primary_node:
10741       raise errors.OpPrereqError("The specified node is the primary node of"
10742                                  " the instance", errors.ECODE_INVAL)
10743
10744     if remote_node == secondary_node:
10745       raise errors.OpPrereqError("The specified node is already the"
10746                                  " secondary node of the instance",
10747                                  errors.ECODE_INVAL)
10748
10749     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10750                                     constants.REPLACE_DISK_CHG):
10751       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10752                                  errors.ECODE_INVAL)
10753
10754     if self.mode == constants.REPLACE_DISK_AUTO:
10755       if not self._CheckDisksActivated(instance):
10756         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10757                                    " first" % self.instance_name,
10758                                    errors.ECODE_STATE)
10759       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10760       faulty_secondary = self._FindFaultyDisks(secondary_node)
10761
10762       if faulty_primary and faulty_secondary:
10763         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10764                                    " one node and can not be repaired"
10765                                    " automatically" % self.instance_name,
10766                                    errors.ECODE_STATE)
10767
10768       if faulty_primary:
10769         self.disks = faulty_primary
10770         self.target_node = instance.primary_node
10771         self.other_node = secondary_node
10772         check_nodes = [self.target_node, self.other_node]
10773       elif faulty_secondary:
10774         self.disks = faulty_secondary
10775         self.target_node = secondary_node
10776         self.other_node = instance.primary_node
10777         check_nodes = [self.target_node, self.other_node]
10778       else:
10779         self.disks = []
10780         check_nodes = []
10781
10782     else:
10783       # Non-automatic modes
10784       if self.mode == constants.REPLACE_DISK_PRI:
10785         self.target_node = instance.primary_node
10786         self.other_node = secondary_node
10787         check_nodes = [self.target_node, self.other_node]
10788
10789       elif self.mode == constants.REPLACE_DISK_SEC:
10790         self.target_node = secondary_node
10791         self.other_node = instance.primary_node
10792         check_nodes = [self.target_node, self.other_node]
10793
10794       elif self.mode == constants.REPLACE_DISK_CHG:
10795         self.new_node = remote_node
10796         self.other_node = instance.primary_node
10797         self.target_node = secondary_node
10798         check_nodes = [self.new_node, self.other_node]
10799
10800         _CheckNodeNotDrained(self.lu, remote_node)
10801         _CheckNodeVmCapable(self.lu, remote_node)
10802
10803         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10804         assert old_node_info is not None
10805         if old_node_info.offline and not self.early_release:
10806           # doesn't make sense to delay the release
10807           self.early_release = True
10808           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10809                           " early-release mode", secondary_node)
10810
10811       else:
10812         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10813                                      self.mode)
10814
10815       # If not specified all disks should be replaced
10816       if not self.disks:
10817         self.disks = range(len(self.instance.disks))
10818
10819     # TODO: This is ugly, but right now we can't distinguish between internal
10820     # submitted opcode and external one. We should fix that.
10821     if self.remote_node_info:
10822       # We change the node, lets verify it still meets instance policy
10823       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10824       cluster = self.cfg.GetClusterInfo()
10825       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
10826                                                               new_group_info)
10827       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10828                               ignore=self.ignore_ipolicy)
10829
10830     for node in check_nodes:
10831       _CheckNodeOnline(self.lu, node)
10832
10833     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10834                                                           self.other_node,
10835                                                           self.target_node]
10836                               if node_name is not None)
10837
10838     # Release unneeded node and node resource locks
10839     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10840     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10841
10842     # Release any owned node group
10843     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10844       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10845
10846     # Check whether disks are valid
10847     for disk_idx in self.disks:
10848       instance.FindDisk(disk_idx)
10849
10850     # Get secondary node IP addresses
10851     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10852                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10853
10854   def Exec(self, feedback_fn):
10855     """Execute disk replacement.
10856
10857     This dispatches the disk replacement to the appropriate handler.
10858
10859     """
10860     if self.delay_iallocator:
10861       self._CheckPrereq2()
10862
10863     if __debug__:
10864       # Verify owned locks before starting operation
10865       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10866       assert set(owned_nodes) == set(self.node_secondary_ip), \
10867           ("Incorrect node locks, owning %s, expected %s" %
10868            (owned_nodes, self.node_secondary_ip.keys()))
10869       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10870               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10871
10872       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10873       assert list(owned_instances) == [self.instance_name], \
10874           "Instance '%s' not locked" % self.instance_name
10875
10876       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10877           "Should not own any node group lock at this point"
10878
10879     if not self.disks:
10880       feedback_fn("No disks need replacement")
10881       return
10882
10883     feedback_fn("Replacing disk(s) %s for %s" %
10884                 (utils.CommaJoin(self.disks), self.instance.name))
10885
10886     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10887
10888     # Activate the instance disks if we're replacing them on a down instance
10889     if activate_disks:
10890       _StartInstanceDisks(self.lu, self.instance, True)
10891
10892     try:
10893       # Should we replace the secondary node?
10894       if self.new_node is not None:
10895         fn = self._ExecDrbd8Secondary
10896       else:
10897         fn = self._ExecDrbd8DiskOnly
10898
10899       result = fn(feedback_fn)
10900     finally:
10901       # Deactivate the instance disks if we're replacing them on a
10902       # down instance
10903       if activate_disks:
10904         _SafeShutdownInstanceDisks(self.lu, self.instance)
10905
10906     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10907
10908     if __debug__:
10909       # Verify owned locks
10910       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10911       nodes = frozenset(self.node_secondary_ip)
10912       assert ((self.early_release and not owned_nodes) or
10913               (not self.early_release and not (set(owned_nodes) - nodes))), \
10914         ("Not owning the correct locks, early_release=%s, owned=%r,"
10915          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10916
10917     return result
10918
10919   def _CheckVolumeGroup(self, nodes):
10920     self.lu.LogInfo("Checking volume groups")
10921
10922     vgname = self.cfg.GetVGName()
10923
10924     # Make sure volume group exists on all involved nodes
10925     results = self.rpc.call_vg_list(nodes)
10926     if not results:
10927       raise errors.OpExecError("Can't list volume groups on the nodes")
10928
10929     for node in nodes:
10930       res = results[node]
10931       res.Raise("Error checking node %s" % node)
10932       if vgname not in res.payload:
10933         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10934                                  (vgname, node))
10935
10936   def _CheckDisksExistence(self, nodes):
10937     # Check disk existence
10938     for idx, dev in enumerate(self.instance.disks):
10939       if idx not in self.disks:
10940         continue
10941
10942       for node in nodes:
10943         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10944         self.cfg.SetDiskID(dev, node)
10945
10946         result = _BlockdevFind(self, node, dev, self.instance)
10947
10948         msg = result.fail_msg
10949         if msg or not result.payload:
10950           if not msg:
10951             msg = "disk not found"
10952           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10953                                    (idx, node, msg))
10954
10955   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10956     for idx, dev in enumerate(self.instance.disks):
10957       if idx not in self.disks:
10958         continue
10959
10960       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10961                       (idx, node_name))
10962
10963       if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10964                                    on_primary, ldisk=ldisk):
10965         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10966                                  " replace disks for instance %s" %
10967                                  (node_name, self.instance.name))
10968
10969   def _CreateNewStorage(self, node_name):
10970     """Create new storage on the primary or secondary node.
10971
10972     This is only used for same-node replaces, not for changing the
10973     secondary node, hence we don't want to modify the existing disk.
10974
10975     """
10976     iv_names = {}
10977
10978     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10979     for idx, dev in enumerate(disks):
10980       if idx not in self.disks:
10981         continue
10982
10983       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10984
10985       self.cfg.SetDiskID(dev, node_name)
10986
10987       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10988       names = _GenerateUniqueNames(self.lu, lv_names)
10989
10990       (data_disk, meta_disk) = dev.children
10991       vg_data = data_disk.logical_id[0]
10992       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10993                              logical_id=(vg_data, names[0]),
10994                              params=data_disk.params)
10995       vg_meta = meta_disk.logical_id[0]
10996       lv_meta = objects.Disk(dev_type=constants.LD_LV,
10997                              size=constants.DRBD_META_SIZE,
10998                              logical_id=(vg_meta, names[1]),
10999                              params=meta_disk.params)
11000
11001       new_lvs = [lv_data, lv_meta]
11002       old_lvs = [child.Copy() for child in dev.children]
11003       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11004
11005       # we pass force_create=True to force the LVM creation
11006       for new_lv in new_lvs:
11007         _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11008                              _GetInstanceInfoText(self.instance), False)
11009
11010     return iv_names
11011
11012   def _CheckDevices(self, node_name, iv_names):
11013     for name, (dev, _, _) in iv_names.iteritems():
11014       self.cfg.SetDiskID(dev, node_name)
11015
11016       result = _BlockdevFind(self, node_name, dev, self.instance)
11017
11018       msg = result.fail_msg
11019       if msg or not result.payload:
11020         if not msg:
11021           msg = "disk not found"
11022         raise errors.OpExecError("Can't find DRBD device %s: %s" %
11023                                  (name, msg))
11024
11025       if result.payload.is_degraded:
11026         raise errors.OpExecError("DRBD device %s is degraded!" % name)
11027
11028   def _RemoveOldStorage(self, node_name, iv_names):
11029     for name, (_, old_lvs, _) in iv_names.iteritems():
11030       self.lu.LogInfo("Remove logical volumes for %s" % name)
11031
11032       for lv in old_lvs:
11033         self.cfg.SetDiskID(lv, node_name)
11034
11035         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11036         if msg:
11037           self.lu.LogWarning("Can't remove old LV: %s" % msg,
11038                              hint="remove unused LVs manually")
11039
11040   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11041     """Replace a disk on the primary or secondary for DRBD 8.
11042
11043     The algorithm for replace is quite complicated:
11044
11045       1. for each disk to be replaced:
11046
11047         1. create new LVs on the target node with unique names
11048         1. detach old LVs from the drbd device
11049         1. rename old LVs to name_replaced.<time_t>
11050         1. rename new LVs to old LVs
11051         1. attach the new LVs (with the old names now) to the drbd device
11052
11053       1. wait for sync across all devices
11054
11055       1. for each modified disk:
11056
11057         1. remove old LVs (which have the name name_replaces.<time_t>)
11058
11059     Failures are not very well handled.
11060
11061     """
11062     steps_total = 6
11063
11064     # Step: check device activation
11065     self.lu.LogStep(1, steps_total, "Check device existence")
11066     self._CheckDisksExistence([self.other_node, self.target_node])
11067     self._CheckVolumeGroup([self.target_node, self.other_node])
11068
11069     # Step: check other node consistency
11070     self.lu.LogStep(2, steps_total, "Check peer consistency")
11071     self._CheckDisksConsistency(self.other_node,
11072                                 self.other_node == self.instance.primary_node,
11073                                 False)
11074
11075     # Step: create new storage
11076     self.lu.LogStep(3, steps_total, "Allocate new storage")
11077     iv_names = self._CreateNewStorage(self.target_node)
11078
11079     # Step: for each lv, detach+rename*2+attach
11080     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11081     for dev, old_lvs, new_lvs in iv_names.itervalues():
11082       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11083
11084       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11085                                                      old_lvs)
11086       result.Raise("Can't detach drbd from local storage on node"
11087                    " %s for device %s" % (self.target_node, dev.iv_name))
11088       #dev.children = []
11089       #cfg.Update(instance)
11090
11091       # ok, we created the new LVs, so now we know we have the needed
11092       # storage; as such, we proceed on the target node to rename
11093       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11094       # using the assumption that logical_id == physical_id (which in
11095       # turn is the unique_id on that node)
11096
11097       # FIXME(iustin): use a better name for the replaced LVs
11098       temp_suffix = int(time.time())
11099       ren_fn = lambda d, suff: (d.physical_id[0],
11100                                 d.physical_id[1] + "_replaced-%s" % suff)
11101
11102       # Build the rename list based on what LVs exist on the node
11103       rename_old_to_new = []
11104       for to_ren in old_lvs:
11105         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11106         if not result.fail_msg and result.payload:
11107           # device exists
11108           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11109
11110       self.lu.LogInfo("Renaming the old LVs on the target node")
11111       result = self.rpc.call_blockdev_rename(self.target_node,
11112                                              rename_old_to_new)
11113       result.Raise("Can't rename old LVs on node %s" % self.target_node)
11114
11115       # Now we rename the new LVs to the old LVs
11116       self.lu.LogInfo("Renaming the new LVs on the target node")
11117       rename_new_to_old = [(new, old.physical_id)
11118                            for old, new in zip(old_lvs, new_lvs)]
11119       result = self.rpc.call_blockdev_rename(self.target_node,
11120                                              rename_new_to_old)
11121       result.Raise("Can't rename new LVs on node %s" % self.target_node)
11122
11123       # Intermediate steps of in memory modifications
11124       for old, new in zip(old_lvs, new_lvs):
11125         new.logical_id = old.logical_id
11126         self.cfg.SetDiskID(new, self.target_node)
11127
11128       # We need to modify old_lvs so that removal later removes the
11129       # right LVs, not the newly added ones; note that old_lvs is a
11130       # copy here
11131       for disk in old_lvs:
11132         disk.logical_id = ren_fn(disk, temp_suffix)
11133         self.cfg.SetDiskID(disk, self.target_node)
11134
11135       # Now that the new lvs have the old name, we can add them to the device
11136       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11137       result = self.rpc.call_blockdev_addchildren(self.target_node,
11138                                                   (dev, self.instance), new_lvs)
11139       msg = result.fail_msg
11140       if msg:
11141         for new_lv in new_lvs:
11142           msg2 = self.rpc.call_blockdev_remove(self.target_node,
11143                                                new_lv).fail_msg
11144           if msg2:
11145             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11146                                hint=("cleanup manually the unused logical"
11147                                      "volumes"))
11148         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11149
11150     cstep = itertools.count(5)
11151
11152     if self.early_release:
11153       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11154       self._RemoveOldStorage(self.target_node, iv_names)
11155       # TODO: Check if releasing locks early still makes sense
11156       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11157     else:
11158       # Release all resource locks except those used by the instance
11159       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11160                     keep=self.node_secondary_ip.keys())
11161
11162     # Release all node locks while waiting for sync
11163     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11164
11165     # TODO: Can the instance lock be downgraded here? Take the optional disk
11166     # shutdown in the caller into consideration.
11167
11168     # Wait for sync
11169     # This can fail as the old devices are degraded and _WaitForSync
11170     # does a combined result over all disks, so we don't check its return value
11171     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11172     _WaitForSync(self.lu, self.instance)
11173
11174     # Check all devices manually
11175     self._CheckDevices(self.instance.primary_node, iv_names)
11176
11177     # Step: remove old storage
11178     if not self.early_release:
11179       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11180       self._RemoveOldStorage(self.target_node, iv_names)
11181
11182   def _ExecDrbd8Secondary(self, feedback_fn):
11183     """Replace the secondary node for DRBD 8.
11184
11185     The algorithm for replace is quite complicated:
11186       - for all disks of the instance:
11187         - create new LVs on the new node with same names
11188         - shutdown the drbd device on the old secondary
11189         - disconnect the drbd network on the primary
11190         - create the drbd device on the new secondary
11191         - network attach the drbd on the primary, using an artifice:
11192           the drbd code for Attach() will connect to the network if it
11193           finds a device which is connected to the good local disks but
11194           not network enabled
11195       - wait for sync across all devices
11196       - remove all disks from the old secondary
11197
11198     Failures are not very well handled.
11199
11200     """
11201     steps_total = 6
11202
11203     pnode = self.instance.primary_node
11204
11205     # Step: check device activation
11206     self.lu.LogStep(1, steps_total, "Check device existence")
11207     self._CheckDisksExistence([self.instance.primary_node])
11208     self._CheckVolumeGroup([self.instance.primary_node])
11209
11210     # Step: check other node consistency
11211     self.lu.LogStep(2, steps_total, "Check peer consistency")
11212     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11213
11214     # Step: create new storage
11215     self.lu.LogStep(3, steps_total, "Allocate new storage")
11216     disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11217     for idx, dev in enumerate(disks):
11218       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11219                       (self.new_node, idx))
11220       # we pass force_create=True to force LVM creation
11221       for new_lv in dev.children:
11222         _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11223                              True, _GetInstanceInfoText(self.instance), False)
11224
11225     # Step 4: dbrd minors and drbd setups changes
11226     # after this, we must manually remove the drbd minors on both the
11227     # error and the success paths
11228     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11229     minors = self.cfg.AllocateDRBDMinor([self.new_node
11230                                          for dev in self.instance.disks],
11231                                         self.instance.name)
11232     logging.debug("Allocated minors %r", minors)
11233
11234     iv_names = {}
11235     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11236       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11237                       (self.new_node, idx))
11238       # create new devices on new_node; note that we create two IDs:
11239       # one without port, so the drbd will be activated without
11240       # networking information on the new node at this stage, and one
11241       # with network, for the latter activation in step 4
11242       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11243       if self.instance.primary_node == o_node1:
11244         p_minor = o_minor1
11245       else:
11246         assert self.instance.primary_node == o_node2, "Three-node instance?"
11247         p_minor = o_minor2
11248
11249       new_alone_id = (self.instance.primary_node, self.new_node, None,
11250                       p_minor, new_minor, o_secret)
11251       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11252                     p_minor, new_minor, o_secret)
11253
11254       iv_names[idx] = (dev, dev.children, new_net_id)
11255       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11256                     new_net_id)
11257       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11258                               logical_id=new_alone_id,
11259                               children=dev.children,
11260                               size=dev.size,
11261                               params={})
11262       (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11263                                              self.cfg)
11264       try:
11265         _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11266                               anno_new_drbd,
11267                               _GetInstanceInfoText(self.instance), False)
11268       except errors.GenericError:
11269         self.cfg.ReleaseDRBDMinors(self.instance.name)
11270         raise
11271
11272     # We have new devices, shutdown the drbd on the old secondary
11273     for idx, dev in enumerate(self.instance.disks):
11274       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11275       self.cfg.SetDiskID(dev, self.target_node)
11276       msg = self.rpc.call_blockdev_shutdown(self.target_node,
11277                                             (dev, self.instance)).fail_msg
11278       if msg:
11279         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11280                            "node: %s" % (idx, msg),
11281                            hint=("Please cleanup this device manually as"
11282                                  " soon as possible"))
11283
11284     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11285     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11286                                                self.instance.disks)[pnode]
11287
11288     msg = result.fail_msg
11289     if msg:
11290       # detaches didn't succeed (unlikely)
11291       self.cfg.ReleaseDRBDMinors(self.instance.name)
11292       raise errors.OpExecError("Can't detach the disks from the network on"
11293                                " old node: %s" % (msg,))
11294
11295     # if we managed to detach at least one, we update all the disks of
11296     # the instance to point to the new secondary
11297     self.lu.LogInfo("Updating instance configuration")
11298     for dev, _, new_logical_id in iv_names.itervalues():
11299       dev.logical_id = new_logical_id
11300       self.cfg.SetDiskID(dev, self.instance.primary_node)
11301
11302     self.cfg.Update(self.instance, feedback_fn)
11303
11304     # Release all node locks (the configuration has been updated)
11305     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11306
11307     # and now perform the drbd attach
11308     self.lu.LogInfo("Attaching primary drbds to new secondary"
11309                     " (standalone => connected)")
11310     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11311                                             self.new_node],
11312                                            self.node_secondary_ip,
11313                                            (self.instance.disks, self.instance),
11314                                            self.instance.name,
11315                                            False)
11316     for to_node, to_result in result.items():
11317       msg = to_result.fail_msg
11318       if msg:
11319         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11320                            to_node, msg,
11321                            hint=("please do a gnt-instance info to see the"
11322                                  " status of disks"))
11323
11324     cstep = itertools.count(5)
11325
11326     if self.early_release:
11327       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11328       self._RemoveOldStorage(self.target_node, iv_names)
11329       # TODO: Check if releasing locks early still makes sense
11330       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11331     else:
11332       # Release all resource locks except those used by the instance
11333       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11334                     keep=self.node_secondary_ip.keys())
11335
11336     # TODO: Can the instance lock be downgraded here? Take the optional disk
11337     # shutdown in the caller into consideration.
11338
11339     # Wait for sync
11340     # This can fail as the old devices are degraded and _WaitForSync
11341     # does a combined result over all disks, so we don't check its return value
11342     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11343     _WaitForSync(self.lu, self.instance)
11344
11345     # Check all devices manually
11346     self._CheckDevices(self.instance.primary_node, iv_names)
11347
11348     # Step: remove old storage
11349     if not self.early_release:
11350       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11351       self._RemoveOldStorage(self.target_node, iv_names)
11352
11353
11354 class LURepairNodeStorage(NoHooksLU):
11355   """Repairs the volume group on a node.
11356
11357   """
11358   REQ_BGL = False
11359
11360   def CheckArguments(self):
11361     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11362
11363     storage_type = self.op.storage_type
11364
11365     if (constants.SO_FIX_CONSISTENCY not in
11366         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11367       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11368                                  " repaired" % storage_type,
11369                                  errors.ECODE_INVAL)
11370
11371   def ExpandNames(self):
11372     self.needed_locks = {
11373       locking.LEVEL_NODE: [self.op.node_name],
11374       }
11375
11376   def _CheckFaultyDisks(self, instance, node_name):
11377     """Ensure faulty disks abort the opcode or at least warn."""
11378     try:
11379       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11380                                   node_name, True):
11381         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11382                                    " node '%s'" % (instance.name, node_name),
11383                                    errors.ECODE_STATE)
11384     except errors.OpPrereqError, err:
11385       if self.op.ignore_consistency:
11386         self.proc.LogWarning(str(err.args[0]))
11387       else:
11388         raise
11389
11390   def CheckPrereq(self):
11391     """Check prerequisites.
11392
11393     """
11394     # Check whether any instance on this node has faulty disks
11395     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11396       if inst.admin_state != constants.ADMINST_UP:
11397         continue
11398       check_nodes = set(inst.all_nodes)
11399       check_nodes.discard(self.op.node_name)
11400       for inst_node_name in check_nodes:
11401         self._CheckFaultyDisks(inst, inst_node_name)
11402
11403   def Exec(self, feedback_fn):
11404     feedback_fn("Repairing storage unit '%s' on %s ..." %
11405                 (self.op.name, self.op.node_name))
11406
11407     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11408     result = self.rpc.call_storage_execute(self.op.node_name,
11409                                            self.op.storage_type, st_args,
11410                                            self.op.name,
11411                                            constants.SO_FIX_CONSISTENCY)
11412     result.Raise("Failed to repair storage unit '%s' on %s" %
11413                  (self.op.name, self.op.node_name))
11414
11415
11416 class LUNodeEvacuate(NoHooksLU):
11417   """Evacuates instances off a list of nodes.
11418
11419   """
11420   REQ_BGL = False
11421
11422   _MODE2IALLOCATOR = {
11423     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11424     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11425     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11426     }
11427   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11428   assert (frozenset(_MODE2IALLOCATOR.values()) ==
11429           constants.IALLOCATOR_NEVAC_MODES)
11430
11431   def CheckArguments(self):
11432     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11433
11434   def ExpandNames(self):
11435     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11436
11437     if self.op.remote_node is not None:
11438       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11439       assert self.op.remote_node
11440
11441       if self.op.remote_node == self.op.node_name:
11442         raise errors.OpPrereqError("Can not use evacuated node as a new"
11443                                    " secondary node", errors.ECODE_INVAL)
11444
11445       if self.op.mode != constants.NODE_EVAC_SEC:
11446         raise errors.OpPrereqError("Without the use of an iallocator only"
11447                                    " secondary instances can be evacuated",
11448                                    errors.ECODE_INVAL)
11449
11450     # Declare locks
11451     self.share_locks = _ShareAll()
11452     self.needed_locks = {
11453       locking.LEVEL_INSTANCE: [],
11454       locking.LEVEL_NODEGROUP: [],
11455       locking.LEVEL_NODE: [],
11456       }
11457
11458     # Determine nodes (via group) optimistically, needs verification once locks
11459     # have been acquired
11460     self.lock_nodes = self._DetermineNodes()
11461
11462   def _DetermineNodes(self):
11463     """Gets the list of nodes to operate on.
11464
11465     """
11466     if self.op.remote_node is None:
11467       # Iallocator will choose any node(s) in the same group
11468       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11469     else:
11470       group_nodes = frozenset([self.op.remote_node])
11471
11472     # Determine nodes to be locked
11473     return set([self.op.node_name]) | group_nodes
11474
11475   def _DetermineInstances(self):
11476     """Builds list of instances to operate on.
11477
11478     """
11479     assert self.op.mode in constants.NODE_EVAC_MODES
11480
11481     if self.op.mode == constants.NODE_EVAC_PRI:
11482       # Primary instances only
11483       inst_fn = _GetNodePrimaryInstances
11484       assert self.op.remote_node is None, \
11485         "Evacuating primary instances requires iallocator"
11486     elif self.op.mode == constants.NODE_EVAC_SEC:
11487       # Secondary instances only
11488       inst_fn = _GetNodeSecondaryInstances
11489     else:
11490       # All instances
11491       assert self.op.mode == constants.NODE_EVAC_ALL
11492       inst_fn = _GetNodeInstances
11493       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11494       # per instance
11495       raise errors.OpPrereqError("Due to an issue with the iallocator"
11496                                  " interface it is not possible to evacuate"
11497                                  " all instances at once; specify explicitly"
11498                                  " whether to evacuate primary or secondary"
11499                                  " instances",
11500                                  errors.ECODE_INVAL)
11501
11502     return inst_fn(self.cfg, self.op.node_name)
11503
11504   def DeclareLocks(self, level):
11505     if level == locking.LEVEL_INSTANCE:
11506       # Lock instances optimistically, needs verification once node and group
11507       # locks have been acquired
11508       self.needed_locks[locking.LEVEL_INSTANCE] = \
11509         set(i.name for i in self._DetermineInstances())
11510
11511     elif level == locking.LEVEL_NODEGROUP:
11512       # Lock node groups for all potential target nodes optimistically, needs
11513       # verification once nodes have been acquired
11514       self.needed_locks[locking.LEVEL_NODEGROUP] = \
11515         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11516
11517     elif level == locking.LEVEL_NODE:
11518       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11519
11520   def CheckPrereq(self):
11521     # Verify locks
11522     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11523     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11524     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11525
11526     need_nodes = self._DetermineNodes()
11527
11528     if not owned_nodes.issuperset(need_nodes):
11529       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11530                                  " locks were acquired, current nodes are"
11531                                  " are '%s', used to be '%s'; retry the"
11532                                  " operation" %
11533                                  (self.op.node_name,
11534                                   utils.CommaJoin(need_nodes),
11535                                   utils.CommaJoin(owned_nodes)),
11536                                  errors.ECODE_STATE)
11537
11538     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11539     if owned_groups != wanted_groups:
11540       raise errors.OpExecError("Node groups changed since locks were acquired,"
11541                                " current groups are '%s', used to be '%s';"
11542                                " retry the operation" %
11543                                (utils.CommaJoin(wanted_groups),
11544                                 utils.CommaJoin(owned_groups)))
11545
11546     # Determine affected instances
11547     self.instances = self._DetermineInstances()
11548     self.instance_names = [i.name for i in self.instances]
11549
11550     if set(self.instance_names) != owned_instances:
11551       raise errors.OpExecError("Instances on node '%s' changed since locks"
11552                                " were acquired, current instances are '%s',"
11553                                " used to be '%s'; retry the operation" %
11554                                (self.op.node_name,
11555                                 utils.CommaJoin(self.instance_names),
11556                                 utils.CommaJoin(owned_instances)))
11557
11558     if self.instance_names:
11559       self.LogInfo("Evacuating instances from node '%s': %s",
11560                    self.op.node_name,
11561                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
11562     else:
11563       self.LogInfo("No instances to evacuate from node '%s'",
11564                    self.op.node_name)
11565
11566     if self.op.remote_node is not None:
11567       for i in self.instances:
11568         if i.primary_node == self.op.remote_node:
11569           raise errors.OpPrereqError("Node %s is the primary node of"
11570                                      " instance %s, cannot use it as"
11571                                      " secondary" %
11572                                      (self.op.remote_node, i.name),
11573                                      errors.ECODE_INVAL)
11574
11575   def Exec(self, feedback_fn):
11576     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11577
11578     if not self.instance_names:
11579       # No instances to evacuate
11580       jobs = []
11581
11582     elif self.op.iallocator is not None:
11583       # TODO: Implement relocation to other group
11584       evac_mode = self._MODE2IALLOCATOR[self.op.mode]
11585       req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
11586                                      instances=list(self.instance_names))
11587       ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11588
11589       ial.Run(self.op.iallocator)
11590
11591       if not ial.success:
11592         raise errors.OpPrereqError("Can't compute node evacuation using"
11593                                    " iallocator '%s': %s" %
11594                                    (self.op.iallocator, ial.info),
11595                                    errors.ECODE_NORES)
11596
11597       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11598
11599     elif self.op.remote_node is not None:
11600       assert self.op.mode == constants.NODE_EVAC_SEC
11601       jobs = [
11602         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11603                                         remote_node=self.op.remote_node,
11604                                         disks=[],
11605                                         mode=constants.REPLACE_DISK_CHG,
11606                                         early_release=self.op.early_release)]
11607         for instance_name in self.instance_names
11608         ]
11609
11610     else:
11611       raise errors.ProgrammerError("No iallocator or remote node")
11612
11613     return ResultWithJobs(jobs)
11614
11615
11616 def _SetOpEarlyRelease(early_release, op):
11617   """Sets C{early_release} flag on opcodes if available.
11618
11619   """
11620   try:
11621     op.early_release = early_release
11622   except AttributeError:
11623     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11624
11625   return op
11626
11627
11628 def _NodeEvacDest(use_nodes, group, nodes):
11629   """Returns group or nodes depending on caller's choice.
11630
11631   """
11632   if use_nodes:
11633     return utils.CommaJoin(nodes)
11634   else:
11635     return group
11636
11637
11638 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11639   """Unpacks the result of change-group and node-evacuate iallocator requests.
11640
11641   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11642   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11643
11644   @type lu: L{LogicalUnit}
11645   @param lu: Logical unit instance
11646   @type alloc_result: tuple/list
11647   @param alloc_result: Result from iallocator
11648   @type early_release: bool
11649   @param early_release: Whether to release locks early if possible
11650   @type use_nodes: bool
11651   @param use_nodes: Whether to display node names instead of groups
11652
11653   """
11654   (moved, failed, jobs) = alloc_result
11655
11656   if failed:
11657     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11658                                  for (name, reason) in failed)
11659     lu.LogWarning("Unable to evacuate instances %s", failreason)
11660     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11661
11662   if moved:
11663     lu.LogInfo("Instances to be moved: %s",
11664                utils.CommaJoin("%s (to %s)" %
11665                                (name, _NodeEvacDest(use_nodes, group, nodes))
11666                                for (name, group, nodes) in moved))
11667
11668   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11669               map(opcodes.OpCode.LoadOpCode, ops))
11670           for ops in jobs]
11671
11672
11673 class LUInstanceGrowDisk(LogicalUnit):
11674   """Grow a disk of an instance.
11675
11676   """
11677   HPATH = "disk-grow"
11678   HTYPE = constants.HTYPE_INSTANCE
11679   REQ_BGL = False
11680
11681   def ExpandNames(self):
11682     self._ExpandAndLockInstance()
11683     self.needed_locks[locking.LEVEL_NODE] = []
11684     self.needed_locks[locking.LEVEL_NODE_RES] = []
11685     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11686     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11687
11688   def DeclareLocks(self, level):
11689     if level == locking.LEVEL_NODE:
11690       self._LockInstancesNodes()
11691     elif level == locking.LEVEL_NODE_RES:
11692       # Copy node locks
11693       self.needed_locks[locking.LEVEL_NODE_RES] = \
11694         self.needed_locks[locking.LEVEL_NODE][:]
11695
11696   def BuildHooksEnv(self):
11697     """Build hooks env.
11698
11699     This runs on the master, the primary and all the secondaries.
11700
11701     """
11702     env = {
11703       "DISK": self.op.disk,
11704       "AMOUNT": self.op.amount,
11705       "ABSOLUTE": self.op.absolute,
11706       }
11707     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11708     return env
11709
11710   def BuildHooksNodes(self):
11711     """Build hooks nodes.
11712
11713     """
11714     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11715     return (nl, nl)
11716
11717   def CheckPrereq(self):
11718     """Check prerequisites.
11719
11720     This checks that the instance is in the cluster.
11721
11722     """
11723     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11724     assert instance is not None, \
11725       "Cannot retrieve locked instance %s" % self.op.instance_name
11726     nodenames = list(instance.all_nodes)
11727     for node in nodenames:
11728       _CheckNodeOnline(self, node)
11729
11730     self.instance = instance
11731
11732     if instance.disk_template not in constants.DTS_GROWABLE:
11733       raise errors.OpPrereqError("Instance's disk layout does not support"
11734                                  " growing", errors.ECODE_INVAL)
11735
11736     self.disk = instance.FindDisk(self.op.disk)
11737
11738     if self.op.absolute:
11739       self.target = self.op.amount
11740       self.delta = self.target - self.disk.size
11741       if self.delta < 0:
11742         raise errors.OpPrereqError("Requested size (%s) is smaller than "
11743                                    "current disk size (%s)" %
11744                                    (utils.FormatUnit(self.target, "h"),
11745                                     utils.FormatUnit(self.disk.size, "h")),
11746                                    errors.ECODE_STATE)
11747     else:
11748       self.delta = self.op.amount
11749       self.target = self.disk.size + self.delta
11750       if self.delta < 0:
11751         raise errors.OpPrereqError("Requested increment (%s) is negative" %
11752                                    utils.FormatUnit(self.delta, "h"),
11753                                    errors.ECODE_INVAL)
11754
11755     if instance.disk_template not in (constants.DT_FILE,
11756                                       constants.DT_SHARED_FILE,
11757                                       constants.DT_RBD):
11758       # TODO: check the free disk space for file, when that feature will be
11759       # supported
11760       _CheckNodesFreeDiskPerVG(self, nodenames,
11761                                self.disk.ComputeGrowth(self.delta))
11762
11763   def Exec(self, feedback_fn):
11764     """Execute disk grow.
11765
11766     """
11767     instance = self.instance
11768     disk = self.disk
11769
11770     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11771     assert (self.owned_locks(locking.LEVEL_NODE) ==
11772             self.owned_locks(locking.LEVEL_NODE_RES))
11773
11774     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11775     if not disks_ok:
11776       raise errors.OpExecError("Cannot activate block device to grow")
11777
11778     feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11779                 (self.op.disk, instance.name,
11780                  utils.FormatUnit(self.delta, "h"),
11781                  utils.FormatUnit(self.target, "h")))
11782
11783     # First run all grow ops in dry-run mode
11784     for node in instance.all_nodes:
11785       self.cfg.SetDiskID(disk, node)
11786       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11787                                            True, True)
11788       result.Raise("Grow request failed to node %s" % node)
11789
11790     # We know that (as far as we can test) operations across different
11791     # nodes will succeed, time to run it for real on the backing storage
11792     for node in instance.all_nodes:
11793       self.cfg.SetDiskID(disk, node)
11794       result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11795                                            False, True)
11796       result.Raise("Grow request failed to node %s" % node)
11797
11798     # And now execute it for logical storage, on the primary node
11799     node = instance.primary_node
11800     self.cfg.SetDiskID(disk, node)
11801     result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11802                                          False, False)
11803     result.Raise("Grow request failed to node %s" % node)
11804
11805     disk.RecordGrow(self.delta)
11806     self.cfg.Update(instance, feedback_fn)
11807
11808     # Changes have been recorded, release node lock
11809     _ReleaseLocks(self, locking.LEVEL_NODE)
11810
11811     # Downgrade lock while waiting for sync
11812     self.glm.downgrade(locking.LEVEL_INSTANCE)
11813
11814     if self.op.wait_for_sync:
11815       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11816       if disk_abort:
11817         self.proc.LogWarning("Disk sync-ing has not returned a good"
11818                              " status; please check the instance")
11819       if instance.admin_state != constants.ADMINST_UP:
11820         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11821     elif instance.admin_state != constants.ADMINST_UP:
11822       self.proc.LogWarning("Not shutting down the disk even if the instance is"
11823                            " not supposed to be running because no wait for"
11824                            " sync mode was requested")
11825
11826     assert self.owned_locks(locking.LEVEL_NODE_RES)
11827     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11828
11829
11830 class LUInstanceQueryData(NoHooksLU):
11831   """Query runtime instance data.
11832
11833   """
11834   REQ_BGL = False
11835
11836   def ExpandNames(self):
11837     self.needed_locks = {}
11838
11839     # Use locking if requested or when non-static information is wanted
11840     if not (self.op.static or self.op.use_locking):
11841       self.LogWarning("Non-static data requested, locks need to be acquired")
11842       self.op.use_locking = True
11843
11844     if self.op.instances or not self.op.use_locking:
11845       # Expand instance names right here
11846       self.wanted_names = _GetWantedInstances(self, self.op.instances)
11847     else:
11848       # Will use acquired locks
11849       self.wanted_names = None
11850
11851     if self.op.use_locking:
11852       self.share_locks = _ShareAll()
11853
11854       if self.wanted_names is None:
11855         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11856       else:
11857         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11858
11859       self.needed_locks[locking.LEVEL_NODEGROUP] = []
11860       self.needed_locks[locking.LEVEL_NODE] = []
11861       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11862
11863   def DeclareLocks(self, level):
11864     if self.op.use_locking:
11865       if level == locking.LEVEL_NODEGROUP:
11866         owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11867
11868         # Lock all groups used by instances optimistically; this requires going
11869         # via the node before it's locked, requiring verification later on
11870         self.needed_locks[locking.LEVEL_NODEGROUP] = \
11871           frozenset(group_uuid
11872                     for instance_name in owned_instances
11873                     for group_uuid in
11874                       self.cfg.GetInstanceNodeGroups(instance_name))
11875
11876       elif level == locking.LEVEL_NODE:
11877         self._LockInstancesNodes()
11878
11879   def CheckPrereq(self):
11880     """Check prerequisites.
11881
11882     This only checks the optional instance list against the existing names.
11883
11884     """
11885     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11886     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11887     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11888
11889     if self.wanted_names is None:
11890       assert self.op.use_locking, "Locking was not used"
11891       self.wanted_names = owned_instances
11892
11893     instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11894
11895     if self.op.use_locking:
11896       _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11897                                 None)
11898     else:
11899       assert not (owned_instances or owned_groups or owned_nodes)
11900
11901     self.wanted_instances = instances.values()
11902
11903   def _ComputeBlockdevStatus(self, node, instance, dev):
11904     """Returns the status of a block device
11905
11906     """
11907     if self.op.static or not node:
11908       return None
11909
11910     self.cfg.SetDiskID(dev, node)
11911
11912     result = self.rpc.call_blockdev_find(node, dev)
11913     if result.offline:
11914       return None
11915
11916     result.Raise("Can't compute disk status for %s" % instance.name)
11917
11918     status = result.payload
11919     if status is None:
11920       return None
11921
11922     return (status.dev_path, status.major, status.minor,
11923             status.sync_percent, status.estimated_time,
11924             status.is_degraded, status.ldisk_status)
11925
11926   def _ComputeDiskStatus(self, instance, snode, dev):
11927     """Compute block device status.
11928
11929     """
11930     (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11931
11932     return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11933
11934   def _ComputeDiskStatusInner(self, instance, snode, dev):
11935     """Compute block device status.
11936
11937     @attention: The device has to be annotated already.
11938
11939     """
11940     if dev.dev_type in constants.LDS_DRBD:
11941       # we change the snode then (otherwise we use the one passed in)
11942       if dev.logical_id[0] == instance.primary_node:
11943         snode = dev.logical_id[1]
11944       else:
11945         snode = dev.logical_id[0]
11946
11947     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11948                                               instance, dev)
11949     dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11950
11951     if dev.children:
11952       dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11953                                         instance, snode),
11954                          dev.children)
11955     else:
11956       dev_children = []
11957
11958     return {
11959       "iv_name": dev.iv_name,
11960       "dev_type": dev.dev_type,
11961       "logical_id": dev.logical_id,
11962       "physical_id": dev.physical_id,
11963       "pstatus": dev_pstatus,
11964       "sstatus": dev_sstatus,
11965       "children": dev_children,
11966       "mode": dev.mode,
11967       "size": dev.size,
11968       }
11969
11970   def Exec(self, feedback_fn):
11971     """Gather and return data"""
11972     result = {}
11973
11974     cluster = self.cfg.GetClusterInfo()
11975
11976     node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11977     nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11978
11979     groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11980                                                  for node in nodes.values()))
11981
11982     group2name_fn = lambda uuid: groups[uuid].name
11983
11984     for instance in self.wanted_instances:
11985       pnode = nodes[instance.primary_node]
11986
11987       if self.op.static or pnode.offline:
11988         remote_state = None
11989         if pnode.offline:
11990           self.LogWarning("Primary node %s is marked offline, returning static"
11991                           " information only for instance %s" %
11992                           (pnode.name, instance.name))
11993       else:
11994         remote_info = self.rpc.call_instance_info(instance.primary_node,
11995                                                   instance.name,
11996                                                   instance.hypervisor)
11997         remote_info.Raise("Error checking node %s" % instance.primary_node)
11998         remote_info = remote_info.payload
11999         if remote_info and "state" in remote_info:
12000           remote_state = "up"
12001         else:
12002           if instance.admin_state == constants.ADMINST_UP:
12003             remote_state = "down"
12004           else:
12005             remote_state = instance.admin_state
12006
12007       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12008                   instance.disks)
12009
12010       snodes_group_uuids = [nodes[snode_name].group
12011                             for snode_name in instance.secondary_nodes]
12012
12013       result[instance.name] = {
12014         "name": instance.name,
12015         "config_state": instance.admin_state,
12016         "run_state": remote_state,
12017         "pnode": instance.primary_node,
12018         "pnode_group_uuid": pnode.group,
12019         "pnode_group_name": group2name_fn(pnode.group),
12020         "snodes": instance.secondary_nodes,
12021         "snodes_group_uuids": snodes_group_uuids,
12022         "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12023         "os": instance.os,
12024         # this happens to be the same format used for hooks
12025         "nics": _NICListToTuple(self, instance.nics),
12026         "disk_template": instance.disk_template,
12027         "disks": disks,
12028         "hypervisor": instance.hypervisor,
12029         "network_port": instance.network_port,
12030         "hv_instance": instance.hvparams,
12031         "hv_actual": cluster.FillHV(instance, skip_globals=True),
12032         "be_instance": instance.beparams,
12033         "be_actual": cluster.FillBE(instance),
12034         "os_instance": instance.osparams,
12035         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12036         "serial_no": instance.serial_no,
12037         "mtime": instance.mtime,
12038         "ctime": instance.ctime,
12039         "uuid": instance.uuid,
12040         }
12041
12042     return result
12043
12044
12045 def PrepareContainerMods(mods, private_fn):
12046   """Prepares a list of container modifications by adding a private data field.
12047
12048   @type mods: list of tuples; (operation, index, parameters)
12049   @param mods: List of modifications
12050   @type private_fn: callable or None
12051   @param private_fn: Callable for constructing a private data field for a
12052     modification
12053   @rtype: list
12054
12055   """
12056   if private_fn is None:
12057     fn = lambda: None
12058   else:
12059     fn = private_fn
12060
12061   return [(op, idx, params, fn()) for (op, idx, params) in mods]
12062
12063
12064 #: Type description for changes as returned by L{ApplyContainerMods}'s
12065 #: callbacks
12066 _TApplyContModsCbChanges = \
12067   ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12068     ht.TNonEmptyString,
12069     ht.TAny,
12070     ])))
12071
12072
12073 def ApplyContainerMods(kind, container, chgdesc, mods,
12074                        create_fn, modify_fn, remove_fn):
12075   """Applies descriptions in C{mods} to C{container}.
12076
12077   @type kind: string
12078   @param kind: One-word item description
12079   @type container: list
12080   @param container: Container to modify
12081   @type chgdesc: None or list
12082   @param chgdesc: List of applied changes
12083   @type mods: list
12084   @param mods: Modifications as returned by L{PrepareContainerMods}
12085   @type create_fn: callable
12086   @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12087     receives absolute item index, parameters and private data object as added
12088     by L{PrepareContainerMods}, returns tuple containing new item and changes
12089     as list
12090   @type modify_fn: callable
12091   @param modify_fn: Callback for modifying an existing item
12092     (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12093     and private data object as added by L{PrepareContainerMods}, returns
12094     changes as list
12095   @type remove_fn: callable
12096   @param remove_fn: Callback on removing item; receives absolute item index,
12097     item and private data object as added by L{PrepareContainerMods}
12098
12099   """
12100   for (op, idx, params, private) in mods:
12101     if idx == -1:
12102       # Append
12103       absidx = len(container) - 1
12104     elif idx < 0:
12105       raise IndexError("Not accepting negative indices other than -1")
12106     elif idx > len(container):
12107       raise IndexError("Got %s index %s, but there are only %s" %
12108                        (kind, idx, len(container)))
12109     else:
12110       absidx = idx
12111
12112     changes = None
12113
12114     if op == constants.DDM_ADD:
12115       # Calculate where item will be added
12116       if idx == -1:
12117         addidx = len(container)
12118       else:
12119         addidx = idx
12120
12121       if create_fn is None:
12122         item = params
12123       else:
12124         (item, changes) = create_fn(addidx, params, private)
12125
12126       if idx == -1:
12127         container.append(item)
12128       else:
12129         assert idx >= 0
12130         assert idx <= len(container)
12131         # list.insert does so before the specified index
12132         container.insert(idx, item)
12133     else:
12134       # Retrieve existing item
12135       try:
12136         item = container[absidx]
12137       except IndexError:
12138         raise IndexError("Invalid %s index %s" % (kind, idx))
12139
12140       if op == constants.DDM_REMOVE:
12141         assert not params
12142
12143         if remove_fn is not None:
12144           remove_fn(absidx, item, private)
12145
12146         changes = [("%s/%s" % (kind, absidx), "remove")]
12147
12148         assert container[absidx] == item
12149         del container[absidx]
12150       elif op == constants.DDM_MODIFY:
12151         if modify_fn is not None:
12152           changes = modify_fn(absidx, item, params, private)
12153       else:
12154         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12155
12156     assert _TApplyContModsCbChanges(changes)
12157
12158     if not (chgdesc is None or changes is None):
12159       chgdesc.extend(changes)
12160
12161
12162 def _UpdateIvNames(base_index, disks):
12163   """Updates the C{iv_name} attribute of disks.
12164
12165   @type disks: list of L{objects.Disk}
12166
12167   """
12168   for (idx, disk) in enumerate(disks):
12169     disk.iv_name = "disk/%s" % (base_index + idx, )
12170
12171
12172 class _InstNicModPrivate:
12173   """Data structure for network interface modifications.
12174
12175   Used by L{LUInstanceSetParams}.
12176
12177   """
12178   def __init__(self):
12179     self.params = None
12180     self.filled = None
12181
12182
12183 class LUInstanceSetParams(LogicalUnit):
12184   """Modifies an instances's parameters.
12185
12186   """
12187   HPATH = "instance-modify"
12188   HTYPE = constants.HTYPE_INSTANCE
12189   REQ_BGL = False
12190
12191   @staticmethod
12192   def _UpgradeDiskNicMods(kind, mods, verify_fn):
12193     assert ht.TList(mods)
12194     assert not mods or len(mods[0]) in (2, 3)
12195
12196     if mods and len(mods[0]) == 2:
12197       result = []
12198
12199       addremove = 0
12200       for op, params in mods:
12201         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12202           result.append((op, -1, params))
12203           addremove += 1
12204
12205           if addremove > 1:
12206             raise errors.OpPrereqError("Only one %s add or remove operation is"
12207                                        " supported at a time" % kind,
12208                                        errors.ECODE_INVAL)
12209         else:
12210           result.append((constants.DDM_MODIFY, op, params))
12211
12212       assert verify_fn(result)
12213     else:
12214       result = mods
12215
12216     return result
12217
12218   @staticmethod
12219   def _CheckMods(kind, mods, key_types, item_fn):
12220     """Ensures requested disk/NIC modifications are valid.
12221
12222     """
12223     for (op, _, params) in mods:
12224       assert ht.TDict(params)
12225
12226       utils.ForceDictType(params, key_types)
12227
12228       if op == constants.DDM_REMOVE:
12229         if params:
12230           raise errors.OpPrereqError("No settings should be passed when"
12231                                      " removing a %s" % kind,
12232                                      errors.ECODE_INVAL)
12233       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12234         item_fn(op, params)
12235       else:
12236         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12237
12238   @staticmethod
12239   def _VerifyDiskModification(op, params):
12240     """Verifies a disk modification.
12241
12242     """
12243     if op == constants.DDM_ADD:
12244       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12245       if mode not in constants.DISK_ACCESS_SET:
12246         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12247                                    errors.ECODE_INVAL)
12248
12249       size = params.get(constants.IDISK_SIZE, None)
12250       if size is None:
12251         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12252                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12253
12254       try:
12255         size = int(size)
12256       except (TypeError, ValueError), err:
12257         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12258                                    errors.ECODE_INVAL)
12259
12260       params[constants.IDISK_SIZE] = size
12261
12262     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12263       raise errors.OpPrereqError("Disk size change not possible, use"
12264                                  " grow-disk", errors.ECODE_INVAL)
12265
12266   @staticmethod
12267   def _VerifyNicModification(op, params):
12268     """Verifies a network interface modification.
12269
12270     """
12271     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12272       ip = params.get(constants.INIC_IP, None)
12273       if ip is None:
12274         pass
12275       elif ip.lower() == constants.VALUE_NONE:
12276         params[constants.INIC_IP] = None
12277       elif not netutils.IPAddress.IsValid(ip):
12278         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12279                                    errors.ECODE_INVAL)
12280
12281       bridge = params.get("bridge", None)
12282       link = params.get(constants.INIC_LINK, None)
12283       if bridge and link:
12284         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12285                                    " at the same time", errors.ECODE_INVAL)
12286       elif bridge and bridge.lower() == constants.VALUE_NONE:
12287         params["bridge"] = None
12288       elif link and link.lower() == constants.VALUE_NONE:
12289         params[constants.INIC_LINK] = None
12290
12291       if op == constants.DDM_ADD:
12292         macaddr = params.get(constants.INIC_MAC, None)
12293         if macaddr is None:
12294           params[constants.INIC_MAC] = constants.VALUE_AUTO
12295
12296       if constants.INIC_MAC in params:
12297         macaddr = params[constants.INIC_MAC]
12298         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12299           macaddr = utils.NormalizeAndValidateMac(macaddr)
12300
12301         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12302           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12303                                      " modifying an existing NIC",
12304                                      errors.ECODE_INVAL)
12305
12306   def CheckArguments(self):
12307     if not (self.op.nics or self.op.disks or self.op.disk_template or
12308             self.op.hvparams or self.op.beparams or self.op.os_name or
12309             self.op.offline is not None or self.op.runtime_mem):
12310       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12311
12312     if self.op.hvparams:
12313       _CheckGlobalHvParams(self.op.hvparams)
12314
12315     self.op.disks = self._UpgradeDiskNicMods(
12316       "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12317     self.op.nics = self._UpgradeDiskNicMods(
12318       "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12319
12320     # Check disk modifications
12321     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12322                     self._VerifyDiskModification)
12323
12324     if self.op.disks and self.op.disk_template is not None:
12325       raise errors.OpPrereqError("Disk template conversion and other disk"
12326                                  " changes not supported at the same time",
12327                                  errors.ECODE_INVAL)
12328
12329     if (self.op.disk_template and
12330         self.op.disk_template in constants.DTS_INT_MIRROR and
12331         self.op.remote_node is None):
12332       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12333                                  " one requires specifying a secondary node",
12334                                  errors.ECODE_INVAL)
12335
12336     # Check NIC modifications
12337     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12338                     self._VerifyNicModification)
12339
12340   def ExpandNames(self):
12341     self._ExpandAndLockInstance()
12342     # Can't even acquire node locks in shared mode as upcoming changes in
12343     # Ganeti 2.6 will start to modify the node object on disk conversion
12344     self.needed_locks[locking.LEVEL_NODE] = []
12345     self.needed_locks[locking.LEVEL_NODE_RES] = []
12346     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12347
12348   def DeclareLocks(self, level):
12349     # TODO: Acquire group lock in shared mode (disk parameters)
12350     if level == locking.LEVEL_NODE:
12351       self._LockInstancesNodes()
12352       if self.op.disk_template and self.op.remote_node:
12353         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12354         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12355     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12356       # Copy node locks
12357       self.needed_locks[locking.LEVEL_NODE_RES] = \
12358         self.needed_locks[locking.LEVEL_NODE][:]
12359
12360   def BuildHooksEnv(self):
12361     """Build hooks env.
12362
12363     This runs on the master, primary and secondaries.
12364
12365     """
12366     args = dict()
12367     if constants.BE_MINMEM in self.be_new:
12368       args["minmem"] = self.be_new[constants.BE_MINMEM]
12369     if constants.BE_MAXMEM in self.be_new:
12370       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12371     if constants.BE_VCPUS in self.be_new:
12372       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12373     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12374     # information at all.
12375
12376     if self._new_nics is not None:
12377       nics = []
12378
12379       for nic in self._new_nics:
12380         nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12381         mode = nicparams[constants.NIC_MODE]
12382         link = nicparams[constants.NIC_LINK]
12383         nics.append((nic.ip, nic.mac, mode, link))
12384
12385       args["nics"] = nics
12386
12387     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12388     if self.op.disk_template:
12389       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12390     if self.op.runtime_mem:
12391       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12392
12393     return env
12394
12395   def BuildHooksNodes(self):
12396     """Build hooks nodes.
12397
12398     """
12399     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12400     return (nl, nl)
12401
12402   def _PrepareNicModification(self, params, private, old_ip, old_params,
12403                               cluster, pnode):
12404     update_params_dict = dict([(key, params[key])
12405                                for key in constants.NICS_PARAMETERS
12406                                if key in params])
12407
12408     if "bridge" in params:
12409       update_params_dict[constants.NIC_LINK] = params["bridge"]
12410
12411     new_params = _GetUpdatedParams(old_params, update_params_dict)
12412     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12413
12414     new_filled_params = cluster.SimpleFillNIC(new_params)
12415     objects.NIC.CheckParameterSyntax(new_filled_params)
12416
12417     new_mode = new_filled_params[constants.NIC_MODE]
12418     if new_mode == constants.NIC_MODE_BRIDGED:
12419       bridge = new_filled_params[constants.NIC_LINK]
12420       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12421       if msg:
12422         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12423         if self.op.force:
12424           self.warn.append(msg)
12425         else:
12426           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12427
12428     elif new_mode == constants.NIC_MODE_ROUTED:
12429       ip = params.get(constants.INIC_IP, old_ip)
12430       if ip is None:
12431         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12432                                    " on a routed NIC", errors.ECODE_INVAL)
12433
12434     if constants.INIC_MAC in params:
12435       mac = params[constants.INIC_MAC]
12436       if mac is None:
12437         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12438                                    errors.ECODE_INVAL)
12439       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12440         # otherwise generate the MAC address
12441         params[constants.INIC_MAC] = \
12442           self.cfg.GenerateMAC(self.proc.GetECId())
12443       else:
12444         # or validate/reserve the current one
12445         try:
12446           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12447         except errors.ReservationError:
12448           raise errors.OpPrereqError("MAC address '%s' already in use"
12449                                      " in cluster" % mac,
12450                                      errors.ECODE_NOTUNIQUE)
12451
12452     private.params = new_params
12453     private.filled = new_filled_params
12454
12455   def CheckPrereq(self):
12456     """Check prerequisites.
12457
12458     This only checks the instance list against the existing names.
12459
12460     """
12461     # checking the new params on the primary/secondary nodes
12462
12463     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12464     cluster = self.cluster = self.cfg.GetClusterInfo()
12465     assert self.instance is not None, \
12466       "Cannot retrieve locked instance %s" % self.op.instance_name
12467     pnode = instance.primary_node
12468     nodelist = list(instance.all_nodes)
12469     pnode_info = self.cfg.GetNodeInfo(pnode)
12470     self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12471
12472     # Prepare disk/NIC modifications
12473     self.diskmod = PrepareContainerMods(self.op.disks, None)
12474     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12475
12476     # OS change
12477     if self.op.os_name and not self.op.force:
12478       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12479                       self.op.force_variant)
12480       instance_os = self.op.os_name
12481     else:
12482       instance_os = instance.os
12483
12484     assert not (self.op.disk_template and self.op.disks), \
12485       "Can't modify disk template and apply disk changes at the same time"
12486
12487     if self.op.disk_template:
12488       if instance.disk_template == self.op.disk_template:
12489         raise errors.OpPrereqError("Instance already has disk template %s" %
12490                                    instance.disk_template, errors.ECODE_INVAL)
12491
12492       if (instance.disk_template,
12493           self.op.disk_template) not in self._DISK_CONVERSIONS:
12494         raise errors.OpPrereqError("Unsupported disk template conversion from"
12495                                    " %s to %s" % (instance.disk_template,
12496                                                   self.op.disk_template),
12497                                    errors.ECODE_INVAL)
12498       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12499                           msg="cannot change disk template")
12500       if self.op.disk_template in constants.DTS_INT_MIRROR:
12501         if self.op.remote_node == pnode:
12502           raise errors.OpPrereqError("Given new secondary node %s is the same"
12503                                      " as the primary node of the instance" %
12504                                      self.op.remote_node, errors.ECODE_STATE)
12505         _CheckNodeOnline(self, self.op.remote_node)
12506         _CheckNodeNotDrained(self, self.op.remote_node)
12507         # FIXME: here we assume that the old instance type is DT_PLAIN
12508         assert instance.disk_template == constants.DT_PLAIN
12509         disks = [{constants.IDISK_SIZE: d.size,
12510                   constants.IDISK_VG: d.logical_id[0]}
12511                  for d in instance.disks]
12512         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12513         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12514
12515         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12516         snode_group = self.cfg.GetNodeGroup(snode_info.group)
12517         ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12518                                                                 snode_group)
12519         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12520                                 ignore=self.op.ignore_ipolicy)
12521         if pnode_info.group != snode_info.group:
12522           self.LogWarning("The primary and secondary nodes are in two"
12523                           " different node groups; the disk parameters"
12524                           " from the first disk's node group will be"
12525                           " used")
12526
12527     # hvparams processing
12528     if self.op.hvparams:
12529       hv_type = instance.hypervisor
12530       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12531       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12532       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12533
12534       # local check
12535       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12536       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12537       self.hv_proposed = self.hv_new = hv_new # the new actual values
12538       self.hv_inst = i_hvdict # the new dict (without defaults)
12539     else:
12540       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12541                                               instance.hvparams)
12542       self.hv_new = self.hv_inst = {}
12543
12544     # beparams processing
12545     if self.op.beparams:
12546       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12547                                    use_none=True)
12548       objects.UpgradeBeParams(i_bedict)
12549       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12550       be_new = cluster.SimpleFillBE(i_bedict)
12551       self.be_proposed = self.be_new = be_new # the new actual values
12552       self.be_inst = i_bedict # the new dict (without defaults)
12553     else:
12554       self.be_new = self.be_inst = {}
12555       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12556     be_old = cluster.FillBE(instance)
12557
12558     # CPU param validation -- checking every time a parameter is
12559     # changed to cover all cases where either CPU mask or vcpus have
12560     # changed
12561     if (constants.BE_VCPUS in self.be_proposed and
12562         constants.HV_CPU_MASK in self.hv_proposed):
12563       cpu_list = \
12564         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12565       # Verify mask is consistent with number of vCPUs. Can skip this
12566       # test if only 1 entry in the CPU mask, which means same mask
12567       # is applied to all vCPUs.
12568       if (len(cpu_list) > 1 and
12569           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12570         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12571                                    " CPU mask [%s]" %
12572                                    (self.be_proposed[constants.BE_VCPUS],
12573                                     self.hv_proposed[constants.HV_CPU_MASK]),
12574                                    errors.ECODE_INVAL)
12575
12576       # Only perform this test if a new CPU mask is given
12577       if constants.HV_CPU_MASK in self.hv_new:
12578         # Calculate the largest CPU number requested
12579         max_requested_cpu = max(map(max, cpu_list))
12580         # Check that all of the instance's nodes have enough physical CPUs to
12581         # satisfy the requested CPU mask
12582         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12583                                 max_requested_cpu + 1, instance.hypervisor)
12584
12585     # osparams processing
12586     if self.op.osparams:
12587       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12588       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12589       self.os_inst = i_osdict # the new dict (without defaults)
12590     else:
12591       self.os_inst = {}
12592
12593     self.warn = []
12594
12595     #TODO(dynmem): do the appropriate check involving MINMEM
12596     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12597         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12598       mem_check_list = [pnode]
12599       if be_new[constants.BE_AUTO_BALANCE]:
12600         # either we changed auto_balance to yes or it was from before
12601         mem_check_list.extend(instance.secondary_nodes)
12602       instance_info = self.rpc.call_instance_info(pnode, instance.name,
12603                                                   instance.hypervisor)
12604       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12605                                          [instance.hypervisor])
12606       pninfo = nodeinfo[pnode]
12607       msg = pninfo.fail_msg
12608       if msg:
12609         # Assume the primary node is unreachable and go ahead
12610         self.warn.append("Can't get info from primary node %s: %s" %
12611                          (pnode, msg))
12612       else:
12613         (_, _, (pnhvinfo, )) = pninfo.payload
12614         if not isinstance(pnhvinfo.get("memory_free", None), int):
12615           self.warn.append("Node data from primary node %s doesn't contain"
12616                            " free memory information" % pnode)
12617         elif instance_info.fail_msg:
12618           self.warn.append("Can't get instance runtime information: %s" %
12619                            instance_info.fail_msg)
12620         else:
12621           if instance_info.payload:
12622             current_mem = int(instance_info.payload["memory"])
12623           else:
12624             # Assume instance not running
12625             # (there is a slight race condition here, but it's not very
12626             # probable, and we have no other way to check)
12627             # TODO: Describe race condition
12628             current_mem = 0
12629           #TODO(dynmem): do the appropriate check involving MINMEM
12630           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12631                       pnhvinfo["memory_free"])
12632           if miss_mem > 0:
12633             raise errors.OpPrereqError("This change will prevent the instance"
12634                                        " from starting, due to %d MB of memory"
12635                                        " missing on its primary node" %
12636                                        miss_mem, errors.ECODE_NORES)
12637
12638       if be_new[constants.BE_AUTO_BALANCE]:
12639         for node, nres in nodeinfo.items():
12640           if node not in instance.secondary_nodes:
12641             continue
12642           nres.Raise("Can't get info from secondary node %s" % node,
12643                      prereq=True, ecode=errors.ECODE_STATE)
12644           (_, _, (nhvinfo, )) = nres.payload
12645           if not isinstance(nhvinfo.get("memory_free", None), int):
12646             raise errors.OpPrereqError("Secondary node %s didn't return free"
12647                                        " memory information" % node,
12648                                        errors.ECODE_STATE)
12649           #TODO(dynmem): do the appropriate check involving MINMEM
12650           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12651             raise errors.OpPrereqError("This change will prevent the instance"
12652                                        " from failover to its secondary node"
12653                                        " %s, due to not enough memory" % node,
12654                                        errors.ECODE_STATE)
12655
12656     if self.op.runtime_mem:
12657       remote_info = self.rpc.call_instance_info(instance.primary_node,
12658                                                 instance.name,
12659                                                 instance.hypervisor)
12660       remote_info.Raise("Error checking node %s" % instance.primary_node)
12661       if not remote_info.payload: # not running already
12662         raise errors.OpPrereqError("Instance %s is not running" %
12663                                    instance.name, errors.ECODE_STATE)
12664
12665       current_memory = remote_info.payload["memory"]
12666       if (not self.op.force and
12667            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12668             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12669         raise errors.OpPrereqError("Instance %s must have memory between %d"
12670                                    " and %d MB of memory unless --force is"
12671                                    " given" %
12672                                    (instance.name,
12673                                     self.be_proposed[constants.BE_MINMEM],
12674                                     self.be_proposed[constants.BE_MAXMEM]),
12675                                    errors.ECODE_INVAL)
12676
12677       if self.op.runtime_mem > current_memory:
12678         _CheckNodeFreeMemory(self, instance.primary_node,
12679                              "ballooning memory for instance %s" %
12680                              instance.name,
12681                              self.op.memory - current_memory,
12682                              instance.hypervisor)
12683
12684     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12685       raise errors.OpPrereqError("Disk operations not supported for"
12686                                  " diskless instances", errors.ECODE_INVAL)
12687
12688     def _PrepareNicCreate(_, params, private):
12689       self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12690       return (None, None)
12691
12692     def _PrepareNicMod(_, nic, params, private):
12693       self._PrepareNicModification(params, private, nic.ip,
12694                                    nic.nicparams, cluster, pnode)
12695       return None
12696
12697     # Verify NIC changes (operating on copy)
12698     nics = instance.nics[:]
12699     ApplyContainerMods("NIC", nics, None, self.nicmod,
12700                        _PrepareNicCreate, _PrepareNicMod, None)
12701     if len(nics) > constants.MAX_NICS:
12702       raise errors.OpPrereqError("Instance has too many network interfaces"
12703                                  " (%d), cannot add more" % constants.MAX_NICS,
12704                                  errors.ECODE_STATE)
12705
12706     # Verify disk changes (operating on a copy)
12707     disks = instance.disks[:]
12708     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12709     if len(disks) > constants.MAX_DISKS:
12710       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12711                                  " more" % constants.MAX_DISKS,
12712                                  errors.ECODE_STATE)
12713
12714     if self.op.offline is not None:
12715       if self.op.offline:
12716         msg = "can't change to offline"
12717       else:
12718         msg = "can't change to online"
12719       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12720
12721     # Pre-compute NIC changes (necessary to use result in hooks)
12722     self._nic_chgdesc = []
12723     if self.nicmod:
12724       # Operate on copies as this is still in prereq
12725       nics = [nic.Copy() for nic in instance.nics]
12726       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12727                          self._CreateNewNic, self._ApplyNicMods, None)
12728       self._new_nics = nics
12729     else:
12730       self._new_nics = None
12731
12732   def _ConvertPlainToDrbd(self, feedback_fn):
12733     """Converts an instance from plain to drbd.
12734
12735     """
12736     feedback_fn("Converting template to drbd")
12737     instance = self.instance
12738     pnode = instance.primary_node
12739     snode = self.op.remote_node
12740
12741     assert instance.disk_template == constants.DT_PLAIN
12742
12743     # create a fake disk info for _GenerateDiskTemplate
12744     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12745                   constants.IDISK_VG: d.logical_id[0]}
12746                  for d in instance.disks]
12747     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12748                                       instance.name, pnode, [snode],
12749                                       disk_info, None, None, 0, feedback_fn,
12750                                       self.diskparams)
12751     anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12752                                         self.diskparams)
12753     info = _GetInstanceInfoText(instance)
12754     feedback_fn("Creating additional volumes...")
12755     # first, create the missing data and meta devices
12756     for disk in anno_disks:
12757       # unfortunately this is... not too nice
12758       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12759                             info, True)
12760       for child in disk.children:
12761         _CreateSingleBlockDev(self, snode, instance, child, info, True)
12762     # at this stage, all new LVs have been created, we can rename the
12763     # old ones
12764     feedback_fn("Renaming original volumes...")
12765     rename_list = [(o, n.children[0].logical_id)
12766                    for (o, n) in zip(instance.disks, new_disks)]
12767     result = self.rpc.call_blockdev_rename(pnode, rename_list)
12768     result.Raise("Failed to rename original LVs")
12769
12770     feedback_fn("Initializing DRBD devices...")
12771     # all child devices are in place, we can now create the DRBD devices
12772     for disk in anno_disks:
12773       for node in [pnode, snode]:
12774         f_create = node == pnode
12775         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12776
12777     # at this point, the instance has been modified
12778     instance.disk_template = constants.DT_DRBD8
12779     instance.disks = new_disks
12780     self.cfg.Update(instance, feedback_fn)
12781
12782     # Release node locks while waiting for sync
12783     _ReleaseLocks(self, locking.LEVEL_NODE)
12784
12785     # disks are created, waiting for sync
12786     disk_abort = not _WaitForSync(self, instance,
12787                                   oneshot=not self.op.wait_for_sync)
12788     if disk_abort:
12789       raise errors.OpExecError("There are some degraded disks for"
12790                                " this instance, please cleanup manually")
12791
12792     # Node resource locks will be released by caller
12793
12794   def _ConvertDrbdToPlain(self, feedback_fn):
12795     """Converts an instance from drbd to plain.
12796
12797     """
12798     instance = self.instance
12799
12800     assert len(instance.secondary_nodes) == 1
12801     assert instance.disk_template == constants.DT_DRBD8
12802
12803     pnode = instance.primary_node
12804     snode = instance.secondary_nodes[0]
12805     feedback_fn("Converting template to plain")
12806
12807     old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12808     new_disks = [d.children[0] for d in instance.disks]
12809
12810     # copy over size and mode
12811     for parent, child in zip(old_disks, new_disks):
12812       child.size = parent.size
12813       child.mode = parent.mode
12814
12815     # this is a DRBD disk, return its port to the pool
12816     # NOTE: this must be done right before the call to cfg.Update!
12817     for disk in old_disks:
12818       tcp_port = disk.logical_id[2]
12819       self.cfg.AddTcpUdpPort(tcp_port)
12820
12821     # update instance structure
12822     instance.disks = new_disks
12823     instance.disk_template = constants.DT_PLAIN
12824     self.cfg.Update(instance, feedback_fn)
12825
12826     # Release locks in case removing disks takes a while
12827     _ReleaseLocks(self, locking.LEVEL_NODE)
12828
12829     feedback_fn("Removing volumes on the secondary node...")
12830     for disk in old_disks:
12831       self.cfg.SetDiskID(disk, snode)
12832       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12833       if msg:
12834         self.LogWarning("Could not remove block device %s on node %s,"
12835                         " continuing anyway: %s", disk.iv_name, snode, msg)
12836
12837     feedback_fn("Removing unneeded volumes on the primary node...")
12838     for idx, disk in enumerate(old_disks):
12839       meta = disk.children[1]
12840       self.cfg.SetDiskID(meta, pnode)
12841       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12842       if msg:
12843         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12844                         " continuing anyway: %s", idx, pnode, msg)
12845
12846   def _CreateNewDisk(self, idx, params, _):
12847     """Creates a new disk.
12848
12849     """
12850     instance = self.instance
12851
12852     # add a new disk
12853     if instance.disk_template in constants.DTS_FILEBASED:
12854       (file_driver, file_path) = instance.disks[0].logical_id
12855       file_path = os.path.dirname(file_path)
12856     else:
12857       file_driver = file_path = None
12858
12859     disk = \
12860       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12861                             instance.primary_node, instance.secondary_nodes,
12862                             [params], file_path, file_driver, idx,
12863                             self.Log, self.diskparams)[0]
12864
12865     info = _GetInstanceInfoText(instance)
12866
12867     logging.info("Creating volume %s for instance %s",
12868                  disk.iv_name, instance.name)
12869     # Note: this needs to be kept in sync with _CreateDisks
12870     #HARDCODE
12871     for node in instance.all_nodes:
12872       f_create = (node == instance.primary_node)
12873       try:
12874         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12875       except errors.OpExecError, err:
12876         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12877                         disk.iv_name, disk, node, err)
12878
12879     return (disk, [
12880       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12881       ])
12882
12883   @staticmethod
12884   def _ModifyDisk(idx, disk, params, _):
12885     """Modifies a disk.
12886
12887     """
12888     disk.mode = params[constants.IDISK_MODE]
12889
12890     return [
12891       ("disk.mode/%d" % idx, disk.mode),
12892       ]
12893
12894   def _RemoveDisk(self, idx, root, _):
12895     """Removes a disk.
12896
12897     """
12898     (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12899     for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12900       self.cfg.SetDiskID(disk, node)
12901       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12902       if msg:
12903         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12904                         " continuing anyway", idx, node, msg)
12905
12906     # if this is a DRBD disk, return its port to the pool
12907     if root.dev_type in constants.LDS_DRBD:
12908       self.cfg.AddTcpUdpPort(root.logical_id[2])
12909
12910   @staticmethod
12911   def _CreateNewNic(idx, params, private):
12912     """Creates data structure for a new network interface.
12913
12914     """
12915     mac = params[constants.INIC_MAC]
12916     ip = params.get(constants.INIC_IP, None)
12917     nicparams = private.params
12918
12919     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12920       ("nic.%d" % idx,
12921        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12922        (mac, ip, private.filled[constants.NIC_MODE],
12923        private.filled[constants.NIC_LINK])),
12924       ])
12925
12926   @staticmethod
12927   def _ApplyNicMods(idx, nic, params, private):
12928     """Modifies a network interface.
12929
12930     """
12931     changes = []
12932
12933     for key in [constants.INIC_MAC, constants.INIC_IP]:
12934       if key in params:
12935         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12936         setattr(nic, key, params[key])
12937
12938     if private.params:
12939       nic.nicparams = private.params
12940
12941       for (key, val) in params.items():
12942         changes.append(("nic.%s/%d" % (key, idx), val))
12943
12944     return changes
12945
12946   def Exec(self, feedback_fn):
12947     """Modifies an instance.
12948
12949     All parameters take effect only at the next restart of the instance.
12950
12951     """
12952     # Process here the warnings from CheckPrereq, as we don't have a
12953     # feedback_fn there.
12954     # TODO: Replace with self.LogWarning
12955     for warn in self.warn:
12956       feedback_fn("WARNING: %s" % warn)
12957
12958     assert ((self.op.disk_template is None) ^
12959             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12960       "Not owning any node resource locks"
12961
12962     result = []
12963     instance = self.instance
12964
12965     # runtime memory
12966     if self.op.runtime_mem:
12967       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12968                                                      instance,
12969                                                      self.op.runtime_mem)
12970       rpcres.Raise("Cannot modify instance runtime memory")
12971       result.append(("runtime_memory", self.op.runtime_mem))
12972
12973     # Apply disk changes
12974     ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12975                        self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12976     _UpdateIvNames(0, instance.disks)
12977
12978     if self.op.disk_template:
12979       if __debug__:
12980         check_nodes = set(instance.all_nodes)
12981         if self.op.remote_node:
12982           check_nodes.add(self.op.remote_node)
12983         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12984           owned = self.owned_locks(level)
12985           assert not (check_nodes - owned), \
12986             ("Not owning the correct locks, owning %r, expected at least %r" %
12987              (owned, check_nodes))
12988
12989       r_shut = _ShutdownInstanceDisks(self, instance)
12990       if not r_shut:
12991         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12992                                  " proceed with disk template conversion")
12993       mode = (instance.disk_template, self.op.disk_template)
12994       try:
12995         self._DISK_CONVERSIONS[mode](self, feedback_fn)
12996       except:
12997         self.cfg.ReleaseDRBDMinors(instance.name)
12998         raise
12999       result.append(("disk_template", self.op.disk_template))
13000
13001       assert instance.disk_template == self.op.disk_template, \
13002         ("Expected disk template '%s', found '%s'" %
13003          (self.op.disk_template, instance.disk_template))
13004
13005     # Release node and resource locks if there are any (they might already have
13006     # been released during disk conversion)
13007     _ReleaseLocks(self, locking.LEVEL_NODE)
13008     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13009
13010     # Apply NIC changes
13011     if self._new_nics is not None:
13012       instance.nics = self._new_nics
13013       result.extend(self._nic_chgdesc)
13014
13015     # hvparams changes
13016     if self.op.hvparams:
13017       instance.hvparams = self.hv_inst
13018       for key, val in self.op.hvparams.iteritems():
13019         result.append(("hv/%s" % key, val))
13020
13021     # beparams changes
13022     if self.op.beparams:
13023       instance.beparams = self.be_inst
13024       for key, val in self.op.beparams.iteritems():
13025         result.append(("be/%s" % key, val))
13026
13027     # OS change
13028     if self.op.os_name:
13029       instance.os = self.op.os_name
13030
13031     # osparams changes
13032     if self.op.osparams:
13033       instance.osparams = self.os_inst
13034       for key, val in self.op.osparams.iteritems():
13035         result.append(("os/%s" % key, val))
13036
13037     if self.op.offline is None:
13038       # Ignore
13039       pass
13040     elif self.op.offline:
13041       # Mark instance as offline
13042       self.cfg.MarkInstanceOffline(instance.name)
13043       result.append(("admin_state", constants.ADMINST_OFFLINE))
13044     else:
13045       # Mark instance as online, but stopped
13046       self.cfg.MarkInstanceDown(instance.name)
13047       result.append(("admin_state", constants.ADMINST_DOWN))
13048
13049     self.cfg.Update(instance, feedback_fn)
13050
13051     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13052                 self.owned_locks(locking.LEVEL_NODE)), \
13053       "All node locks should have been released by now"
13054
13055     return result
13056
13057   _DISK_CONVERSIONS = {
13058     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13059     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13060     }
13061
13062
13063 class LUInstanceChangeGroup(LogicalUnit):
13064   HPATH = "instance-change-group"
13065   HTYPE = constants.HTYPE_INSTANCE
13066   REQ_BGL = False
13067
13068   def ExpandNames(self):
13069     self.share_locks = _ShareAll()
13070     self.needed_locks = {
13071       locking.LEVEL_NODEGROUP: [],
13072       locking.LEVEL_NODE: [],
13073       }
13074
13075     self._ExpandAndLockInstance()
13076
13077     if self.op.target_groups:
13078       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13079                                   self.op.target_groups)
13080     else:
13081       self.req_target_uuids = None
13082
13083     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13084
13085   def DeclareLocks(self, level):
13086     if level == locking.LEVEL_NODEGROUP:
13087       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13088
13089       if self.req_target_uuids:
13090         lock_groups = set(self.req_target_uuids)
13091
13092         # Lock all groups used by instance optimistically; this requires going
13093         # via the node before it's locked, requiring verification later on
13094         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13095         lock_groups.update(instance_groups)
13096       else:
13097         # No target groups, need to lock all of them
13098         lock_groups = locking.ALL_SET
13099
13100       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13101
13102     elif level == locking.LEVEL_NODE:
13103       if self.req_target_uuids:
13104         # Lock all nodes used by instances
13105         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13106         self._LockInstancesNodes()
13107
13108         # Lock all nodes in all potential target groups
13109         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13110                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13111         member_nodes = [node_name
13112                         for group in lock_groups
13113                         for node_name in self.cfg.GetNodeGroup(group).members]
13114         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13115       else:
13116         # Lock all nodes as all groups are potential targets
13117         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13118
13119   def CheckPrereq(self):
13120     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13121     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13122     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13123
13124     assert (self.req_target_uuids is None or
13125             owned_groups.issuperset(self.req_target_uuids))
13126     assert owned_instances == set([self.op.instance_name])
13127
13128     # Get instance information
13129     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13130
13131     # Check if node groups for locked instance are still correct
13132     assert owned_nodes.issuperset(self.instance.all_nodes), \
13133       ("Instance %s's nodes changed while we kept the lock" %
13134        self.op.instance_name)
13135
13136     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13137                                            owned_groups)
13138
13139     if self.req_target_uuids:
13140       # User requested specific target groups
13141       self.target_uuids = frozenset(self.req_target_uuids)
13142     else:
13143       # All groups except those used by the instance are potential targets
13144       self.target_uuids = owned_groups - inst_groups
13145
13146     conflicting_groups = self.target_uuids & inst_groups
13147     if conflicting_groups:
13148       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13149                                  " used by the instance '%s'" %
13150                                  (utils.CommaJoin(conflicting_groups),
13151                                   self.op.instance_name),
13152                                  errors.ECODE_INVAL)
13153
13154     if not self.target_uuids:
13155       raise errors.OpPrereqError("There are no possible target groups",
13156                                  errors.ECODE_INVAL)
13157
13158   def BuildHooksEnv(self):
13159     """Build hooks env.
13160
13161     """
13162     assert self.target_uuids
13163
13164     env = {
13165       "TARGET_GROUPS": " ".join(self.target_uuids),
13166       }
13167
13168     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13169
13170     return env
13171
13172   def BuildHooksNodes(self):
13173     """Build hooks nodes.
13174
13175     """
13176     mn = self.cfg.GetMasterNode()
13177     return ([mn], [mn])
13178
13179   def Exec(self, feedback_fn):
13180     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13181
13182     assert instances == [self.op.instance_name], "Instance not locked"
13183
13184     req = iallocator.IAReqGroupChange(instances=instances,
13185                                       target_groups=list(self.target_uuids))
13186     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13187
13188     ial.Run(self.op.iallocator)
13189
13190     if not ial.success:
13191       raise errors.OpPrereqError("Can't compute solution for changing group of"
13192                                  " instance '%s' using iallocator '%s': %s" %
13193                                  (self.op.instance_name, self.op.iallocator,
13194                                   ial.info), errors.ECODE_NORES)
13195
13196     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13197
13198     self.LogInfo("Iallocator returned %s job(s) for changing group of"
13199                  " instance '%s'", len(jobs), self.op.instance_name)
13200
13201     return ResultWithJobs(jobs)
13202
13203
13204 class LUBackupQuery(NoHooksLU):
13205   """Query the exports list
13206
13207   """
13208   REQ_BGL = False
13209
13210   def CheckArguments(self):
13211     self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13212                              ["node", "export"], self.op.use_locking)
13213
13214   def ExpandNames(self):
13215     self.expq.ExpandNames(self)
13216
13217   def DeclareLocks(self, level):
13218     self.expq.DeclareLocks(self, level)
13219
13220   def Exec(self, feedback_fn):
13221     result = {}
13222
13223     for (node, expname) in self.expq.OldStyleQuery(self):
13224       if expname is None:
13225         result[node] = False
13226       else:
13227         result.setdefault(node, []).append(expname)
13228
13229     return result
13230
13231
13232 class _ExportQuery(_QueryBase):
13233   FIELDS = query.EXPORT_FIELDS
13234
13235   #: The node name is not a unique key for this query
13236   SORT_FIELD = "node"
13237
13238   def ExpandNames(self, lu):
13239     lu.needed_locks = {}
13240
13241     # The following variables interact with _QueryBase._GetNames
13242     if self.names:
13243       self.wanted = _GetWantedNodes(lu, self.names)
13244     else:
13245       self.wanted = locking.ALL_SET
13246
13247     self.do_locking = self.use_locking
13248
13249     if self.do_locking:
13250       lu.share_locks = _ShareAll()
13251       lu.needed_locks = {
13252         locking.LEVEL_NODE: self.wanted,
13253         }
13254
13255   def DeclareLocks(self, lu, level):
13256     pass
13257
13258   def _GetQueryData(self, lu):
13259     """Computes the list of nodes and their attributes.
13260
13261     """
13262     # Locking is not used
13263     # TODO
13264     assert not (compat.any(lu.glm.is_owned(level)
13265                            for level in locking.LEVELS
13266                            if level != locking.LEVEL_CLUSTER) or
13267                 self.do_locking or self.use_locking)
13268
13269     nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13270
13271     result = []
13272
13273     for (node, nres) in lu.rpc.call_export_list(nodes).items():
13274       if nres.fail_msg:
13275         result.append((node, None))
13276       else:
13277         result.extend((node, expname) for expname in nres.payload)
13278
13279     return result
13280
13281
13282 class LUBackupPrepare(NoHooksLU):
13283   """Prepares an instance for an export and returns useful information.
13284
13285   """
13286   REQ_BGL = False
13287
13288   def ExpandNames(self):
13289     self._ExpandAndLockInstance()
13290
13291   def CheckPrereq(self):
13292     """Check prerequisites.
13293
13294     """
13295     instance_name = self.op.instance_name
13296
13297     self.instance = self.cfg.GetInstanceInfo(instance_name)
13298     assert self.instance is not None, \
13299           "Cannot retrieve locked instance %s" % self.op.instance_name
13300     _CheckNodeOnline(self, self.instance.primary_node)
13301
13302     self._cds = _GetClusterDomainSecret()
13303
13304   def Exec(self, feedback_fn):
13305     """Prepares an instance for an export.
13306
13307     """
13308     instance = self.instance
13309
13310     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13311       salt = utils.GenerateSecret(8)
13312
13313       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13314       result = self.rpc.call_x509_cert_create(instance.primary_node,
13315                                               constants.RIE_CERT_VALIDITY)
13316       result.Raise("Can't create X509 key and certificate on %s" % result.node)
13317
13318       (name, cert_pem) = result.payload
13319
13320       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13321                                              cert_pem)
13322
13323       return {
13324         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13325         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13326                           salt),
13327         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13328         }
13329
13330     return None
13331
13332
13333 class LUBackupExport(LogicalUnit):
13334   """Export an instance to an image in the cluster.
13335
13336   """
13337   HPATH = "instance-export"
13338   HTYPE = constants.HTYPE_INSTANCE
13339   REQ_BGL = False
13340
13341   def CheckArguments(self):
13342     """Check the arguments.
13343
13344     """
13345     self.x509_key_name = self.op.x509_key_name
13346     self.dest_x509_ca_pem = self.op.destination_x509_ca
13347
13348     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13349       if not self.x509_key_name:
13350         raise errors.OpPrereqError("Missing X509 key name for encryption",
13351                                    errors.ECODE_INVAL)
13352
13353       if not self.dest_x509_ca_pem:
13354         raise errors.OpPrereqError("Missing destination X509 CA",
13355                                    errors.ECODE_INVAL)
13356
13357   def ExpandNames(self):
13358     self._ExpandAndLockInstance()
13359
13360     # Lock all nodes for local exports
13361     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13362       # FIXME: lock only instance primary and destination node
13363       #
13364       # Sad but true, for now we have do lock all nodes, as we don't know where
13365       # the previous export might be, and in this LU we search for it and
13366       # remove it from its current node. In the future we could fix this by:
13367       #  - making a tasklet to search (share-lock all), then create the
13368       #    new one, then one to remove, after
13369       #  - removing the removal operation altogether
13370       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13371
13372   def DeclareLocks(self, level):
13373     """Last minute lock declaration."""
13374     # All nodes are locked anyway, so nothing to do here.
13375
13376   def BuildHooksEnv(self):
13377     """Build hooks env.
13378
13379     This will run on the master, primary node and target node.
13380
13381     """
13382     env = {
13383       "EXPORT_MODE": self.op.mode,
13384       "EXPORT_NODE": self.op.target_node,
13385       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13386       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13387       # TODO: Generic function for boolean env variables
13388       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13389       }
13390
13391     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13392
13393     return env
13394
13395   def BuildHooksNodes(self):
13396     """Build hooks nodes.
13397
13398     """
13399     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13400
13401     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13402       nl.append(self.op.target_node)
13403
13404     return (nl, nl)
13405
13406   def CheckPrereq(self):
13407     """Check prerequisites.
13408
13409     This checks that the instance and node names are valid.
13410
13411     """
13412     instance_name = self.op.instance_name
13413
13414     self.instance = self.cfg.GetInstanceInfo(instance_name)
13415     assert self.instance is not None, \
13416           "Cannot retrieve locked instance %s" % self.op.instance_name
13417     _CheckNodeOnline(self, self.instance.primary_node)
13418
13419     if (self.op.remove_instance and
13420         self.instance.admin_state == constants.ADMINST_UP and
13421         not self.op.shutdown):
13422       raise errors.OpPrereqError("Can not remove instance without shutting it"
13423                                  " down before", errors.ECODE_STATE)
13424
13425     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13426       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13427       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13428       assert self.dst_node is not None
13429
13430       _CheckNodeOnline(self, self.dst_node.name)
13431       _CheckNodeNotDrained(self, self.dst_node.name)
13432
13433       self._cds = None
13434       self.dest_disk_info = None
13435       self.dest_x509_ca = None
13436
13437     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13438       self.dst_node = None
13439
13440       if len(self.op.target_node) != len(self.instance.disks):
13441         raise errors.OpPrereqError(("Received destination information for %s"
13442                                     " disks, but instance %s has %s disks") %
13443                                    (len(self.op.target_node), instance_name,
13444                                     len(self.instance.disks)),
13445                                    errors.ECODE_INVAL)
13446
13447       cds = _GetClusterDomainSecret()
13448
13449       # Check X509 key name
13450       try:
13451         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13452       except (TypeError, ValueError), err:
13453         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13454                                    errors.ECODE_INVAL)
13455
13456       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13457         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13458                                    errors.ECODE_INVAL)
13459
13460       # Load and verify CA
13461       try:
13462         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13463       except OpenSSL.crypto.Error, err:
13464         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13465                                    (err, ), errors.ECODE_INVAL)
13466
13467       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13468       if errcode is not None:
13469         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13470                                    (msg, ), errors.ECODE_INVAL)
13471
13472       self.dest_x509_ca = cert
13473
13474       # Verify target information
13475       disk_info = []
13476       for idx, disk_data in enumerate(self.op.target_node):
13477         try:
13478           (host, port, magic) = \
13479             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13480         except errors.GenericError, err:
13481           raise errors.OpPrereqError("Target info for disk %s: %s" %
13482                                      (idx, err), errors.ECODE_INVAL)
13483
13484         disk_info.append((host, port, magic))
13485
13486       assert len(disk_info) == len(self.op.target_node)
13487       self.dest_disk_info = disk_info
13488
13489     else:
13490       raise errors.ProgrammerError("Unhandled export mode %r" %
13491                                    self.op.mode)
13492
13493     # instance disk type verification
13494     # TODO: Implement export support for file-based disks
13495     for disk in self.instance.disks:
13496       if disk.dev_type == constants.LD_FILE:
13497         raise errors.OpPrereqError("Export not supported for instances with"
13498                                    " file-based disks", errors.ECODE_INVAL)
13499
13500   def _CleanupExports(self, feedback_fn):
13501     """Removes exports of current instance from all other nodes.
13502
13503     If an instance in a cluster with nodes A..D was exported to node C, its
13504     exports will be removed from the nodes A, B and D.
13505
13506     """
13507     assert self.op.mode != constants.EXPORT_MODE_REMOTE
13508
13509     nodelist = self.cfg.GetNodeList()
13510     nodelist.remove(self.dst_node.name)
13511
13512     # on one-node clusters nodelist will be empty after the removal
13513     # if we proceed the backup would be removed because OpBackupQuery
13514     # substitutes an empty list with the full cluster node list.
13515     iname = self.instance.name
13516     if nodelist:
13517       feedback_fn("Removing old exports for instance %s" % iname)
13518       exportlist = self.rpc.call_export_list(nodelist)
13519       for node in exportlist:
13520         if exportlist[node].fail_msg:
13521           continue
13522         if iname in exportlist[node].payload:
13523           msg = self.rpc.call_export_remove(node, iname).fail_msg
13524           if msg:
13525             self.LogWarning("Could not remove older export for instance %s"
13526                             " on node %s: %s", iname, node, msg)
13527
13528   def Exec(self, feedback_fn):
13529     """Export an instance to an image in the cluster.
13530
13531     """
13532     assert self.op.mode in constants.EXPORT_MODES
13533
13534     instance = self.instance
13535     src_node = instance.primary_node
13536
13537     if self.op.shutdown:
13538       # shutdown the instance, but not the disks
13539       feedback_fn("Shutting down instance %s" % instance.name)
13540       result = self.rpc.call_instance_shutdown(src_node, instance,
13541                                                self.op.shutdown_timeout)
13542       # TODO: Maybe ignore failures if ignore_remove_failures is set
13543       result.Raise("Could not shutdown instance %s on"
13544                    " node %s" % (instance.name, src_node))
13545
13546     # set the disks ID correctly since call_instance_start needs the
13547     # correct drbd minor to create the symlinks
13548     for disk in instance.disks:
13549       self.cfg.SetDiskID(disk, src_node)
13550
13551     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13552
13553     if activate_disks:
13554       # Activate the instance disks if we'exporting a stopped instance
13555       feedback_fn("Activating disks for %s" % instance.name)
13556       _StartInstanceDisks(self, instance, None)
13557
13558     try:
13559       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13560                                                      instance)
13561
13562       helper.CreateSnapshots()
13563       try:
13564         if (self.op.shutdown and
13565             instance.admin_state == constants.ADMINST_UP and
13566             not self.op.remove_instance):
13567           assert not activate_disks
13568           feedback_fn("Starting instance %s" % instance.name)
13569           result = self.rpc.call_instance_start(src_node,
13570                                                 (instance, None, None), False)
13571           msg = result.fail_msg
13572           if msg:
13573             feedback_fn("Failed to start instance: %s" % msg)
13574             _ShutdownInstanceDisks(self, instance)
13575             raise errors.OpExecError("Could not start instance: %s" % msg)
13576
13577         if self.op.mode == constants.EXPORT_MODE_LOCAL:
13578           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13579         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13580           connect_timeout = constants.RIE_CONNECT_TIMEOUT
13581           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13582
13583           (key_name, _, _) = self.x509_key_name
13584
13585           dest_ca_pem = \
13586             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13587                                             self.dest_x509_ca)
13588
13589           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13590                                                      key_name, dest_ca_pem,
13591                                                      timeouts)
13592       finally:
13593         helper.Cleanup()
13594
13595       # Check for backwards compatibility
13596       assert len(dresults) == len(instance.disks)
13597       assert compat.all(isinstance(i, bool) for i in dresults), \
13598              "Not all results are boolean: %r" % dresults
13599
13600     finally:
13601       if activate_disks:
13602         feedback_fn("Deactivating disks for %s" % instance.name)
13603         _ShutdownInstanceDisks(self, instance)
13604
13605     if not (compat.all(dresults) and fin_resu):
13606       failures = []
13607       if not fin_resu:
13608         failures.append("export finalization")
13609       if not compat.all(dresults):
13610         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13611                                if not dsk)
13612         failures.append("disk export: disk(s) %s" % fdsk)
13613
13614       raise errors.OpExecError("Export failed, errors in %s" %
13615                                utils.CommaJoin(failures))
13616
13617     # At this point, the export was successful, we can cleanup/finish
13618
13619     # Remove instance if requested
13620     if self.op.remove_instance:
13621       feedback_fn("Removing instance %s" % instance.name)
13622       _RemoveInstance(self, feedback_fn, instance,
13623                       self.op.ignore_remove_failures)
13624
13625     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13626       self._CleanupExports(feedback_fn)
13627
13628     return fin_resu, dresults
13629
13630
13631 class LUBackupRemove(NoHooksLU):
13632   """Remove exports related to the named instance.
13633
13634   """
13635   REQ_BGL = False
13636
13637   def ExpandNames(self):
13638     self.needed_locks = {}
13639     # We need all nodes to be locked in order for RemoveExport to work, but we
13640     # don't need to lock the instance itself, as nothing will happen to it (and
13641     # we can remove exports also for a removed instance)
13642     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13643
13644   def Exec(self, feedback_fn):
13645     """Remove any export.
13646
13647     """
13648     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13649     # If the instance was not found we'll try with the name that was passed in.
13650     # This will only work if it was an FQDN, though.
13651     fqdn_warn = False
13652     if not instance_name:
13653       fqdn_warn = True
13654       instance_name = self.op.instance_name
13655
13656     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13657     exportlist = self.rpc.call_export_list(locked_nodes)
13658     found = False
13659     for node in exportlist:
13660       msg = exportlist[node].fail_msg
13661       if msg:
13662         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13663         continue
13664       if instance_name in exportlist[node].payload:
13665         found = True
13666         result = self.rpc.call_export_remove(node, instance_name)
13667         msg = result.fail_msg
13668         if msg:
13669           logging.error("Could not remove export for instance %s"
13670                         " on node %s: %s", instance_name, node, msg)
13671
13672     if fqdn_warn and not found:
13673       feedback_fn("Export not found. If trying to remove an export belonging"
13674                   " to a deleted instance please use its Fully Qualified"
13675                   " Domain Name.")
13676
13677
13678 class LUGroupAdd(LogicalUnit):
13679   """Logical unit for creating node groups.
13680
13681   """
13682   HPATH = "group-add"
13683   HTYPE = constants.HTYPE_GROUP
13684   REQ_BGL = False
13685
13686   def ExpandNames(self):
13687     # We need the new group's UUID here so that we can create and acquire the
13688     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13689     # that it should not check whether the UUID exists in the configuration.
13690     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13691     self.needed_locks = {}
13692     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13693
13694   def CheckPrereq(self):
13695     """Check prerequisites.
13696
13697     This checks that the given group name is not an existing node group
13698     already.
13699
13700     """
13701     try:
13702       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13703     except errors.OpPrereqError:
13704       pass
13705     else:
13706       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13707                                  " node group (UUID: %s)" %
13708                                  (self.op.group_name, existing_uuid),
13709                                  errors.ECODE_EXISTS)
13710
13711     if self.op.ndparams:
13712       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13713
13714     if self.op.hv_state:
13715       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13716     else:
13717       self.new_hv_state = None
13718
13719     if self.op.disk_state:
13720       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13721     else:
13722       self.new_disk_state = None
13723
13724     if self.op.diskparams:
13725       for templ in constants.DISK_TEMPLATES:
13726         if templ in self.op.diskparams:
13727           utils.ForceDictType(self.op.diskparams[templ],
13728                               constants.DISK_DT_TYPES)
13729       self.new_diskparams = self.op.diskparams
13730       try:
13731         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13732       except errors.OpPrereqError, err:
13733         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13734                                    errors.ECODE_INVAL)
13735     else:
13736       self.new_diskparams = {}
13737
13738     if self.op.ipolicy:
13739       cluster = self.cfg.GetClusterInfo()
13740       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13741       try:
13742         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13743       except errors.ConfigurationError, err:
13744         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13745                                    errors.ECODE_INVAL)
13746
13747   def BuildHooksEnv(self):
13748     """Build hooks env.
13749
13750     """
13751     return {
13752       "GROUP_NAME": self.op.group_name,
13753       }
13754
13755   def BuildHooksNodes(self):
13756     """Build hooks nodes.
13757
13758     """
13759     mn = self.cfg.GetMasterNode()
13760     return ([mn], [mn])
13761
13762   def Exec(self, feedback_fn):
13763     """Add the node group to the cluster.
13764
13765     """
13766     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13767                                   uuid=self.group_uuid,
13768                                   alloc_policy=self.op.alloc_policy,
13769                                   ndparams=self.op.ndparams,
13770                                   diskparams=self.new_diskparams,
13771                                   ipolicy=self.op.ipolicy,
13772                                   hv_state_static=self.new_hv_state,
13773                                   disk_state_static=self.new_disk_state)
13774
13775     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13776     del self.remove_locks[locking.LEVEL_NODEGROUP]
13777
13778
13779 class LUGroupAssignNodes(NoHooksLU):
13780   """Logical unit for assigning nodes to groups.
13781
13782   """
13783   REQ_BGL = False
13784
13785   def ExpandNames(self):
13786     # These raise errors.OpPrereqError on their own:
13787     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13788     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13789
13790     # We want to lock all the affected nodes and groups. We have readily
13791     # available the list of nodes, and the *destination* group. To gather the
13792     # list of "source" groups, we need to fetch node information later on.
13793     self.needed_locks = {
13794       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13795       locking.LEVEL_NODE: self.op.nodes,
13796       }
13797
13798   def DeclareLocks(self, level):
13799     if level == locking.LEVEL_NODEGROUP:
13800       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13801
13802       # Try to get all affected nodes' groups without having the group or node
13803       # lock yet. Needs verification later in the code flow.
13804       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13805
13806       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13807
13808   def CheckPrereq(self):
13809     """Check prerequisites.
13810
13811     """
13812     assert self.needed_locks[locking.LEVEL_NODEGROUP]
13813     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13814             frozenset(self.op.nodes))
13815
13816     expected_locks = (set([self.group_uuid]) |
13817                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13818     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13819     if actual_locks != expected_locks:
13820       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13821                                " current groups are '%s', used to be '%s'" %
13822                                (utils.CommaJoin(expected_locks),
13823                                 utils.CommaJoin(actual_locks)))
13824
13825     self.node_data = self.cfg.GetAllNodesInfo()
13826     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13827     instance_data = self.cfg.GetAllInstancesInfo()
13828
13829     if self.group is None:
13830       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13831                                (self.op.group_name, self.group_uuid))
13832
13833     (new_splits, previous_splits) = \
13834       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13835                                              for node in self.op.nodes],
13836                                             self.node_data, instance_data)
13837
13838     if new_splits:
13839       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13840
13841       if not self.op.force:
13842         raise errors.OpExecError("The following instances get split by this"
13843                                  " change and --force was not given: %s" %
13844                                  fmt_new_splits)
13845       else:
13846         self.LogWarning("This operation will split the following instances: %s",
13847                         fmt_new_splits)
13848
13849         if previous_splits:
13850           self.LogWarning("In addition, these already-split instances continue"
13851                           " to be split across groups: %s",
13852                           utils.CommaJoin(utils.NiceSort(previous_splits)))
13853
13854   def Exec(self, feedback_fn):
13855     """Assign nodes to a new group.
13856
13857     """
13858     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13859
13860     self.cfg.AssignGroupNodes(mods)
13861
13862   @staticmethod
13863   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13864     """Check for split instances after a node assignment.
13865
13866     This method considers a series of node assignments as an atomic operation,
13867     and returns information about split instances after applying the set of
13868     changes.
13869
13870     In particular, it returns information about newly split instances, and
13871     instances that were already split, and remain so after the change.
13872
13873     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13874     considered.
13875
13876     @type changes: list of (node_name, new_group_uuid) pairs.
13877     @param changes: list of node assignments to consider.
13878     @param node_data: a dict with data for all nodes
13879     @param instance_data: a dict with all instances to consider
13880     @rtype: a two-tuple
13881     @return: a list of instances that were previously okay and result split as a
13882       consequence of this change, and a list of instances that were previously
13883       split and this change does not fix.
13884
13885     """
13886     changed_nodes = dict((node, group) for node, group in changes
13887                          if node_data[node].group != group)
13888
13889     all_split_instances = set()
13890     previously_split_instances = set()
13891
13892     def InstanceNodes(instance):
13893       return [instance.primary_node] + list(instance.secondary_nodes)
13894
13895     for inst in instance_data.values():
13896       if inst.disk_template not in constants.DTS_INT_MIRROR:
13897         continue
13898
13899       instance_nodes = InstanceNodes(inst)
13900
13901       if len(set(node_data[node].group for node in instance_nodes)) > 1:
13902         previously_split_instances.add(inst.name)
13903
13904       if len(set(changed_nodes.get(node, node_data[node].group)
13905                  for node in instance_nodes)) > 1:
13906         all_split_instances.add(inst.name)
13907
13908     return (list(all_split_instances - previously_split_instances),
13909             list(previously_split_instances & all_split_instances))
13910
13911
13912 class _GroupQuery(_QueryBase):
13913   FIELDS = query.GROUP_FIELDS
13914
13915   def ExpandNames(self, lu):
13916     lu.needed_locks = {}
13917
13918     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13919     self._cluster = lu.cfg.GetClusterInfo()
13920     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13921
13922     if not self.names:
13923       self.wanted = [name_to_uuid[name]
13924                      for name in utils.NiceSort(name_to_uuid.keys())]
13925     else:
13926       # Accept names to be either names or UUIDs.
13927       missing = []
13928       self.wanted = []
13929       all_uuid = frozenset(self._all_groups.keys())
13930
13931       for name in self.names:
13932         if name in all_uuid:
13933           self.wanted.append(name)
13934         elif name in name_to_uuid:
13935           self.wanted.append(name_to_uuid[name])
13936         else:
13937           missing.append(name)
13938
13939       if missing:
13940         raise errors.OpPrereqError("Some groups do not exist: %s" %
13941                                    utils.CommaJoin(missing),
13942                                    errors.ECODE_NOENT)
13943
13944   def DeclareLocks(self, lu, level):
13945     pass
13946
13947   def _GetQueryData(self, lu):
13948     """Computes the list of node groups and their attributes.
13949
13950     """
13951     do_nodes = query.GQ_NODE in self.requested_data
13952     do_instances = query.GQ_INST in self.requested_data
13953
13954     group_to_nodes = None
13955     group_to_instances = None
13956
13957     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13958     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13959     # latter GetAllInstancesInfo() is not enough, for we have to go through
13960     # instance->node. Hence, we will need to process nodes even if we only need
13961     # instance information.
13962     if do_nodes or do_instances:
13963       all_nodes = lu.cfg.GetAllNodesInfo()
13964       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13965       node_to_group = {}
13966
13967       for node in all_nodes.values():
13968         if node.group in group_to_nodes:
13969           group_to_nodes[node.group].append(node.name)
13970           node_to_group[node.name] = node.group
13971
13972       if do_instances:
13973         all_instances = lu.cfg.GetAllInstancesInfo()
13974         group_to_instances = dict((uuid, []) for uuid in self.wanted)
13975
13976         for instance in all_instances.values():
13977           node = instance.primary_node
13978           if node in node_to_group:
13979             group_to_instances[node_to_group[node]].append(instance.name)
13980
13981         if not do_nodes:
13982           # Do not pass on node information if it was not requested.
13983           group_to_nodes = None
13984
13985     return query.GroupQueryData(self._cluster,
13986                                 [self._all_groups[uuid]
13987                                  for uuid in self.wanted],
13988                                 group_to_nodes, group_to_instances,
13989                                 query.GQ_DISKPARAMS in self.requested_data)
13990
13991
13992 class LUGroupQuery(NoHooksLU):
13993   """Logical unit for querying node groups.
13994
13995   """
13996   REQ_BGL = False
13997
13998   def CheckArguments(self):
13999     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14000                           self.op.output_fields, False)
14001
14002   def ExpandNames(self):
14003     self.gq.ExpandNames(self)
14004
14005   def DeclareLocks(self, level):
14006     self.gq.DeclareLocks(self, level)
14007
14008   def Exec(self, feedback_fn):
14009     return self.gq.OldStyleQuery(self)
14010
14011
14012 class LUGroupSetParams(LogicalUnit):
14013   """Modifies the parameters of a node group.
14014
14015   """
14016   HPATH = "group-modify"
14017   HTYPE = constants.HTYPE_GROUP
14018   REQ_BGL = False
14019
14020   def CheckArguments(self):
14021     all_changes = [
14022       self.op.ndparams,
14023       self.op.diskparams,
14024       self.op.alloc_policy,
14025       self.op.hv_state,
14026       self.op.disk_state,
14027       self.op.ipolicy,
14028       ]
14029
14030     if all_changes.count(None) == len(all_changes):
14031       raise errors.OpPrereqError("Please pass at least one modification",
14032                                  errors.ECODE_INVAL)
14033
14034   def ExpandNames(self):
14035     # This raises errors.OpPrereqError on its own:
14036     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14037
14038     self.needed_locks = {
14039       locking.LEVEL_INSTANCE: [],
14040       locking.LEVEL_NODEGROUP: [self.group_uuid],
14041       }
14042
14043     self.share_locks[locking.LEVEL_INSTANCE] = 1
14044
14045   def DeclareLocks(self, level):
14046     if level == locking.LEVEL_INSTANCE:
14047       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14048
14049       # Lock instances optimistically, needs verification once group lock has
14050       # been acquired
14051       self.needed_locks[locking.LEVEL_INSTANCE] = \
14052           self.cfg.GetNodeGroupInstances(self.group_uuid)
14053
14054   @staticmethod
14055   def _UpdateAndVerifyDiskParams(old, new):
14056     """Updates and verifies disk parameters.
14057
14058     """
14059     new_params = _GetUpdatedParams(old, new)
14060     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14061     return new_params
14062
14063   def CheckPrereq(self):
14064     """Check prerequisites.
14065
14066     """
14067     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14068
14069     # Check if locked instances are still correct
14070     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14071
14072     self.group = self.cfg.GetNodeGroup(self.group_uuid)
14073     cluster = self.cfg.GetClusterInfo()
14074
14075     if self.group is None:
14076       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14077                                (self.op.group_name, self.group_uuid))
14078
14079     if self.op.ndparams:
14080       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14081       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14082       self.new_ndparams = new_ndparams
14083
14084     if self.op.diskparams:
14085       diskparams = self.group.diskparams
14086       uavdp = self._UpdateAndVerifyDiskParams
14087       # For each disktemplate subdict update and verify the values
14088       new_diskparams = dict((dt,
14089                              uavdp(diskparams.get(dt, {}),
14090                                    self.op.diskparams[dt]))
14091                             for dt in constants.DISK_TEMPLATES
14092                             if dt in self.op.diskparams)
14093       # As we've all subdicts of diskparams ready, lets merge the actual
14094       # dict with all updated subdicts
14095       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14096       try:
14097         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14098       except errors.OpPrereqError, err:
14099         raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14100                                    errors.ECODE_INVAL)
14101
14102     if self.op.hv_state:
14103       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14104                                                  self.group.hv_state_static)
14105
14106     if self.op.disk_state:
14107       self.new_disk_state = \
14108         _MergeAndVerifyDiskState(self.op.disk_state,
14109                                  self.group.disk_state_static)
14110
14111     if self.op.ipolicy:
14112       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14113                                             self.op.ipolicy,
14114                                             group_policy=True)
14115
14116       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14117       inst_filter = lambda inst: inst.name in owned_instances
14118       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14119       gmi = ganeti.masterd.instance
14120       violations = \
14121           _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14122                                                                   self.group),
14123                                         new_ipolicy, instances)
14124
14125       if violations:
14126         self.LogWarning("After the ipolicy change the following instances"
14127                         " violate them: %s",
14128                         utils.CommaJoin(violations))
14129
14130   def BuildHooksEnv(self):
14131     """Build hooks env.
14132
14133     """
14134     return {
14135       "GROUP_NAME": self.op.group_name,
14136       "NEW_ALLOC_POLICY": self.op.alloc_policy,
14137       }
14138
14139   def BuildHooksNodes(self):
14140     """Build hooks nodes.
14141
14142     """
14143     mn = self.cfg.GetMasterNode()
14144     return ([mn], [mn])
14145
14146   def Exec(self, feedback_fn):
14147     """Modifies the node group.
14148
14149     """
14150     result = []
14151
14152     if self.op.ndparams:
14153       self.group.ndparams = self.new_ndparams
14154       result.append(("ndparams", str(self.group.ndparams)))
14155
14156     if self.op.diskparams:
14157       self.group.diskparams = self.new_diskparams
14158       result.append(("diskparams", str(self.group.diskparams)))
14159
14160     if self.op.alloc_policy:
14161       self.group.alloc_policy = self.op.alloc_policy
14162
14163     if self.op.hv_state:
14164       self.group.hv_state_static = self.new_hv_state
14165
14166     if self.op.disk_state:
14167       self.group.disk_state_static = self.new_disk_state
14168
14169     if self.op.ipolicy:
14170       self.group.ipolicy = self.new_ipolicy
14171
14172     self.cfg.Update(self.group, feedback_fn)
14173     return result
14174
14175
14176 class LUGroupRemove(LogicalUnit):
14177   HPATH = "group-remove"
14178   HTYPE = constants.HTYPE_GROUP
14179   REQ_BGL = False
14180
14181   def ExpandNames(self):
14182     # This will raises errors.OpPrereqError on its own:
14183     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14184     self.needed_locks = {
14185       locking.LEVEL_NODEGROUP: [self.group_uuid],
14186       }
14187
14188   def CheckPrereq(self):
14189     """Check prerequisites.
14190
14191     This checks that the given group name exists as a node group, that is
14192     empty (i.e., contains no nodes), and that is not the last group of the
14193     cluster.
14194
14195     """
14196     # Verify that the group is empty.
14197     group_nodes = [node.name
14198                    for node in self.cfg.GetAllNodesInfo().values()
14199                    if node.group == self.group_uuid]
14200
14201     if group_nodes:
14202       raise errors.OpPrereqError("Group '%s' not empty, has the following"
14203                                  " nodes: %s" %
14204                                  (self.op.group_name,
14205                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
14206                                  errors.ECODE_STATE)
14207
14208     # Verify the cluster would not be left group-less.
14209     if len(self.cfg.GetNodeGroupList()) == 1:
14210       raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14211                                  " removed" % self.op.group_name,
14212                                  errors.ECODE_STATE)
14213
14214   def BuildHooksEnv(self):
14215     """Build hooks env.
14216
14217     """
14218     return {
14219       "GROUP_NAME": self.op.group_name,
14220       }
14221
14222   def BuildHooksNodes(self):
14223     """Build hooks nodes.
14224
14225     """
14226     mn = self.cfg.GetMasterNode()
14227     return ([mn], [mn])
14228
14229   def Exec(self, feedback_fn):
14230     """Remove the node group.
14231
14232     """
14233     try:
14234       self.cfg.RemoveNodeGroup(self.group_uuid)
14235     except errors.ConfigurationError:
14236       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14237                                (self.op.group_name, self.group_uuid))
14238
14239     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14240
14241
14242 class LUGroupRename(LogicalUnit):
14243   HPATH = "group-rename"
14244   HTYPE = constants.HTYPE_GROUP
14245   REQ_BGL = False
14246
14247   def ExpandNames(self):
14248     # This raises errors.OpPrereqError on its own:
14249     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14250
14251     self.needed_locks = {
14252       locking.LEVEL_NODEGROUP: [self.group_uuid],
14253       }
14254
14255   def CheckPrereq(self):
14256     """Check prerequisites.
14257
14258     Ensures requested new name is not yet used.
14259
14260     """
14261     try:
14262       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14263     except errors.OpPrereqError:
14264       pass
14265     else:
14266       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14267                                  " node group (UUID: %s)" %
14268                                  (self.op.new_name, new_name_uuid),
14269                                  errors.ECODE_EXISTS)
14270
14271   def BuildHooksEnv(self):
14272     """Build hooks env.
14273
14274     """
14275     return {
14276       "OLD_NAME": self.op.group_name,
14277       "NEW_NAME": self.op.new_name,
14278       }
14279
14280   def BuildHooksNodes(self):
14281     """Build hooks nodes.
14282
14283     """
14284     mn = self.cfg.GetMasterNode()
14285
14286     all_nodes = self.cfg.GetAllNodesInfo()
14287     all_nodes.pop(mn, None)
14288
14289     run_nodes = [mn]
14290     run_nodes.extend(node.name for node in all_nodes.values()
14291                      if node.group == self.group_uuid)
14292
14293     return (run_nodes, run_nodes)
14294
14295   def Exec(self, feedback_fn):
14296     """Rename the node group.
14297
14298     """
14299     group = self.cfg.GetNodeGroup(self.group_uuid)
14300
14301     if group is None:
14302       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14303                                (self.op.group_name, self.group_uuid))
14304
14305     group.name = self.op.new_name
14306     self.cfg.Update(group, feedback_fn)
14307
14308     return self.op.new_name
14309
14310
14311 class LUGroupEvacuate(LogicalUnit):
14312   HPATH = "group-evacuate"
14313   HTYPE = constants.HTYPE_GROUP
14314   REQ_BGL = False
14315
14316   def ExpandNames(self):
14317     # This raises errors.OpPrereqError on its own:
14318     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14319
14320     if self.op.target_groups:
14321       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14322                                   self.op.target_groups)
14323     else:
14324       self.req_target_uuids = []
14325
14326     if self.group_uuid in self.req_target_uuids:
14327       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14328                                  " as a target group (targets are %s)" %
14329                                  (self.group_uuid,
14330                                   utils.CommaJoin(self.req_target_uuids)),
14331                                  errors.ECODE_INVAL)
14332
14333     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14334
14335     self.share_locks = _ShareAll()
14336     self.needed_locks = {
14337       locking.LEVEL_INSTANCE: [],
14338       locking.LEVEL_NODEGROUP: [],
14339       locking.LEVEL_NODE: [],
14340       }
14341
14342   def DeclareLocks(self, level):
14343     if level == locking.LEVEL_INSTANCE:
14344       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14345
14346       # Lock instances optimistically, needs verification once node and group
14347       # locks have been acquired
14348       self.needed_locks[locking.LEVEL_INSTANCE] = \
14349         self.cfg.GetNodeGroupInstances(self.group_uuid)
14350
14351     elif level == locking.LEVEL_NODEGROUP:
14352       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14353
14354       if self.req_target_uuids:
14355         lock_groups = set([self.group_uuid] + self.req_target_uuids)
14356
14357         # Lock all groups used by instances optimistically; this requires going
14358         # via the node before it's locked, requiring verification later on
14359         lock_groups.update(group_uuid
14360                            for instance_name in
14361                              self.owned_locks(locking.LEVEL_INSTANCE)
14362                            for group_uuid in
14363                              self.cfg.GetInstanceNodeGroups(instance_name))
14364       else:
14365         # No target groups, need to lock all of them
14366         lock_groups = locking.ALL_SET
14367
14368       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14369
14370     elif level == locking.LEVEL_NODE:
14371       # This will only lock the nodes in the group to be evacuated which
14372       # contain actual instances
14373       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14374       self._LockInstancesNodes()
14375
14376       # Lock all nodes in group to be evacuated and target groups
14377       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14378       assert self.group_uuid in owned_groups
14379       member_nodes = [node_name
14380                       for group in owned_groups
14381                       for node_name in self.cfg.GetNodeGroup(group).members]
14382       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14383
14384   def CheckPrereq(self):
14385     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14386     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14387     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14388
14389     assert owned_groups.issuperset(self.req_target_uuids)
14390     assert self.group_uuid in owned_groups
14391
14392     # Check if locked instances are still correct
14393     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14394
14395     # Get instance information
14396     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14397
14398     # Check if node groups for locked instances are still correct
14399     _CheckInstancesNodeGroups(self.cfg, self.instances,
14400                               owned_groups, owned_nodes, self.group_uuid)
14401
14402     if self.req_target_uuids:
14403       # User requested specific target groups
14404       self.target_uuids = self.req_target_uuids
14405     else:
14406       # All groups except the one to be evacuated are potential targets
14407       self.target_uuids = [group_uuid for group_uuid in owned_groups
14408                            if group_uuid != self.group_uuid]
14409
14410       if not self.target_uuids:
14411         raise errors.OpPrereqError("There are no possible target groups",
14412                                    errors.ECODE_INVAL)
14413
14414   def BuildHooksEnv(self):
14415     """Build hooks env.
14416
14417     """
14418     return {
14419       "GROUP_NAME": self.op.group_name,
14420       "TARGET_GROUPS": " ".join(self.target_uuids),
14421       }
14422
14423   def BuildHooksNodes(self):
14424     """Build hooks nodes.
14425
14426     """
14427     mn = self.cfg.GetMasterNode()
14428
14429     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14430
14431     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14432
14433     return (run_nodes, run_nodes)
14434
14435   def Exec(self, feedback_fn):
14436     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14437
14438     assert self.group_uuid not in self.target_uuids
14439
14440     req = iallocator.IAReqGroupChange(instances=instances,
14441                                       target_groups=self.target_uuids)
14442     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14443
14444     ial.Run(self.op.iallocator)
14445
14446     if not ial.success:
14447       raise errors.OpPrereqError("Can't compute group evacuation using"
14448                                  " iallocator '%s': %s" %
14449                                  (self.op.iallocator, ial.info),
14450                                  errors.ECODE_NORES)
14451
14452     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14453
14454     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14455                  len(jobs), self.op.group_name)
14456
14457     return ResultWithJobs(jobs)
14458
14459
14460 class TagsLU(NoHooksLU): # pylint: disable=W0223
14461   """Generic tags LU.
14462
14463   This is an abstract class which is the parent of all the other tags LUs.
14464
14465   """
14466   def ExpandNames(self):
14467     self.group_uuid = None
14468     self.needed_locks = {}
14469
14470     if self.op.kind == constants.TAG_NODE:
14471       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14472       lock_level = locking.LEVEL_NODE
14473       lock_name = self.op.name
14474     elif self.op.kind == constants.TAG_INSTANCE:
14475       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14476       lock_level = locking.LEVEL_INSTANCE
14477       lock_name = self.op.name
14478     elif self.op.kind == constants.TAG_NODEGROUP:
14479       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14480       lock_level = locking.LEVEL_NODEGROUP
14481       lock_name = self.group_uuid
14482     else:
14483       lock_level = None
14484       lock_name = None
14485
14486     if lock_level and getattr(self.op, "use_locking", True):
14487       self.needed_locks[lock_level] = lock_name
14488
14489     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14490     # not possible to acquire the BGL based on opcode parameters)
14491
14492   def CheckPrereq(self):
14493     """Check prerequisites.
14494
14495     """
14496     if self.op.kind == constants.TAG_CLUSTER:
14497       self.target = self.cfg.GetClusterInfo()
14498     elif self.op.kind == constants.TAG_NODE:
14499       self.target = self.cfg.GetNodeInfo(self.op.name)
14500     elif self.op.kind == constants.TAG_INSTANCE:
14501       self.target = self.cfg.GetInstanceInfo(self.op.name)
14502     elif self.op.kind == constants.TAG_NODEGROUP:
14503       self.target = self.cfg.GetNodeGroup(self.group_uuid)
14504     else:
14505       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14506                                  str(self.op.kind), errors.ECODE_INVAL)
14507
14508
14509 class LUTagsGet(TagsLU):
14510   """Returns the tags of a given object.
14511
14512   """
14513   REQ_BGL = False
14514
14515   def ExpandNames(self):
14516     TagsLU.ExpandNames(self)
14517
14518     # Share locks as this is only a read operation
14519     self.share_locks = _ShareAll()
14520
14521   def Exec(self, feedback_fn):
14522     """Returns the tag list.
14523
14524     """
14525     return list(self.target.GetTags())
14526
14527
14528 class LUTagsSearch(NoHooksLU):
14529   """Searches the tags for a given pattern.
14530
14531   """
14532   REQ_BGL = False
14533
14534   def ExpandNames(self):
14535     self.needed_locks = {}
14536
14537   def CheckPrereq(self):
14538     """Check prerequisites.
14539
14540     This checks the pattern passed for validity by compiling it.
14541
14542     """
14543     try:
14544       self.re = re.compile(self.op.pattern)
14545     except re.error, err:
14546       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14547                                  (self.op.pattern, err), errors.ECODE_INVAL)
14548
14549   def Exec(self, feedback_fn):
14550     """Returns the tag list.
14551
14552     """
14553     cfg = self.cfg
14554     tgts = [("/cluster", cfg.GetClusterInfo())]
14555     ilist = cfg.GetAllInstancesInfo().values()
14556     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14557     nlist = cfg.GetAllNodesInfo().values()
14558     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14559     tgts.extend(("/nodegroup/%s" % n.name, n)
14560                 for n in cfg.GetAllNodeGroupsInfo().values())
14561     results = []
14562     for path, target in tgts:
14563       for tag in target.GetTags():
14564         if self.re.search(tag):
14565           results.append((path, tag))
14566     return results
14567
14568
14569 class LUTagsSet(TagsLU):
14570   """Sets a tag on a given object.
14571
14572   """
14573   REQ_BGL = False
14574
14575   def CheckPrereq(self):
14576     """Check prerequisites.
14577
14578     This checks the type and length of the tag name and value.
14579
14580     """
14581     TagsLU.CheckPrereq(self)
14582     for tag in self.op.tags:
14583       objects.TaggableObject.ValidateTag(tag)
14584
14585   def Exec(self, feedback_fn):
14586     """Sets the tag.
14587
14588     """
14589     try:
14590       for tag in self.op.tags:
14591         self.target.AddTag(tag)
14592     except errors.TagError, err:
14593       raise errors.OpExecError("Error while setting tag: %s" % str(err))
14594     self.cfg.Update(self.target, feedback_fn)
14595
14596
14597 class LUTagsDel(TagsLU):
14598   """Delete a list of tags from a given object.
14599
14600   """
14601   REQ_BGL = False
14602
14603   def CheckPrereq(self):
14604     """Check prerequisites.
14605
14606     This checks that we have the given tag.
14607
14608     """
14609     TagsLU.CheckPrereq(self)
14610     for tag in self.op.tags:
14611       objects.TaggableObject.ValidateTag(tag)
14612     del_tags = frozenset(self.op.tags)
14613     cur_tags = self.target.GetTags()
14614
14615     diff_tags = del_tags - cur_tags
14616     if diff_tags:
14617       diff_names = ("'%s'" % i for i in sorted(diff_tags))
14618       raise errors.OpPrereqError("Tag(s) %s not found" %
14619                                  (utils.CommaJoin(diff_names), ),
14620                                  errors.ECODE_NOENT)
14621
14622   def Exec(self, feedback_fn):
14623     """Remove the tag from the object.
14624
14625     """
14626     for tag in self.op.tags:
14627       self.target.RemoveTag(tag)
14628     self.cfg.Update(self.target, feedback_fn)
14629
14630
14631 class LUTestDelay(NoHooksLU):
14632   """Sleep for a specified amount of time.
14633
14634   This LU sleeps on the master and/or nodes for a specified amount of
14635   time.
14636
14637   """
14638   REQ_BGL = False
14639
14640   def ExpandNames(self):
14641     """Expand names and set required locks.
14642
14643     This expands the node list, if any.
14644
14645     """
14646     self.needed_locks = {}
14647     if self.op.on_nodes:
14648       # _GetWantedNodes can be used here, but is not always appropriate to use
14649       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14650       # more information.
14651       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14652       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14653
14654   def _TestDelay(self):
14655     """Do the actual sleep.
14656
14657     """
14658     if self.op.on_master:
14659       if not utils.TestDelay(self.op.duration):
14660         raise errors.OpExecError("Error during master delay test")
14661     if self.op.on_nodes:
14662       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14663       for node, node_result in result.items():
14664         node_result.Raise("Failure during rpc call to node %s" % node)
14665
14666   def Exec(self, feedback_fn):
14667     """Execute the test delay opcode, with the wanted repetitions.
14668
14669     """
14670     if self.op.repeat == 0:
14671       self._TestDelay()
14672     else:
14673       top_value = self.op.repeat - 1
14674       for i in range(self.op.repeat):
14675         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14676         self._TestDelay()
14677
14678
14679 class LUTestJqueue(NoHooksLU):
14680   """Utility LU to test some aspects of the job queue.
14681
14682   """
14683   REQ_BGL = False
14684
14685   # Must be lower than default timeout for WaitForJobChange to see whether it
14686   # notices changed jobs
14687   _CLIENT_CONNECT_TIMEOUT = 20.0
14688   _CLIENT_CONFIRM_TIMEOUT = 60.0
14689
14690   @classmethod
14691   def _NotifyUsingSocket(cls, cb, errcls):
14692     """Opens a Unix socket and waits for another program to connect.
14693
14694     @type cb: callable
14695     @param cb: Callback to send socket name to client
14696     @type errcls: class
14697     @param errcls: Exception class to use for errors
14698
14699     """
14700     # Using a temporary directory as there's no easy way to create temporary
14701     # sockets without writing a custom loop around tempfile.mktemp and
14702     # socket.bind
14703     tmpdir = tempfile.mkdtemp()
14704     try:
14705       tmpsock = utils.PathJoin(tmpdir, "sock")
14706
14707       logging.debug("Creating temporary socket at %s", tmpsock)
14708       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14709       try:
14710         sock.bind(tmpsock)
14711         sock.listen(1)
14712
14713         # Send details to client
14714         cb(tmpsock)
14715
14716         # Wait for client to connect before continuing
14717         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14718         try:
14719           (conn, _) = sock.accept()
14720         except socket.error, err:
14721           raise errcls("Client didn't connect in time (%s)" % err)
14722       finally:
14723         sock.close()
14724     finally:
14725       # Remove as soon as client is connected
14726       shutil.rmtree(tmpdir)
14727
14728     # Wait for client to close
14729     try:
14730       try:
14731         # pylint: disable=E1101
14732         # Instance of '_socketobject' has no ... member
14733         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14734         conn.recv(1)
14735       except socket.error, err:
14736         raise errcls("Client failed to confirm notification (%s)" % err)
14737     finally:
14738       conn.close()
14739
14740   def _SendNotification(self, test, arg, sockname):
14741     """Sends a notification to the client.
14742
14743     @type test: string
14744     @param test: Test name
14745     @param arg: Test argument (depends on test)
14746     @type sockname: string
14747     @param sockname: Socket path
14748
14749     """
14750     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14751
14752   def _Notify(self, prereq, test, arg):
14753     """Notifies the client of a test.
14754
14755     @type prereq: bool
14756     @param prereq: Whether this is a prereq-phase test
14757     @type test: string
14758     @param test: Test name
14759     @param arg: Test argument (depends on test)
14760
14761     """
14762     if prereq:
14763       errcls = errors.OpPrereqError
14764     else:
14765       errcls = errors.OpExecError
14766
14767     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14768                                                   test, arg),
14769                                    errcls)
14770
14771   def CheckArguments(self):
14772     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14773     self.expandnames_calls = 0
14774
14775   def ExpandNames(self):
14776     checkargs_calls = getattr(self, "checkargs_calls", 0)
14777     if checkargs_calls < 1:
14778       raise errors.ProgrammerError("CheckArguments was not called")
14779
14780     self.expandnames_calls += 1
14781
14782     if self.op.notify_waitlock:
14783       self._Notify(True, constants.JQT_EXPANDNAMES, None)
14784
14785     self.LogInfo("Expanding names")
14786
14787     # Get lock on master node (just to get a lock, not for a particular reason)
14788     self.needed_locks = {
14789       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14790       }
14791
14792   def Exec(self, feedback_fn):
14793     if self.expandnames_calls < 1:
14794       raise errors.ProgrammerError("ExpandNames was not called")
14795
14796     if self.op.notify_exec:
14797       self._Notify(False, constants.JQT_EXEC, None)
14798
14799     self.LogInfo("Executing")
14800
14801     if self.op.log_messages:
14802       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14803       for idx, msg in enumerate(self.op.log_messages):
14804         self.LogInfo("Sending log message %s", idx + 1)
14805         feedback_fn(constants.JQT_MSGPREFIX + msg)
14806         # Report how many test messages have been sent
14807         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14808
14809     if self.op.fail:
14810       raise errors.OpExecError("Opcode failure was requested")
14811
14812     return True
14813
14814
14815 class LUTestAllocator(NoHooksLU):
14816   """Run allocator tests.
14817
14818   This LU runs the allocator tests
14819
14820   """
14821   def CheckPrereq(self):
14822     """Check prerequisites.
14823
14824     This checks the opcode parameters depending on the director and mode test.
14825
14826     """
14827     if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
14828                         constants.IALLOCATOR_MODE_MULTI_ALLOC):
14829       for attr in ["memory", "disks", "disk_template",
14830                    "os", "tags", "nics", "vcpus"]:
14831         if not hasattr(self.op, attr):
14832           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14833                                      attr, errors.ECODE_INVAL)
14834       iname = self.cfg.ExpandInstanceName(self.op.name)
14835       if iname is not None:
14836         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14837                                    iname, errors.ECODE_EXISTS)
14838       if not isinstance(self.op.nics, list):
14839         raise errors.OpPrereqError("Invalid parameter 'nics'",
14840                                    errors.ECODE_INVAL)
14841       if not isinstance(self.op.disks, list):
14842         raise errors.OpPrereqError("Invalid parameter 'disks'",
14843                                    errors.ECODE_INVAL)
14844       for row in self.op.disks:
14845         if (not isinstance(row, dict) or
14846             constants.IDISK_SIZE not in row or
14847             not isinstance(row[constants.IDISK_SIZE], int) or
14848             constants.IDISK_MODE not in row or
14849             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14850           raise errors.OpPrereqError("Invalid contents of the 'disks'"
14851                                      " parameter", errors.ECODE_INVAL)
14852       if self.op.hypervisor is None:
14853         self.op.hypervisor = self.cfg.GetHypervisorType()
14854     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14855       fname = _ExpandInstanceName(self.cfg, self.op.name)
14856       self.op.name = fname
14857       self.relocate_from = \
14858           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14859     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14860                           constants.IALLOCATOR_MODE_NODE_EVAC):
14861       if not self.op.instances:
14862         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14863       self.op.instances = _GetWantedInstances(self, self.op.instances)
14864     else:
14865       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14866                                  self.op.mode, errors.ECODE_INVAL)
14867
14868     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14869       if self.op.allocator is None:
14870         raise errors.OpPrereqError("Missing allocator name",
14871                                    errors.ECODE_INVAL)
14872     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14873       raise errors.OpPrereqError("Wrong allocator test '%s'" %
14874                                  self.op.direction, errors.ECODE_INVAL)
14875
14876   def Exec(self, feedback_fn):
14877     """Run the allocator test.
14878
14879     """
14880     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14881       req = iallocator.IAReqInstanceAlloc(name=self.op.name,
14882                                           memory=self.op.memory,
14883                                           disks=self.op.disks,
14884                                           disk_template=self.op.disk_template,
14885                                           os=self.op.os,
14886                                           tags=self.op.tags,
14887                                           nics=self.op.nics,
14888                                           vcpus=self.op.vcpus,
14889                                           spindle_use=self.op.spindle_use,
14890                                           hypervisor=self.op.hypervisor)
14891     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14892       req = iallocator.IAReqRelocate(name=self.op.name,
14893                                      relocate_from=list(self.relocate_from))
14894     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14895       req = iallocator.IAReqGroupChange(instances=self.op.instances,
14896                                         target_groups=self.op.target_groups)
14897     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14898       req = iallocator.IAReqNodeEvac(instances=self.op.instances,
14899                                      evac_mode=self.op.evac_mode)
14900     elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
14901       disk_template = self.op.disk_template
14902       insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
14903                                              memory=self.op.memory,
14904                                              disks=self.op.disks,
14905                                              disk_template=disk_template,
14906                                              os=self.op.os,
14907                                              tags=self.op.tags,
14908                                              nics=self.op.nics,
14909                                              vcpus=self.op.vcpus,
14910                                              spindle_use=self.op.spindle_use,
14911                                              hypervisor=self.op.hypervisor)
14912                for idx in range(self.op.count)]
14913       req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
14914     else:
14915       raise errors.ProgrammerError("Uncatched mode %s in"
14916                                    " LUTestAllocator.Exec", self.op.mode)
14917
14918     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14919     if self.op.direction == constants.IALLOCATOR_DIR_IN:
14920       result = ial.in_text
14921     else:
14922       ial.Run(self.op.allocator, validate=False)
14923       result = ial.out_text
14924     return result
14925
14926
14927 #: Query type implementations
14928 _QUERY_IMPL = {
14929   constants.QR_CLUSTER: _ClusterQuery,
14930   constants.QR_INSTANCE: _InstanceQuery,
14931   constants.QR_NODE: _NodeQuery,
14932   constants.QR_GROUP: _GroupQuery,
14933   constants.QR_OS: _OsQuery,
14934   constants.QR_EXPORT: _ExportQuery,
14935   }
14936
14937 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14938
14939
14940 def _GetQueryImplementation(name):
14941   """Returns the implemtnation for a query type.
14942
14943   @param name: Query type, must be one of L{constants.QR_VIA_OP}
14944
14945   """
14946   try:
14947     return _QUERY_IMPL[name]
14948   except KeyError:
14949     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14950                                errors.ECODE_INVAL)