code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60 from ganeti import ht
  61 from ganeti import rpc
  62 from ganeti import runtime
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141     # Dicts used to declare locking needs to mcpu
 142     self.needed_locks = None
 143     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 144     self.add_locks = {}
 145     self.remove_locks = {}
 146     # Used to force good behavior when calling helper functions
 147     self.recalculate_locks = {}
 148     # logging
 149     self.Log = processor.Log # pylint: disable=C0103
 150     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 151     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 152     self.LogStep = processor.LogStep # pylint: disable=C0103
 153     # support for dry-run
 154     self.dry_run_result = None
 155     # support for generic debug attribute
 156     if (not hasattr(self.op, "debug_level") or
 157         not isinstance(self.op.debug_level, int)):
 158       self.op.debug_level = 0
 159
 160     # Tasklets
 161     self.tasklets = None
 162
 163     # Validate opcode parameters and set defaults
 164     self.op.Validate(True)
 165
 166     self.CheckArguments()
 167
 168   def CheckArguments(self):
 169     """Check syntactic validity for the opcode arguments.
 170
 171     This method is for doing a simple syntactic check and ensure
 172     validity of opcode parameters, without any cluster-related
 173     checks. While the same can be accomplished in ExpandNames and/or
 174     CheckPrereq, doing these separate is better because:
 175
 176       - ExpandNames is left as as purely a lock-related function
 177       - CheckPrereq is run after we have acquired locks (and possible
 178         waited for them)
 179
 180     The function is allowed to change the self.op attribute so that
 181     later methods can no longer worry about missing parameters.
 182
 183     """
 184     pass
 185
 186   def ExpandNames(self):
 187     """Expand names for this LU.
 188
 189     This method is called before starting to execute the opcode, and it should
 190     update all the parameters of the opcode to their canonical form (e.g. a
 191     short node name must be fully expanded after this method has successfully
 192     completed). This way locking, hooks, logging, etc. can work correctly.
 193
 194     LUs which implement this method must also populate the self.needed_locks
 195     member, as a dict with lock levels as keys, and a list of needed lock names
 196     as values. Rules:
 197
 198       - use an empty dict if you don't need any lock
 199       - if you don't need any lock at a particular level omit that
 200         level (note that in this case C{DeclareLocks} won't be called
 201         at all for that level)
 202       - if you need locks at a level, but you can't calculate it in
 203         this function, initialise that level with an empty list and do
 204         further processing in L{LogicalUnit.DeclareLocks} (see that
 205         function's docstring)
 206       - don't put anything for the BGL level
 207       - if you want all locks at a level use L{locking.ALL_SET} as a value
 208
 209     If you need to share locks (rather than acquire them exclusively) at one
 210     level you can modify self.share_locks, setting a true value (usually 1) for
 211     that level. By default locks are not shared.
 212
 213     This function can also define a list of tasklets, which then will be
 214     executed in order instead of the usual LU-level CheckPrereq and Exec
 215     functions, if those are not defined by the LU.
 216
 217     Examples::
 218
 219       # Acquire all nodes and one instance
 220       self.needed_locks = {
 221         locking.LEVEL_NODE: locking.ALL_SET,
 222         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 223       }
 224       # Acquire just two nodes
 225       self.needed_locks = {
 226         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 227       }
 228       # Acquire no locks
 229       self.needed_locks = {} # No, you can't leave it to the default value None
 230
 231     """
 232     # The implementation of this method is mandatory only if the new LU is
 233     # concurrent, so that old LUs don't need to be changed all at the same
 234     # time.
 235     if self.REQ_BGL:
 236       self.needed_locks = {} # Exclusive LUs don't need locks.
 237     else:
 238       raise NotImplementedError
 239
 240   def DeclareLocks(self, level):
 241     """Declare LU locking needs for a level
 242
 243     While most LUs can just declare their locking needs at ExpandNames time,
 244     sometimes there's the need to calculate some locks after having acquired
 245     the ones before. This function is called just before acquiring locks at a
 246     particular level, but after acquiring the ones at lower levels, and permits
 247     such calculations. It can be used to modify self.needed_locks, and by
 248     default it does nothing.
 249
 250     This function is only called if you have something already set in
 251     self.needed_locks for the level.
 252
 253     @param level: Locking level which is going to be locked
 254     @type level: member of L{ganeti.locking.LEVELS}
 255
 256     """
 257
 258   def CheckPrereq(self):
 259     """Check prerequisites for this LU.
 260
 261     This method should check that the prerequisites for the execution
 262     of this LU are fulfilled. It can do internode communication, but
 263     it should be idempotent - no cluster or system changes are
 264     allowed.
 265
 266     The method should raise errors.OpPrereqError in case something is
 267     not fulfilled. Its return value is ignored.
 268
 269     This method should also update all the parameters of the opcode to
 270     their canonical form if it hasn't been done by ExpandNames before.
 271
 272     """
 273     if self.tasklets is not None:
 274       for (idx, tl) in enumerate(self.tasklets):
 275         logging.debug("Checking prerequisites for tasklet %s/%s",
 276                       idx + 1, len(self.tasklets))
 277         tl.CheckPrereq()
 278     else:
 279       pass
 280
 281   def Exec(self, feedback_fn):
 282     """Execute the LU.
 283
 284     This method should implement the actual work. It should raise
 285     errors.OpExecError for failures that are somewhat dealt with in
 286     code, or expected.
 287
 288     """
 289     if self.tasklets is not None:
 290       for (idx, tl) in enumerate(self.tasklets):
 291         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 292         tl.Exec(feedback_fn)
 293     else:
 294       raise NotImplementedError
 295
 296   def BuildHooksEnv(self):
 297     """Build hooks environment for this LU.
 298
 299     @rtype: dict
 300     @return: Dictionary containing the environment that will be used for
 301       running the hooks for this LU. The keys of the dict must not be prefixed
 302       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 303       will extend the environment with additional variables. If no environment
 304       should be defined, an empty dictionary should be returned (not C{None}).
 305     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 306       will not be called.
 307
 308     """
 309     raise NotImplementedError
 310
 311   def BuildHooksNodes(self):
 312     """Build list of nodes to run LU's hooks.
 313
 314     @rtype: tuple; (list, list)
 315     @return: Tuple containing a list of node names on which the hook
 316       should run before the execution and a list of node names on which the
 317       hook should run after the execution. No nodes should be returned as an
 318       empty list (and not None).
 319     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 320       will not be called.
 321
 322     """
 323     raise NotImplementedError
 324
 325   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 326     """Notify the LU about the results of its hooks.
 327
 328     This method is called every time a hooks phase is executed, and notifies
 329     the Logical Unit about the hooks' result. The LU can then use it to alter
 330     its result based on the hooks.  By default the method does nothing and the
 331     previous result is passed back unchanged but any LU can define it if it
 332     wants to use the local cluster hook-scripts somehow.
 333
 334     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 335         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 336     @param hook_results: the results of the multi-node hooks rpc call
 337     @param feedback_fn: function used send feedback back to the caller
 338     @param lu_result: the previous Exec result this LU had, or None
 339         in the PRE phase
 340     @return: the new Exec result, based on the previous result
 341         and hook results
 342
 343     """
 344     # API must be kept, thus we ignore the unused argument and could
 345     # be a function warnings
 346     # pylint: disable=W0613,R0201
 347     return lu_result
 348
 349   def _ExpandAndLockInstance(self):
 350     """Helper function to expand and lock an instance.
 351
 352     Many LUs that work on an instance take its name in self.op.instance_name
 353     and need to expand it and then declare the expanded name for locking. This
 354     function does it, and then updates self.op.instance_name to the expanded
 355     name. It also initializes needed_locks as a dict, if this hasn't been done
 356     before.
 357
 358     """
 359     if self.needed_locks is None:
 360       self.needed_locks = {}
 361     else:
 362       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 363         "_ExpandAndLockInstance called with instance-level locks set"
 364     self.op.instance_name = _ExpandInstanceName(self.cfg,
 365                                                 self.op.instance_name)
 366     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 367
 368   def _LockInstancesNodes(self, primary_only=False,
 369                           level=locking.LEVEL_NODE):
 370     """Helper function to declare instances' nodes for locking.
 371
 372     This function should be called after locking one or more instances to lock
 373     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 374     with all primary or secondary nodes for instances already locked and
 375     present in self.needed_locks[locking.LEVEL_INSTANCE].
 376
 377     It should be called from DeclareLocks, and for safety only works if
 378     self.recalculate_locks[locking.LEVEL_NODE] is set.
 379
 380     In the future it may grow parameters to just lock some instance's nodes, or
 381     to just lock primaries or secondary nodes, if needed.
 382
 383     If should be called in DeclareLocks in a way similar to::
 384
 385       if level == locking.LEVEL_NODE:
 386         self._LockInstancesNodes()
 387
 388     @type primary_only: boolean
 389     @param primary_only: only lock primary nodes of locked instances
 390     @param level: Which lock level to use for locking nodes
 391
 392     """
 393     assert level in self.recalculate_locks, \
 394       "_LockInstancesNodes helper function called with no nodes to recalculate"
 395
 396     # TODO: check if we're really been called with the instance locks held
 397
 398     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 399     # future we might want to have different behaviors depending on the value
 400     # of self.recalculate_locks[locking.LEVEL_NODE]
 401     wanted_nodes = []
 402     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 403     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 404       wanted_nodes.append(instance.primary_node)
 405       if not primary_only:
 406         wanted_nodes.extend(instance.secondary_nodes)
 407
 408     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 409       self.needed_locks[level] = wanted_nodes
 410     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 411       self.needed_locks[level].extend(wanted_nodes)
 412     else:
 413       raise errors.ProgrammerError("Unknown recalculation mode")
 414
 415     del self.recalculate_locks[level]
 416
 417
 418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 419   """Simple LU which runs no hooks.
 420
 421   This LU is intended as a parent for other LogicalUnits which will
 422   run no hooks, in order to reduce duplicate code.
 423
 424   """
 425   HPATH = None
 426   HTYPE = None
 427
 428   def BuildHooksEnv(self):
 429     """Empty BuildHooksEnv for NoHooksLu.
 430
 431     This just raises an error.
 432
 433     """
 434     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 435
 436   def BuildHooksNodes(self):
 437     """Empty BuildHooksNodes for NoHooksLU.
 438
 439     """
 440     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 441
 442
 443 class Tasklet:
 444   """Tasklet base class.
 445
 446   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 447   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 448   tasklets know nothing about locks.
 449
 450   Subclasses must follow these rules:
 451     - Implement CheckPrereq
 452     - Implement Exec
 453
 454   """
 455   def __init__(self, lu):
 456     self.lu = lu
 457
 458     # Shortcuts
 459     self.cfg = lu.cfg
 460     self.rpc = lu.rpc
 461
 462   def CheckPrereq(self):
 463     """Check prerequisites for this tasklets.
 464
 465     This method should check whether the prerequisites for the execution of
 466     this tasklet are fulfilled. It can do internode communication, but it
 467     should be idempotent - no cluster or system changes are allowed.
 468
 469     The method should raise errors.OpPrereqError in case something is not
 470     fulfilled. Its return value is ignored.
 471
 472     This method should also update all parameters to their canonical form if it
 473     hasn't been done before.
 474
 475     """
 476     pass
 477
 478   def Exec(self, feedback_fn):
 479     """Execute the tasklet.
 480
 481     This method should implement the actual work. It should raise
 482     errors.OpExecError for failures that are somewhat dealt with in code, or
 483     expected.
 484
 485     """
 486     raise NotImplementedError
 487
 488
 489 class _QueryBase:
 490   """Base for query utility classes.
 491
 492   """
 493   #: Attribute holding field definitions
 494   FIELDS = None
 495
 496   def __init__(self, qfilter, fields, use_locking):
 497     """Initializes this class.
 498
 499     """
 500     self.use_locking = use_locking
 501
 502     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 503                              namefield="name")
 504     self.requested_data = self.query.RequestedData()
 505     self.names = self.query.RequestedNames()
 506
 507     # Sort only if no names were requested
 508     self.sort_by_name = not self.names
 509
 510     self.do_locking = None
 511     self.wanted = None
 512
 513   def _GetNames(self, lu, all_names, lock_level):
 514     """Helper function to determine names asked for in the query.
 515
 516     """
 517     if self.do_locking:
 518       names = lu.owned_locks(lock_level)
 519     else:
 520       names = all_names
 521
 522     if self.wanted == locking.ALL_SET:
 523       assert not self.names
 524       # caller didn't specify names, so ordering is not important
 525       return utils.NiceSort(names)
 526
 527     # caller specified names and we must keep the same order
 528     assert self.names
 529     assert not self.do_locking or lu.glm.is_owned(lock_level)
 530
 531     missing = set(self.wanted).difference(names)
 532     if missing:
 533       raise errors.OpExecError("Some items were removed before retrieving"
 534                                " their data: %s" % missing)
 535
 536     # Return expanded names
 537     return self.wanted
 538
 539   def ExpandNames(self, lu):
 540     """Expand names for this query.
 541
 542     See L{LogicalUnit.ExpandNames}.
 543
 544     """
 545     raise NotImplementedError()
 546
 547   def DeclareLocks(self, lu, level):
 548     """Declare locks for this query.
 549
 550     See L{LogicalUnit.DeclareLocks}.
 551
 552     """
 553     raise NotImplementedError()
 554
 555   def _GetQueryData(self, lu):
 556     """Collects all data for this query.
 557
 558     @return: Query data object
 559
 560     """
 561     raise NotImplementedError()
 562
 563   def NewStyleQuery(self, lu):
 564     """Collect data and execute query.
 565
 566     """
 567     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 568                                   sort_by_name=self.sort_by_name)
 569
 570   def OldStyleQuery(self, lu):
 571     """Collect data and execute query.
 572
 573     """
 574     return self.query.OldStyleQuery(self._GetQueryData(lu),
 575                                     sort_by_name=self.sort_by_name)
 576
 577
 578 def _ShareAll():
 579   """Returns a dict declaring all lock levels shared.
 580
 581   """
 582   return dict.fromkeys(locking.LEVELS, 1)
 583
 584
 585 def _MakeLegacyNodeInfo(data):
 586   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 587
 588   Converts the data into a single dictionary. This is fine for most use cases,
 589   but some require information from more than one volume group or hypervisor.
 590
 591   """
 592   (bootid, (vg_info, ), (hv_info, )) = data
 593
 594   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 595     "bootid": bootid,
 596     })
 597
 598
 599 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 600                               cur_group_uuid):
 601   """Checks if node groups for locked instances are still correct.
 602
 603   @type cfg: L{config.ConfigWriter}
 604   @param cfg: Cluster configuration
 605   @type instances: dict; string as key, L{objects.Instance} as value
 606   @param instances: Dictionary, instance name as key, instance object as value
 607   @type owned_groups: iterable of string
 608   @param owned_groups: List of owned groups
 609   @type owned_nodes: iterable of string
 610   @param owned_nodes: List of owned nodes
 611   @type cur_group_uuid: string or None
 612   @param cur_group_uuid: Optional group UUID to check against instance's groups
 613
 614   """
 615   for (name, inst) in instances.items():
 616     assert owned_nodes.issuperset(inst.all_nodes), \
 617       "Instance %s's nodes changed while we kept the lock" % name
 618
 619     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 620
 621     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 622       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 623
 624
 625 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 626   """Checks if the owned node groups are still correct for an instance.
 627
 628   @type cfg: L{config.ConfigWriter}
 629   @param cfg: The cluster configuration
 630   @type instance_name: string
 631   @param instance_name: Instance name
 632   @type owned_groups: set or frozenset
 633   @param owned_groups: List of currently owned node groups
 634
 635   """
 636   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 637
 638   if not owned_groups.issuperset(inst_groups):
 639     raise errors.OpPrereqError("Instance %s's node groups changed since"
 640                                " locks were acquired, current groups are"
 641                                " are '%s', owning groups '%s'; retry the"
 642                                " operation" %
 643                                (instance_name,
 644                                 utils.CommaJoin(inst_groups),
 645                                 utils.CommaJoin(owned_groups)),
 646                                errors.ECODE_STATE)
 647
 648   return inst_groups
 649
 650
 651 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 652   """Checks if the instances in a node group are still correct.
 653
 654   @type cfg: L{config.ConfigWriter}
 655   @param cfg: The cluster configuration
 656   @type group_uuid: string
 657   @param group_uuid: Node group UUID
 658   @type owned_instances: set or frozenset
 659   @param owned_instances: List of currently owned instances
 660
 661   """
 662   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 663   if owned_instances != wanted_instances:
 664     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 665                                " locks were acquired, wanted '%s', have '%s';"
 666                                " retry the operation" %
 667                                (group_uuid,
 668                                 utils.CommaJoin(wanted_instances),
 669                                 utils.CommaJoin(owned_instances)),
 670                                errors.ECODE_STATE)
 671
 672   return wanted_instances
 673
 674
 675 def _SupportsOob(cfg, node):
 676   """Tells if node supports OOB.
 677
 678   @type cfg: L{config.ConfigWriter}
 679   @param cfg: The cluster configuration
 680   @type node: L{objects.Node}
 681   @param node: The node
 682   @return: The OOB script if supported or an empty string otherwise
 683
 684   """
 685   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 686
 687
 688 def _GetWantedNodes(lu, nodes):
 689   """Returns list of checked and expanded node names.
 690
 691   @type lu: L{LogicalUnit}
 692   @param lu: the logical unit on whose behalf we execute
 693   @type nodes: list
 694   @param nodes: list of node names or None for all nodes
 695   @rtype: list
 696   @return: the list of nodes, sorted
 697   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 698
 699   """
 700   if nodes:
 701     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 702
 703   return utils.NiceSort(lu.cfg.GetNodeList())
 704
 705
 706 def _GetWantedInstances(lu, instances):
 707   """Returns list of checked and expanded instance names.
 708
 709   @type lu: L{LogicalUnit}
 710   @param lu: the logical unit on whose behalf we execute
 711   @type instances: list
 712   @param instances: list of instance names or None for all instances
 713   @rtype: list
 714   @return: the list of instances, sorted
 715   @raise errors.OpPrereqError: if the instances parameter is wrong type
 716   @raise errors.OpPrereqError: if any of the passed instances is not found
 717
 718   """
 719   if instances:
 720     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 721   else:
 722     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 723   return wanted
 724
 725
 726 def _GetUpdatedParams(old_params, update_dict,
 727                       use_default=True, use_none=False):
 728   """Return the new version of a parameter dictionary.
 729
 730   @type old_params: dict
 731   @param old_params: old parameters
 732   @type update_dict: dict
 733   @param update_dict: dict containing new parameter values, or
 734       constants.VALUE_DEFAULT to reset the parameter to its default
 735       value
 736   @param use_default: boolean
 737   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 738       values as 'to be deleted' values
 739   @param use_none: boolean
 740   @type use_none: whether to recognise C{None} values as 'to be
 741       deleted' values
 742   @rtype: dict
 743   @return: the new parameter dictionary
 744
 745   """
 746   params_copy = copy.deepcopy(old_params)
 747   for key, val in update_dict.iteritems():
 748     if ((use_default and val == constants.VALUE_DEFAULT) or
 749         (use_none and val is None)):
 750       try:
 751         del params_copy[key]
 752       except KeyError:
 753         pass
 754     else:
 755       params_copy[key] = val
 756   return params_copy
 757
 758
 759 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 760   """Return the new version of a instance policy.
 761
 762   @param group_policy: whether this policy applies to a group and thus
 763     we should support removal of policy entries
 764
 765   """
 766   use_none = use_default = group_policy
 767   ipolicy = copy.deepcopy(old_ipolicy)
 768   for key, value in new_ipolicy.items():
 769     if key not in constants.IPOLICY_ALL_KEYS:
 770       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 771                                  errors.ECODE_INVAL)
 772     if key in constants.IPOLICY_ISPECS:
 773       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 774       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 775                                        use_none=use_none,
 776                                        use_default=use_default)
 777     else:
 778       if not value or value == [constants.VALUE_DEFAULT]:
 779         if group_policy:
 780           del ipolicy[key]
 781         else:
 782           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 783                                      " on the cluster'" % key,
 784                                      errors.ECODE_INVAL)
 785       else:
 786         if key in constants.IPOLICY_PARAMETERS:
 787           # FIXME: we assume all such values are float
 788           try:
 789             ipolicy[key] = float(value)
 790           except (TypeError, ValueError), err:
 791             raise errors.OpPrereqError("Invalid value for attribute"
 792                                        " '%s': '%s', error: %s" %
 793                                        (key, value, err), errors.ECODE_INVAL)
 794         else:
 795           # FIXME: we assume all others are lists; this should be redone
 796           # in a nicer way
 797           ipolicy[key] = list(value)
 798   try:
 799     objects.InstancePolicy.CheckParameterSyntax(ipolicy)
 800   except errors.ConfigurationError, err:
 801     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 802                                errors.ECODE_INVAL)
 803   return ipolicy
 804
 805
 806 def _UpdateAndVerifySubDict(base, updates, type_check):
 807   """Updates and verifies a dict with sub dicts of the same type.
 808
 809   @param base: The dict with the old data
 810   @param updates: The dict with the new data
 811   @param type_check: Dict suitable to ForceDictType to verify correct types
 812   @returns: A new dict with updated and verified values
 813
 814   """
 815   def fn(old, value):
 816     new = _GetUpdatedParams(old, value)
 817     utils.ForceDictType(new, type_check)
 818     return new
 819
 820   ret = copy.deepcopy(base)
 821   ret.update(dict((key, fn(base.get(key, {}), value))
 822                   for key, value in updates.items()))
 823   return ret
 824
 825
 826 def _MergeAndVerifyHvState(op_input, obj_input):
 827   """Combines the hv state from an opcode with the one of the object
 828
 829   @param op_input: The input dict from the opcode
 830   @param obj_input: The input dict from the objects
 831   @return: The verified and updated dict
 832
 833   """
 834   if op_input:
 835     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 836     if invalid_hvs:
 837       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 838                                  " %s" % utils.CommaJoin(invalid_hvs),
 839                                  errors.ECODE_INVAL)
 840     if obj_input is None:
 841       obj_input = {}
 842     type_check = constants.HVSTS_PARAMETER_TYPES
 843     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 844
 845   return None
 846
 847
 848 def _MergeAndVerifyDiskState(op_input, obj_input):
 849   """Combines the disk state from an opcode with the one of the object
 850
 851   @param op_input: The input dict from the opcode
 852   @param obj_input: The input dict from the objects
 853   @return: The verified and updated dict
 854   """
 855   if op_input:
 856     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 857     if invalid_dst:
 858       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 859                                  utils.CommaJoin(invalid_dst),
 860                                  errors.ECODE_INVAL)
 861     type_check = constants.DSS_PARAMETER_TYPES
 862     if obj_input is None:
 863       obj_input = {}
 864     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 865                                               type_check))
 866                 for key, value in op_input.items())
 867
 868   return None
 869
 870
 871 def _ReleaseLocks(lu, level, names=None, keep=None):
 872   """Releases locks owned by an LU.
 873
 874   @type lu: L{LogicalUnit}
 875   @param level: Lock level
 876   @type names: list or None
 877   @param names: Names of locks to release
 878   @type keep: list or None
 879   @param keep: Names of locks to retain
 880
 881   """
 882   assert not (keep is not None and names is not None), \
 883          "Only one of the 'names' and the 'keep' parameters can be given"
 884
 885   if names is not None:
 886     should_release = names.__contains__
 887   elif keep:
 888     should_release = lambda name: name not in keep
 889   else:
 890     should_release = None
 891
 892   owned = lu.owned_locks(level)
 893   if not owned:
 894     # Not owning any lock at this level, do nothing
 895     pass
 896
 897   elif should_release:
 898     retain = []
 899     release = []
 900
 901     # Determine which locks to release
 902     for name in owned:
 903       if should_release(name):
 904         release.append(name)
 905       else:
 906         retain.append(name)
 907
 908     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 909
 910     # Release just some locks
 911     lu.glm.release(level, names=release)
 912
 913     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 914   else:
 915     # Release everything
 916     lu.glm.release(level)
 917
 918     assert not lu.glm.is_owned(level), "No locks should be owned"
 919
 920
 921 def _MapInstanceDisksToNodes(instances):
 922   """Creates a map from (node, volume) to instance name.
 923
 924   @type instances: list of L{objects.Instance}
 925   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 926
 927   """
 928   return dict(((node, vol), inst.name)
 929               for inst in instances
 930               for (node, vols) in inst.MapLVsByNode().items()
 931               for vol in vols)
 932
 933
 934 def _RunPostHook(lu, node_name):
 935   """Runs the post-hook for an opcode on a single node.
 936
 937   """
 938   hm = lu.proc.BuildHooksManager(lu)
 939   try:
 940     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 941   except:
 942     # pylint: disable=W0702
 943     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 944
 945
 946 def _CheckOutputFields(static, dynamic, selected):
 947   """Checks whether all selected fields are valid.
 948
 949   @type static: L{utils.FieldSet}
 950   @param static: static fields set
 951   @type dynamic: L{utils.FieldSet}
 952   @param dynamic: dynamic fields set
 953
 954   """
 955   f = utils.FieldSet()
 956   f.Extend(static)
 957   f.Extend(dynamic)
 958
 959   delta = f.NonMatching(selected)
 960   if delta:
 961     raise errors.OpPrereqError("Unknown output fields selected: %s"
 962                                % ",".join(delta), errors.ECODE_INVAL)
 963
 964
 965 def _CheckGlobalHvParams(params):
 966   """Validates that given hypervisor params are not global ones.
 967
 968   This will ensure that instances don't get customised versions of
 969   global params.
 970
 971   """
 972   used_globals = constants.HVC_GLOBALS.intersection(params)
 973   if used_globals:
 974     msg = ("The following hypervisor parameters are global and cannot"
 975            " be customized at instance level, please modify them at"
 976            " cluster level: %s" % utils.CommaJoin(used_globals))
 977     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 978
 979
 980 def _CheckNodeOnline(lu, node, msg=None):
 981   """Ensure that a given node is online.
 982
 983   @param lu: the LU on behalf of which we make the check
 984   @param node: the node to check
 985   @param msg: if passed, should be a message to replace the default one
 986   @raise errors.OpPrereqError: if the node is offline
 987
 988   """
 989   if msg is None:
 990     msg = "Can't use offline node"
 991   if lu.cfg.GetNodeInfo(node).offline:
 992     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 993
 994
 995 def _CheckNodeNotDrained(lu, node):
 996   """Ensure that a given node is not drained.
 997
 998   @param lu: the LU on behalf of which we make the check
 999   @param node: the node to check
1000   @raise errors.OpPrereqError: if the node is drained
1001
1002   """
1003   if lu.cfg.GetNodeInfo(node).drained:
1004     raise errors.OpPrereqError("Can't use drained node %s" % node,
1005                                errors.ECODE_STATE)
1006
1007
1008 def _CheckNodeVmCapable(lu, node):
1009   """Ensure that a given node is vm capable.
1010
1011   @param lu: the LU on behalf of which we make the check
1012   @param node: the node to check
1013   @raise errors.OpPrereqError: if the node is not vm capable
1014
1015   """
1016   if not lu.cfg.GetNodeInfo(node).vm_capable:
1017     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1018                                errors.ECODE_STATE)
1019
1020
1021 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1022   """Ensure that a node supports a given OS.
1023
1024   @param lu: the LU on behalf of which we make the check
1025   @param node: the node to check
1026   @param os_name: the OS to query about
1027   @param force_variant: whether to ignore variant errors
1028   @raise errors.OpPrereqError: if the node is not supporting the OS
1029
1030   """
1031   result = lu.rpc.call_os_get(node, os_name)
1032   result.Raise("OS '%s' not in supported OS list for node %s" %
1033                (os_name, node),
1034                prereq=True, ecode=errors.ECODE_INVAL)
1035   if not force_variant:
1036     _CheckOSVariant(result.payload, os_name)
1037
1038
1039 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1040   """Ensure that a node has the given secondary ip.
1041
1042   @type lu: L{LogicalUnit}
1043   @param lu: the LU on behalf of which we make the check
1044   @type node: string
1045   @param node: the node to check
1046   @type secondary_ip: string
1047   @param secondary_ip: the ip to check
1048   @type prereq: boolean
1049   @param prereq: whether to throw a prerequisite or an execute error
1050   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1051   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1052
1053   """
1054   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1055   result.Raise("Failure checking secondary ip on node %s" % node,
1056                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1057   if not result.payload:
1058     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1059            " please fix and re-run this command" % secondary_ip)
1060     if prereq:
1061       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1062     else:
1063       raise errors.OpExecError(msg)
1064
1065
1066 def _GetClusterDomainSecret():
1067   """Reads the cluster domain secret.
1068
1069   """
1070   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1071                                strict=True)
1072
1073
1074 def _CheckInstanceState(lu, instance, req_states, msg=None):
1075   """Ensure that an instance is in one of the required states.
1076
1077   @param lu: the LU on behalf of which we make the check
1078   @param instance: the instance to check
1079   @param msg: if passed, should be a message to replace the default one
1080   @raise errors.OpPrereqError: if the instance is not in the required state
1081
1082   """
1083   if msg is None:
1084     msg = "can't use instance from outside %s states" % ", ".join(req_states)
1085   if instance.admin_state not in req_states:
1086     raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1087                                (instance.name, instance.admin_state, msg),
1088                                errors.ECODE_STATE)
1089
1090   if constants.ADMINST_UP not in req_states:
1091     pnode = instance.primary_node
1092     ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1093     ins_l.Raise("Can't contact node %s for instance information" % pnode,
1094                 prereq=True, ecode=errors.ECODE_ENVIRON)
1095
1096     if instance.name in ins_l.payload:
1097       raise errors.OpPrereqError("Instance %s is running, %s" %
1098                                  (instance.name, msg), errors.ECODE_STATE)
1099
1100
1101 def _ComputeMinMaxSpec(name, ipolicy, value):
1102   """Computes if value is in the desired range.
1103
1104   @param name: name of the parameter for which we perform the check
1105   @param ipolicy: dictionary containing min, max and std values
1106   @param value: actual value that we want to use
1107   @return: None or element not meeting the criteria
1108
1109
1110   """
1111   if value in [None, constants.VALUE_AUTO]:
1112     return None
1113   max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1114   min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1115   if value > max_v or min_v > value:
1116     return ("%s value %s is not in range [%s, %s]" %
1117             (name, value, min_v, max_v))
1118   return None
1119
1120
1121 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1122                                  nic_count, disk_sizes, spindle_use,
1123                                  _compute_fn=_ComputeMinMaxSpec):
1124   """Verifies ipolicy against provided specs.
1125
1126   @type ipolicy: dict
1127   @param ipolicy: The ipolicy
1128   @type mem_size: int
1129   @param mem_size: The memory size
1130   @type cpu_count: int
1131   @param cpu_count: Used cpu cores
1132   @type disk_count: int
1133   @param disk_count: Number of disks used
1134   @type nic_count: int
1135   @param nic_count: Number of nics used
1136   @type disk_sizes: list of ints
1137   @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1138   @type spindle_use: int
1139   @param spindle_use: The number of spindles this instance uses
1140   @param _compute_fn: The compute function (unittest only)
1141   @return: A list of violations, or an empty list of no violations are found
1142
1143   """
1144   assert disk_count == len(disk_sizes)
1145
1146   test_settings = [
1147     (constants.ISPEC_MEM_SIZE, mem_size),
1148     (constants.ISPEC_CPU_COUNT, cpu_count),
1149     (constants.ISPEC_DISK_COUNT, disk_count),
1150     (constants.ISPEC_NIC_COUNT, nic_count),
1151     (constants.ISPEC_SPINDLE_USE, spindle_use),
1152     ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1153
1154   return filter(None,
1155                 (_compute_fn(name, ipolicy, value)
1156                  for (name, value) in test_settings))
1157
1158
1159 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1160                                      _compute_fn=_ComputeIPolicySpecViolation):
1161   """Compute if instance meets the specs of ipolicy.
1162
1163   @type ipolicy: dict
1164   @param ipolicy: The ipolicy to verify against
1165   @type instance: L{objects.Instance}
1166   @param instance: The instance to verify
1167   @param _compute_fn: The function to verify ipolicy (unittest only)
1168   @see: L{_ComputeIPolicySpecViolation}
1169
1170   """
1171   mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1172   cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1173   spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1174   disk_count = len(instance.disks)
1175   disk_sizes = [disk.size for disk in instance.disks]
1176   nic_count = len(instance.nics)
1177
1178   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1179                      disk_sizes, spindle_use)
1180
1181
1182 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1183     _compute_fn=_ComputeIPolicySpecViolation):
1184   """Compute if instance specs meets the specs of ipolicy.
1185
1186   @type ipolicy: dict
1187   @param ipolicy: The ipolicy to verify against
1188   @param instance_spec: dict
1189   @param instance_spec: The instance spec to verify
1190   @param _compute_fn: The function to verify ipolicy (unittest only)
1191   @see: L{_ComputeIPolicySpecViolation}
1192
1193   """
1194   mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1195   cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1196   disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1197   disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1198   nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1199   spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1200
1201   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1202                      disk_sizes, spindle_use)
1203
1204
1205 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1206                                  target_group,
1207                                  _compute_fn=_ComputeIPolicyInstanceViolation):
1208   """Compute if instance meets the specs of the new target group.
1209
1210   @param ipolicy: The ipolicy to verify
1211   @param instance: The instance object to verify
1212   @param current_group: The current group of the instance
1213   @param target_group: The new group of the instance
1214   @param _compute_fn: The function to verify ipolicy (unittest only)
1215   @see: L{_ComputeIPolicySpecViolation}
1216
1217   """
1218   if current_group == target_group:
1219     return []
1220   else:
1221     return _compute_fn(ipolicy, instance)
1222
1223
1224 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1225                             _compute_fn=_ComputeIPolicyNodeViolation):
1226   """Checks that the target node is correct in terms of instance policy.
1227
1228   @param ipolicy: The ipolicy to verify
1229   @param instance: The instance object to verify
1230   @param node: The new node to relocate
1231   @param ignore: Ignore violations of the ipolicy
1232   @param _compute_fn: The function to verify ipolicy (unittest only)
1233   @see: L{_ComputeIPolicySpecViolation}
1234
1235   """
1236   primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1237   res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1238
1239   if res:
1240     msg = ("Instance does not meet target node group's (%s) instance"
1241            " policy: %s") % (node.group, utils.CommaJoin(res))
1242     if ignore:
1243       lu.LogWarning(msg)
1244     else:
1245       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1246
1247
1248 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1249   """Computes a set of any instances that would violate the new ipolicy.
1250
1251   @param old_ipolicy: The current (still in-place) ipolicy
1252   @param new_ipolicy: The new (to become) ipolicy
1253   @param instances: List of instances to verify
1254   @return: A list of instances which violates the new ipolicy but did not before
1255
1256   """
1257   return (_ComputeViolatingInstances(old_ipolicy, instances) -
1258           _ComputeViolatingInstances(new_ipolicy, instances))
1259
1260
1261 def _ExpandItemName(fn, name, kind):
1262   """Expand an item name.
1263
1264   @param fn: the function to use for expansion
1265   @param name: requested item name
1266   @param kind: text description ('Node' or 'Instance')
1267   @return: the resolved (full) name
1268   @raise errors.OpPrereqError: if the item is not found
1269
1270   """
1271   full_name = fn(name)
1272   if full_name is None:
1273     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1274                                errors.ECODE_NOENT)
1275   return full_name
1276
1277
1278 def _ExpandNodeName(cfg, name):
1279   """Wrapper over L{_ExpandItemName} for nodes."""
1280   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1281
1282
1283 def _ExpandInstanceName(cfg, name):
1284   """Wrapper over L{_ExpandItemName} for instance."""
1285   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1286
1287
1288 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1289                           minmem, maxmem, vcpus, nics, disk_template, disks,
1290                           bep, hvp, hypervisor_name, tags):
1291   """Builds instance related env variables for hooks
1292
1293   This builds the hook environment from individual variables.
1294
1295   @type name: string
1296   @param name: the name of the instance
1297   @type primary_node: string
1298   @param primary_node: the name of the instance's primary node
1299   @type secondary_nodes: list
1300   @param secondary_nodes: list of secondary nodes as strings
1301   @type os_type: string
1302   @param os_type: the name of the instance's OS
1303   @type status: string
1304   @param status: the desired status of the instance
1305   @type minmem: string
1306   @param minmem: the minimum memory size of the instance
1307   @type maxmem: string
1308   @param maxmem: the maximum memory size of the instance
1309   @type vcpus: string
1310   @param vcpus: the count of VCPUs the instance has
1311   @type nics: list
1312   @param nics: list of tuples (ip, mac, mode, link) representing
1313       the NICs the instance has
1314   @type disk_template: string
1315   @param disk_template: the disk template of the instance
1316   @type disks: list
1317   @param disks: the list of (size, mode) pairs
1318   @type bep: dict
1319   @param bep: the backend parameters for the instance
1320   @type hvp: dict
1321   @param hvp: the hypervisor parameters for the instance
1322   @type hypervisor_name: string
1323   @param hypervisor_name: the hypervisor for the instance
1324   @type tags: list
1325   @param tags: list of instance tags as strings
1326   @rtype: dict
1327   @return: the hook environment for this instance
1328
1329   """
1330   env = {
1331     "OP_TARGET": name,
1332     "INSTANCE_NAME": name,
1333     "INSTANCE_PRIMARY": primary_node,
1334     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1335     "INSTANCE_OS_TYPE": os_type,
1336     "INSTANCE_STATUS": status,
1337     "INSTANCE_MINMEM": minmem,
1338     "INSTANCE_MAXMEM": maxmem,
1339     # TODO(2.7) remove deprecated "memory" value
1340     "INSTANCE_MEMORY": maxmem,
1341     "INSTANCE_VCPUS": vcpus,
1342     "INSTANCE_DISK_TEMPLATE": disk_template,
1343     "INSTANCE_HYPERVISOR": hypervisor_name,
1344   }
1345   if nics:
1346     nic_count = len(nics)
1347     for idx, (ip, mac, mode, link) in enumerate(nics):
1348       if ip is None:
1349         ip = ""
1350       env["INSTANCE_NIC%d_IP" % idx] = ip
1351       env["INSTANCE_NIC%d_MAC" % idx] = mac
1352       env["INSTANCE_NIC%d_MODE" % idx] = mode
1353       env["INSTANCE_NIC%d_LINK" % idx] = link
1354       if mode == constants.NIC_MODE_BRIDGED:
1355         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1356   else:
1357     nic_count = 0
1358
1359   env["INSTANCE_NIC_COUNT"] = nic_count
1360
1361   if disks:
1362     disk_count = len(disks)
1363     for idx, (size, mode) in enumerate(disks):
1364       env["INSTANCE_DISK%d_SIZE" % idx] = size
1365       env["INSTANCE_DISK%d_MODE" % idx] = mode
1366   else:
1367     disk_count = 0
1368
1369   env["INSTANCE_DISK_COUNT"] = disk_count
1370
1371   if not tags:
1372     tags = []
1373
1374   env["INSTANCE_TAGS"] = " ".join(tags)
1375
1376   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1377     for key, value in source.items():
1378       env["INSTANCE_%s_%s" % (kind, key)] = value
1379
1380   return env
1381
1382
1383 def _NICListToTuple(lu, nics):
1384   """Build a list of nic information tuples.
1385
1386   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1387   value in LUInstanceQueryData.
1388
1389   @type lu:  L{LogicalUnit}
1390   @param lu: the logical unit on whose behalf we execute
1391   @type nics: list of L{objects.NIC}
1392   @param nics: list of nics to convert to hooks tuples
1393
1394   """
1395   hooks_nics = []
1396   cluster = lu.cfg.GetClusterInfo()
1397   for nic in nics:
1398     ip = nic.ip
1399     mac = nic.mac
1400     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1401     mode = filled_params[constants.NIC_MODE]
1402     link = filled_params[constants.NIC_LINK]
1403     hooks_nics.append((ip, mac, mode, link))
1404   return hooks_nics
1405
1406
1407 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1408   """Builds instance related env variables for hooks from an object.
1409
1410   @type lu: L{LogicalUnit}
1411   @param lu: the logical unit on whose behalf we execute
1412   @type instance: L{objects.Instance}
1413   @param instance: the instance for which we should build the
1414       environment
1415   @type override: dict
1416   @param override: dictionary with key/values that will override
1417       our values
1418   @rtype: dict
1419   @return: the hook environment dictionary
1420
1421   """
1422   cluster = lu.cfg.GetClusterInfo()
1423   bep = cluster.FillBE(instance)
1424   hvp = cluster.FillHV(instance)
1425   args = {
1426     "name": instance.name,
1427     "primary_node": instance.primary_node,
1428     "secondary_nodes": instance.secondary_nodes,
1429     "os_type": instance.os,
1430     "status": instance.admin_state,
1431     "maxmem": bep[constants.BE_MAXMEM],
1432     "minmem": bep[constants.BE_MINMEM],
1433     "vcpus": bep[constants.BE_VCPUS],
1434     "nics": _NICListToTuple(lu, instance.nics),
1435     "disk_template": instance.disk_template,
1436     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1437     "bep": bep,
1438     "hvp": hvp,
1439     "hypervisor_name": instance.hypervisor,
1440     "tags": instance.tags,
1441   }
1442   if override:
1443     args.update(override)
1444   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1445
1446
1447 def _AdjustCandidatePool(lu, exceptions):
1448   """Adjust the candidate pool after node operations.
1449
1450   """
1451   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1452   if mod_list:
1453     lu.LogInfo("Promoted nodes to master candidate role: %s",
1454                utils.CommaJoin(node.name for node in mod_list))
1455     for name in mod_list:
1456       lu.context.ReaddNode(name)
1457   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1458   if mc_now > mc_max:
1459     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1460                (mc_now, mc_max))
1461
1462
1463 def _DecideSelfPromotion(lu, exceptions=None):
1464   """Decide whether I should promote myself as a master candidate.
1465
1466   """
1467   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1468   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1469   # the new node will increase mc_max with one, so:
1470   mc_should = min(mc_should + 1, cp_size)
1471   return mc_now < mc_should
1472
1473
1474 def _CalculateGroupIPolicy(cluster, group):
1475   """Calculate instance policy for group.
1476
1477   """
1478   return cluster.SimpleFillIPolicy(group.ipolicy)
1479
1480
1481 def _ComputeViolatingInstances(ipolicy, instances):
1482   """Computes a set of instances who violates given ipolicy.
1483
1484   @param ipolicy: The ipolicy to verify
1485   @type instances: object.Instance
1486   @param instances: List of instances to verify
1487   @return: A frozenset of instance names violating the ipolicy
1488
1489   """
1490   return frozenset([inst.name for inst in instances
1491                     if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1492
1493
1494 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1495   """Check that the brigdes needed by a list of nics exist.
1496
1497   """
1498   cluster = lu.cfg.GetClusterInfo()
1499   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1500   brlist = [params[constants.NIC_LINK] for params in paramslist
1501             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1502   if brlist:
1503     result = lu.rpc.call_bridges_exist(target_node, brlist)
1504     result.Raise("Error checking bridges on destination node '%s'" %
1505                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1506
1507
1508 def _CheckInstanceBridgesExist(lu, instance, node=None):
1509   """Check that the brigdes needed by an instance exist.
1510
1511   """
1512   if node is None:
1513     node = instance.primary_node
1514   _CheckNicsBridgesExist(lu, instance.nics, node)
1515
1516
1517 def _CheckOSVariant(os_obj, name):
1518   """Check whether an OS name conforms to the os variants specification.
1519
1520   @type os_obj: L{objects.OS}
1521   @param os_obj: OS object to check
1522   @type name: string
1523   @param name: OS name passed by the user, to check for validity
1524
1525   """
1526   variant = objects.OS.GetVariant(name)
1527   if not os_obj.supported_variants:
1528     if variant:
1529       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1530                                  " passed)" % (os_obj.name, variant),
1531                                  errors.ECODE_INVAL)
1532     return
1533   if not variant:
1534     raise errors.OpPrereqError("OS name must include a variant",
1535                                errors.ECODE_INVAL)
1536
1537   if variant not in os_obj.supported_variants:
1538     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1539
1540
1541 def _GetNodeInstancesInner(cfg, fn):
1542   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1543
1544
1545 def _GetNodeInstances(cfg, node_name):
1546   """Returns a list of all primary and secondary instances on a node.
1547
1548   """
1549
1550   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1551
1552
1553 def _GetNodePrimaryInstances(cfg, node_name):
1554   """Returns primary instances on a node.
1555
1556   """
1557   return _GetNodeInstancesInner(cfg,
1558                                 lambda inst: node_name == inst.primary_node)
1559
1560
1561 def _GetNodeSecondaryInstances(cfg, node_name):
1562   """Returns secondary instances on a node.
1563
1564   """
1565   return _GetNodeInstancesInner(cfg,
1566                                 lambda inst: node_name in inst.secondary_nodes)
1567
1568
1569 def _GetStorageTypeArgs(cfg, storage_type):
1570   """Returns the arguments for a storage type.
1571
1572   """
1573   # Special case for file storage
1574   if storage_type == constants.ST_FILE:
1575     # storage.FileStorage wants a list of storage directories
1576     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1577
1578   return []
1579
1580
1581 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1582   faulty = []
1583
1584   for dev in instance.disks:
1585     cfg.SetDiskID(dev, node_name)
1586
1587   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1588   result.Raise("Failed to get disk status from node %s" % node_name,
1589                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1590
1591   for idx, bdev_status in enumerate(result.payload):
1592     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1593       faulty.append(idx)
1594
1595   return faulty
1596
1597
1598 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1599   """Check the sanity of iallocator and node arguments and use the
1600   cluster-wide iallocator if appropriate.
1601
1602   Check that at most one of (iallocator, node) is specified. If none is
1603   specified, then the LU's opcode's iallocator slot is filled with the
1604   cluster-wide default iallocator.
1605
1606   @type iallocator_slot: string
1607   @param iallocator_slot: the name of the opcode iallocator slot
1608   @type node_slot: string
1609   @param node_slot: the name of the opcode target node slot
1610
1611   """
1612   node = getattr(lu.op, node_slot, None)
1613   iallocator = getattr(lu.op, iallocator_slot, None)
1614
1615   if node is not None and iallocator is not None:
1616     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1617                                errors.ECODE_INVAL)
1618   elif node is None and iallocator is None:
1619     default_iallocator = lu.cfg.GetDefaultIAllocator()
1620     if default_iallocator:
1621       setattr(lu.op, iallocator_slot, default_iallocator)
1622     else:
1623       raise errors.OpPrereqError("No iallocator or node given and no"
1624                                  " cluster-wide default iallocator found;"
1625                                  " please specify either an iallocator or a"
1626                                  " node, or set a cluster-wide default"
1627                                  " iallocator")
1628
1629
1630 def _GetDefaultIAllocator(cfg, iallocator):
1631   """Decides on which iallocator to use.
1632
1633   @type cfg: L{config.ConfigWriter}
1634   @param cfg: Cluster configuration object
1635   @type iallocator: string or None
1636   @param iallocator: Iallocator specified in opcode
1637   @rtype: string
1638   @return: Iallocator name
1639
1640   """
1641   if not iallocator:
1642     # Use default iallocator
1643     iallocator = cfg.GetDefaultIAllocator()
1644
1645   if not iallocator:
1646     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1647                                " opcode nor as a cluster-wide default",
1648                                errors.ECODE_INVAL)
1649
1650   return iallocator
1651
1652
1653 class LUClusterPostInit(LogicalUnit):
1654   """Logical unit for running hooks after cluster initialization.
1655
1656   """
1657   HPATH = "cluster-init"
1658   HTYPE = constants.HTYPE_CLUSTER
1659
1660   def BuildHooksEnv(self):
1661     """Build hooks env.
1662
1663     """
1664     return {
1665       "OP_TARGET": self.cfg.GetClusterName(),
1666       }
1667
1668   def BuildHooksNodes(self):
1669     """Build hooks nodes.
1670
1671     """
1672     return ([], [self.cfg.GetMasterNode()])
1673
1674   def Exec(self, feedback_fn):
1675     """Nothing to do.
1676
1677     """
1678     return True
1679
1680
1681 class LUClusterDestroy(LogicalUnit):
1682   """Logical unit for destroying the cluster.
1683
1684   """
1685   HPATH = "cluster-destroy"
1686   HTYPE = constants.HTYPE_CLUSTER
1687
1688   def BuildHooksEnv(self):
1689     """Build hooks env.
1690
1691     """
1692     return {
1693       "OP_TARGET": self.cfg.GetClusterName(),
1694       }
1695
1696   def BuildHooksNodes(self):
1697     """Build hooks nodes.
1698
1699     """
1700     return ([], [])
1701
1702   def CheckPrereq(self):
1703     """Check prerequisites.
1704
1705     This checks whether the cluster is empty.
1706
1707     Any errors are signaled by raising errors.OpPrereqError.
1708
1709     """
1710     master = self.cfg.GetMasterNode()
1711
1712     nodelist = self.cfg.GetNodeList()
1713     if len(nodelist) != 1 or nodelist[0] != master:
1714       raise errors.OpPrereqError("There are still %d node(s) in"
1715                                  " this cluster." % (len(nodelist) - 1),
1716                                  errors.ECODE_INVAL)
1717     instancelist = self.cfg.GetInstanceList()
1718     if instancelist:
1719       raise errors.OpPrereqError("There are still %d instance(s) in"
1720                                  " this cluster." % len(instancelist),
1721                                  errors.ECODE_INVAL)
1722
1723   def Exec(self, feedback_fn):
1724     """Destroys the cluster.
1725
1726     """
1727     master_params = self.cfg.GetMasterNetworkParameters()
1728
1729     # Run post hooks on master node before it's removed
1730     _RunPostHook(self, master_params.name)
1731
1732     ems = self.cfg.GetUseExternalMipScript()
1733     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1734                                                      master_params, ems)
1735     if result.fail_msg:
1736       self.LogWarning("Error disabling the master IP address: %s",
1737                       result.fail_msg)
1738
1739     return master_params.name
1740
1741
1742 def _VerifyCertificate(filename):
1743   """Verifies a certificate for L{LUClusterVerifyConfig}.
1744
1745   @type filename: string
1746   @param filename: Path to PEM file
1747
1748   """
1749   try:
1750     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1751                                            utils.ReadFile(filename))
1752   except Exception, err: # pylint: disable=W0703
1753     return (LUClusterVerifyConfig.ETYPE_ERROR,
1754             "Failed to load X509 certificate %s: %s" % (filename, err))
1755
1756   (errcode, msg) = \
1757     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1758                                 constants.SSL_CERT_EXPIRATION_ERROR)
1759
1760   if msg:
1761     fnamemsg = "While verifying %s: %s" % (filename, msg)
1762   else:
1763     fnamemsg = None
1764
1765   if errcode is None:
1766     return (None, fnamemsg)
1767   elif errcode == utils.CERT_WARNING:
1768     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1769   elif errcode == utils.CERT_ERROR:
1770     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1771
1772   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1773
1774
1775 def _GetAllHypervisorParameters(cluster, instances):
1776   """Compute the set of all hypervisor parameters.
1777
1778   @type cluster: L{objects.Cluster}
1779   @param cluster: the cluster object
1780   @param instances: list of L{objects.Instance}
1781   @param instances: additional instances from which to obtain parameters
1782   @rtype: list of (origin, hypervisor, parameters)
1783   @return: a list with all parameters found, indicating the hypervisor they
1784        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1785
1786   """
1787   hvp_data = []
1788
1789   for hv_name in cluster.enabled_hypervisors:
1790     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1791
1792   for os_name, os_hvp in cluster.os_hvp.items():
1793     for hv_name, hv_params in os_hvp.items():
1794       if hv_params:
1795         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1796         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1797
1798   # TODO: collapse identical parameter values in a single one
1799   for instance in instances:
1800     if instance.hvparams:
1801       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1802                        cluster.FillHV(instance)))
1803
1804   return hvp_data
1805
1806
1807 class _VerifyErrors(object):
1808   """Mix-in for cluster/group verify LUs.
1809
1810   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1811   self.op and self._feedback_fn to be available.)
1812
1813   """
1814
1815   ETYPE_FIELD = "code"
1816   ETYPE_ERROR = "ERROR"
1817   ETYPE_WARNING = "WARNING"
1818
1819   def _Error(self, ecode, item, msg, *args, **kwargs):
1820     """Format an error message.
1821
1822     Based on the opcode's error_codes parameter, either format a
1823     parseable error code, or a simpler error string.
1824
1825     This must be called only from Exec and functions called from Exec.
1826
1827     """
1828     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1829     itype, etxt, _ = ecode
1830     # first complete the msg
1831     if args:
1832       msg = msg % args
1833     # then format the whole message
1834     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1835       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1836     else:
1837       if item:
1838         item = " " + item
1839       else:
1840         item = ""
1841       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1842     # and finally report it via the feedback_fn
1843     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1844
1845   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1846     """Log an error message if the passed condition is True.
1847
1848     """
1849     cond = (bool(cond)
1850             or self.op.debug_simulate_errors) # pylint: disable=E1101
1851
1852     # If the error code is in the list of ignored errors, demote the error to a
1853     # warning
1854     (_, etxt, _) = ecode
1855     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1856       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1857
1858     if cond:
1859       self._Error(ecode, *args, **kwargs)
1860
1861     # do not mark the operation as failed for WARN cases only
1862     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1863       self.bad = self.bad or cond
1864
1865
1866 class LUClusterVerify(NoHooksLU):
1867   """Submits all jobs necessary to verify the cluster.
1868
1869   """
1870   REQ_BGL = False
1871
1872   def ExpandNames(self):
1873     self.needed_locks = {}
1874
1875   def Exec(self, feedback_fn):
1876     jobs = []
1877
1878     if self.op.group_name:
1879       groups = [self.op.group_name]
1880       depends_fn = lambda: None
1881     else:
1882       groups = self.cfg.GetNodeGroupList()
1883
1884       # Verify global configuration
1885       jobs.append([
1886         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1887         ])
1888
1889       # Always depend on global verification
1890       depends_fn = lambda: [(-len(jobs), [])]
1891
1892     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1893                                             ignore_errors=self.op.ignore_errors,
1894                                             depends=depends_fn())]
1895                 for group in groups)
1896
1897     # Fix up all parameters
1898     for op in itertools.chain(*jobs): # pylint: disable=W0142
1899       op.debug_simulate_errors = self.op.debug_simulate_errors
1900       op.verbose = self.op.verbose
1901       op.error_codes = self.op.error_codes
1902       try:
1903         op.skip_checks = self.op.skip_checks
1904       except AttributeError:
1905         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1906
1907     return ResultWithJobs(jobs)
1908
1909
1910 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1911   """Verifies the cluster config.
1912
1913   """
1914   REQ_BGL = False
1915
1916   def _VerifyHVP(self, hvp_data):
1917     """Verifies locally the syntax of the hypervisor parameters.
1918
1919     """
1920     for item, hv_name, hv_params in hvp_data:
1921       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1922              (item, hv_name))
1923       try:
1924         hv_class = hypervisor.GetHypervisor(hv_name)
1925         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1926         hv_class.CheckParameterSyntax(hv_params)
1927       except errors.GenericError, err:
1928         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1929
1930   def ExpandNames(self):
1931     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1932     self.share_locks = _ShareAll()
1933
1934   def CheckPrereq(self):
1935     """Check prerequisites.
1936
1937     """
1938     # Retrieve all information
1939     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1940     self.all_node_info = self.cfg.GetAllNodesInfo()
1941     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1942
1943   def Exec(self, feedback_fn):
1944     """Verify integrity of cluster, performing various test on nodes.
1945
1946     """
1947     self.bad = False
1948     self._feedback_fn = feedback_fn
1949
1950     feedback_fn("* Verifying cluster config")
1951
1952     for msg in self.cfg.VerifyConfig():
1953       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1954
1955     feedback_fn("* Verifying cluster certificate files")
1956
1957     for cert_filename in constants.ALL_CERT_FILES:
1958       (errcode, msg) = _VerifyCertificate(cert_filename)
1959       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1960
1961     feedback_fn("* Verifying hypervisor parameters")
1962
1963     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1964                                                 self.all_inst_info.values()))
1965
1966     feedback_fn("* Verifying all nodes belong to an existing group")
1967
1968     # We do this verification here because, should this bogus circumstance
1969     # occur, it would never be caught by VerifyGroup, which only acts on
1970     # nodes/instances reachable from existing node groups.
1971
1972     dangling_nodes = set(node.name for node in self.all_node_info.values()
1973                          if node.group not in self.all_group_info)
1974
1975     dangling_instances = {}
1976     no_node_instances = []
1977
1978     for inst in self.all_inst_info.values():
1979       if inst.primary_node in dangling_nodes:
1980         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1981       elif inst.primary_node not in self.all_node_info:
1982         no_node_instances.append(inst.name)
1983
1984     pretty_dangling = [
1985         "%s (%s)" %
1986         (node.name,
1987          utils.CommaJoin(dangling_instances.get(node.name,
1988                                                 ["no instances"])))
1989         for node in dangling_nodes]
1990
1991     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1992                   None,
1993                   "the following nodes (and their instances) belong to a non"
1994                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1995
1996     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1997                   None,
1998                   "the following instances have a non-existing primary-node:"
1999                   " %s", utils.CommaJoin(no_node_instances))
2000
2001     return not self.bad
2002
2003
2004 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2005   """Verifies the status of a node group.
2006
2007   """
2008   HPATH = "cluster-verify"
2009   HTYPE = constants.HTYPE_CLUSTER
2010   REQ_BGL = False
2011
2012   _HOOKS_INDENT_RE = re.compile("^", re.M)
2013
2014   class NodeImage(object):
2015     """A class representing the logical and physical status of a node.
2016
2017     @type name: string
2018     @ivar name: the node name to which this object refers
2019     @ivar volumes: a structure as returned from
2020         L{ganeti.backend.GetVolumeList} (runtime)
2021     @ivar instances: a list of running instances (runtime)
2022     @ivar pinst: list of configured primary instances (config)
2023     @ivar sinst: list of configured secondary instances (config)
2024     @ivar sbp: dictionary of {primary-node: list of instances} for all
2025         instances for which this node is secondary (config)
2026     @ivar mfree: free memory, as reported by hypervisor (runtime)
2027     @ivar dfree: free disk, as reported by the node (runtime)
2028     @ivar offline: the offline status (config)
2029     @type rpc_fail: boolean
2030     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2031         not whether the individual keys were correct) (runtime)
2032     @type lvm_fail: boolean
2033     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2034     @type hyp_fail: boolean
2035     @ivar hyp_fail: whether the RPC call didn't return the instance list
2036     @type ghost: boolean
2037     @ivar ghost: whether this is a known node or not (config)
2038     @type os_fail: boolean
2039     @ivar os_fail: whether the RPC call didn't return valid OS data
2040     @type oslist: list
2041     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2042     @type vm_capable: boolean
2043     @ivar vm_capable: whether the node can host instances
2044
2045     """
2046     def __init__(self, offline=False, name=None, vm_capable=True):
2047       self.name = name
2048       self.volumes = {}
2049       self.instances = []
2050       self.pinst = []
2051       self.sinst = []
2052       self.sbp = {}
2053       self.mfree = 0
2054       self.dfree = 0
2055       self.offline = offline
2056       self.vm_capable = vm_capable
2057       self.rpc_fail = False
2058       self.lvm_fail = False
2059       self.hyp_fail = False
2060       self.ghost = False
2061       self.os_fail = False
2062       self.oslist = {}
2063
2064   def ExpandNames(self):
2065     # This raises errors.OpPrereqError on its own:
2066     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2067
2068     # Get instances in node group; this is unsafe and needs verification later
2069     inst_names = \
2070       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2071
2072     self.needed_locks = {
2073       locking.LEVEL_INSTANCE: inst_names,
2074       locking.LEVEL_NODEGROUP: [self.group_uuid],
2075       locking.LEVEL_NODE: [],
2076       }
2077
2078     self.share_locks = _ShareAll()
2079
2080   def DeclareLocks(self, level):
2081     if level == locking.LEVEL_NODE:
2082       # Get members of node group; this is unsafe and needs verification later
2083       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2084
2085       all_inst_info = self.cfg.GetAllInstancesInfo()
2086
2087       # In Exec(), we warn about mirrored instances that have primary and
2088       # secondary living in separate node groups. To fully verify that
2089       # volumes for these instances are healthy, we will need to do an
2090       # extra call to their secondaries. We ensure here those nodes will
2091       # be locked.
2092       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2093         # Important: access only the instances whose lock is owned
2094         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2095           nodes.update(all_inst_info[inst].secondary_nodes)
2096
2097       self.needed_locks[locking.LEVEL_NODE] = nodes
2098
2099   def CheckPrereq(self):
2100     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2101     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2102
2103     group_nodes = set(self.group_info.members)
2104     group_instances = \
2105       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2106
2107     unlocked_nodes = \
2108         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2109
2110     unlocked_instances = \
2111         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2112
2113     if unlocked_nodes:
2114       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2115                                  utils.CommaJoin(unlocked_nodes),
2116                                  errors.ECODE_STATE)
2117
2118     if unlocked_instances:
2119       raise errors.OpPrereqError("Missing lock for instances: %s" %
2120                                  utils.CommaJoin(unlocked_instances),
2121                                  errors.ECODE_STATE)
2122
2123     self.all_node_info = self.cfg.GetAllNodesInfo()
2124     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2125
2126     self.my_node_names = utils.NiceSort(group_nodes)
2127     self.my_inst_names = utils.NiceSort(group_instances)
2128
2129     self.my_node_info = dict((name, self.all_node_info[name])
2130                              for name in self.my_node_names)
2131
2132     self.my_inst_info = dict((name, self.all_inst_info[name])
2133                              for name in self.my_inst_names)
2134
2135     # We detect here the nodes that will need the extra RPC calls for verifying
2136     # split LV volumes; they should be locked.
2137     extra_lv_nodes = set()
2138
2139     for inst in self.my_inst_info.values():
2140       if inst.disk_template in constants.DTS_INT_MIRROR:
2141         for nname in inst.all_nodes:
2142           if self.all_node_info[nname].group != self.group_uuid:
2143             extra_lv_nodes.add(nname)
2144
2145     unlocked_lv_nodes = \
2146         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2147
2148     if unlocked_lv_nodes:
2149       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2150                                  utils.CommaJoin(unlocked_lv_nodes),
2151                                  errors.ECODE_STATE)
2152     self.extra_lv_nodes = list(extra_lv_nodes)
2153
2154   def _VerifyNode(self, ninfo, nresult):
2155     """Perform some basic validation on data returned from a node.
2156
2157       - check the result data structure is well formed and has all the
2158         mandatory fields
2159       - check ganeti version
2160
2161     @type ninfo: L{objects.Node}
2162     @param ninfo: the node to check
2163     @param nresult: the results from the node
2164     @rtype: boolean
2165     @return: whether overall this call was successful (and we can expect
2166          reasonable values in the respose)
2167
2168     """
2169     node = ninfo.name
2170     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2171
2172     # main result, nresult should be a non-empty dict
2173     test = not nresult or not isinstance(nresult, dict)
2174     _ErrorIf(test, constants.CV_ENODERPC, node,
2175                   "unable to verify node: no data returned")
2176     if test:
2177       return False
2178
2179     # compares ganeti version
2180     local_version = constants.PROTOCOL_VERSION
2181     remote_version = nresult.get("version", None)
2182     test = not (remote_version and
2183                 isinstance(remote_version, (list, tuple)) and
2184                 len(remote_version) == 2)
2185     _ErrorIf(test, constants.CV_ENODERPC, node,
2186              "connection to node returned invalid data")
2187     if test:
2188       return False
2189
2190     test = local_version != remote_version[0]
2191     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2192              "incompatible protocol versions: master %s,"
2193              " node %s", local_version, remote_version[0])
2194     if test:
2195       return False
2196
2197     # node seems compatible, we can actually try to look into its results
2198
2199     # full package version
2200     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2201                   constants.CV_ENODEVERSION, node,
2202                   "software version mismatch: master %s, node %s",
2203                   constants.RELEASE_VERSION, remote_version[1],
2204                   code=self.ETYPE_WARNING)
2205
2206     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2207     if ninfo.vm_capable and isinstance(hyp_result, dict):
2208       for hv_name, hv_result in hyp_result.iteritems():
2209         test = hv_result is not None
2210         _ErrorIf(test, constants.CV_ENODEHV, node,
2211                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2212
2213     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2214     if ninfo.vm_capable and isinstance(hvp_result, list):
2215       for item, hv_name, hv_result in hvp_result:
2216         _ErrorIf(True, constants.CV_ENODEHV, node,
2217                  "hypervisor %s parameter verify failure (source %s): %s",
2218                  hv_name, item, hv_result)
2219
2220     test = nresult.get(constants.NV_NODESETUP,
2221                        ["Missing NODESETUP results"])
2222     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2223              "; ".join(test))
2224
2225     return True
2226
2227   def _VerifyNodeTime(self, ninfo, nresult,
2228                       nvinfo_starttime, nvinfo_endtime):
2229     """Check the node time.
2230
2231     @type ninfo: L{objects.Node}
2232     @param ninfo: the node to check
2233     @param nresult: the remote results for the node
2234     @param nvinfo_starttime: the start time of the RPC call
2235     @param nvinfo_endtime: the end time of the RPC call
2236
2237     """
2238     node = ninfo.name
2239     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2240
2241     ntime = nresult.get(constants.NV_TIME, None)
2242     try:
2243       ntime_merged = utils.MergeTime(ntime)
2244     except (ValueError, TypeError):
2245       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2246       return
2247
2248     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2249       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2250     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2251       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2252     else:
2253       ntime_diff = None
2254
2255     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2256              "Node time diverges by at least %s from master node time",
2257              ntime_diff)
2258
2259   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2260     """Check the node LVM results.
2261
2262     @type ninfo: L{objects.Node}
2263     @param ninfo: the node to check
2264     @param nresult: the remote results for the node
2265     @param vg_name: the configured VG name
2266
2267     """
2268     if vg_name is None:
2269       return
2270
2271     node = ninfo.name
2272     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2273
2274     # checks vg existence and size > 20G
2275     vglist = nresult.get(constants.NV_VGLIST, None)
2276     test = not vglist
2277     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2278     if not test:
2279       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2280                                             constants.MIN_VG_SIZE)
2281       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2282
2283     # check pv names
2284     pvlist = nresult.get(constants.NV_PVLIST, None)
2285     test = pvlist is None
2286     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2287     if not test:
2288       # check that ':' is not present in PV names, since it's a
2289       # special character for lvcreate (denotes the range of PEs to
2290       # use on the PV)
2291       for _, pvname, owner_vg in pvlist:
2292         test = ":" in pvname
2293         _ErrorIf(test, constants.CV_ENODELVM, node,
2294                  "Invalid character ':' in PV '%s' of VG '%s'",
2295                  pvname, owner_vg)
2296
2297   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2298     """Check the node bridges.
2299
2300     @type ninfo: L{objects.Node}
2301     @param ninfo: the node to check
2302     @param nresult: the remote results for the node
2303     @param bridges: the expected list of bridges
2304
2305     """
2306     if not bridges:
2307       return
2308
2309     node = ninfo.name
2310     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2311
2312     missing = nresult.get(constants.NV_BRIDGES, None)
2313     test = not isinstance(missing, list)
2314     _ErrorIf(test, constants.CV_ENODENET, node,
2315              "did not return valid bridge information")
2316     if not test:
2317       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2318                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2319
2320   def _VerifyNodeUserScripts(self, ninfo, nresult):
2321     """Check the results of user scripts presence and executability on the node
2322
2323     @type ninfo: L{objects.Node}
2324     @param ninfo: the node to check
2325     @param nresult: the remote results for the node
2326
2327     """
2328     node = ninfo.name
2329
2330     test = not constants.NV_USERSCRIPTS in nresult
2331     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2332                   "did not return user scripts information")
2333
2334     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2335     if not test:
2336       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2337                     "user scripts not present or not executable: %s" %
2338                     utils.CommaJoin(sorted(broken_scripts)))
2339
2340   def _VerifyNodeNetwork(self, ninfo, nresult):
2341     """Check the node network connectivity results.
2342
2343     @type ninfo: L{objects.Node}
2344     @param ninfo: the node to check
2345     @param nresult: the remote results for the node
2346
2347     """
2348     node = ninfo.name
2349     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2350
2351     test = constants.NV_NODELIST not in nresult
2352     _ErrorIf(test, constants.CV_ENODESSH, node,
2353              "node hasn't returned node ssh connectivity data")
2354     if not test:
2355       if nresult[constants.NV_NODELIST]:
2356         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2357           _ErrorIf(True, constants.CV_ENODESSH, node,
2358                    "ssh communication with node '%s': %s", a_node, a_msg)
2359
2360     test = constants.NV_NODENETTEST not in nresult
2361     _ErrorIf(test, constants.CV_ENODENET, node,
2362              "node hasn't returned node tcp connectivity data")
2363     if not test:
2364       if nresult[constants.NV_NODENETTEST]:
2365         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2366         for anode in nlist:
2367           _ErrorIf(True, constants.CV_ENODENET, node,
2368                    "tcp communication with node '%s': %s",
2369                    anode, nresult[constants.NV_NODENETTEST][anode])
2370
2371     test = constants.NV_MASTERIP not in nresult
2372     _ErrorIf(test, constants.CV_ENODENET, node,
2373              "node hasn't returned node master IP reachability data")
2374     if not test:
2375       if not nresult[constants.NV_MASTERIP]:
2376         if node == self.master_node:
2377           msg = "the master node cannot reach the master IP (not configured?)"
2378         else:
2379           msg = "cannot reach the master IP"
2380         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2381
2382   def _VerifyInstance(self, instance, instanceconfig, node_image,
2383                       diskstatus):
2384     """Verify an instance.
2385
2386     This function checks to see if the required block devices are
2387     available on the instance's node.
2388
2389     """
2390     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2391     node_current = instanceconfig.primary_node
2392
2393     node_vol_should = {}
2394     instanceconfig.MapLVsByNode(node_vol_should)
2395
2396     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2397     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2398     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2399
2400     for node in node_vol_should:
2401       n_img = node_image[node]
2402       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2403         # ignore missing volumes on offline or broken nodes
2404         continue
2405       for volume in node_vol_should[node]:
2406         test = volume not in n_img.volumes
2407         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2408                  "volume %s missing on node %s", volume, node)
2409
2410     if instanceconfig.admin_state == constants.ADMINST_UP:
2411       pri_img = node_image[node_current]
2412       test = instance not in pri_img.instances and not pri_img.offline
2413       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2414                "instance not running on its primary node %s",
2415                node_current)
2416
2417     diskdata = [(nname, success, status, idx)
2418                 for (nname, disks) in diskstatus.items()
2419                 for idx, (success, status) in enumerate(disks)]
2420
2421     for nname, success, bdev_status, idx in diskdata:
2422       # the 'ghost node' construction in Exec() ensures that we have a
2423       # node here
2424       snode = node_image[nname]
2425       bad_snode = snode.ghost or snode.offline
2426       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2427                not success and not bad_snode,
2428                constants.CV_EINSTANCEFAULTYDISK, instance,
2429                "couldn't retrieve status for disk/%s on %s: %s",
2430                idx, nname, bdev_status)
2431       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2432                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2433                constants.CV_EINSTANCEFAULTYDISK, instance,
2434                "disk/%s on %s is faulty", idx, nname)
2435
2436   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2437     """Verify if there are any unknown volumes in the cluster.
2438
2439     The .os, .swap and backup volumes are ignored. All other volumes are
2440     reported as unknown.
2441
2442     @type reserved: L{ganeti.utils.FieldSet}
2443     @param reserved: a FieldSet of reserved volume names
2444
2445     """
2446     for node, n_img in node_image.items():
2447       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2448           self.all_node_info[node].group != self.group_uuid):
2449         # skip non-healthy nodes
2450         continue
2451       for volume in n_img.volumes:
2452         test = ((node not in node_vol_should or
2453                 volume not in node_vol_should[node]) and
2454                 not reserved.Matches(volume))
2455         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2456                       "volume %s is unknown", volume)
2457
2458   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2459     """Verify N+1 Memory Resilience.
2460
2461     Check that if one single node dies we can still start all the
2462     instances it was primary for.
2463
2464     """
2465     cluster_info = self.cfg.GetClusterInfo()
2466     for node, n_img in node_image.items():
2467       # This code checks that every node which is now listed as
2468       # secondary has enough memory to host all instances it is
2469       # supposed to should a single other node in the cluster fail.
2470       # FIXME: not ready for failover to an arbitrary node
2471       # FIXME: does not support file-backed instances
2472       # WARNING: we currently take into account down instances as well
2473       # as up ones, considering that even if they're down someone
2474       # might want to start them even in the event of a node failure.
2475       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2476         # we're skipping nodes marked offline and nodes in other groups from
2477         # the N+1 warning, since most likely we don't have good memory
2478         # infromation from them; we already list instances living on such
2479         # nodes, and that's enough warning
2480         continue
2481       #TODO(dynmem): also consider ballooning out other instances
2482       for prinode, instances in n_img.sbp.items():
2483         needed_mem = 0
2484         for instance in instances:
2485           bep = cluster_info.FillBE(instance_cfg[instance])
2486           if bep[constants.BE_AUTO_BALANCE]:
2487             needed_mem += bep[constants.BE_MINMEM]
2488         test = n_img.mfree < needed_mem
2489         self._ErrorIf(test, constants.CV_ENODEN1, node,
2490                       "not enough memory to accomodate instance failovers"
2491                       " should node %s fail (%dMiB needed, %dMiB available)",
2492                       prinode, needed_mem, n_img.mfree)
2493
2494   @classmethod
2495   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2496                    (files_all, files_opt, files_mc, files_vm)):
2497     """Verifies file checksums collected from all nodes.
2498
2499     @param errorif: Callback for reporting errors
2500     @param nodeinfo: List of L{objects.Node} objects
2501     @param master_node: Name of master node
2502     @param all_nvinfo: RPC results
2503
2504     """
2505     # Define functions determining which nodes to consider for a file
2506     files2nodefn = [
2507       (files_all, None),
2508       (files_mc, lambda node: (node.master_candidate or
2509                                node.name == master_node)),
2510       (files_vm, lambda node: node.vm_capable),
2511       ]
2512
2513     # Build mapping from filename to list of nodes which should have the file
2514     nodefiles = {}
2515     for (files, fn) in files2nodefn:
2516       if fn is None:
2517         filenodes = nodeinfo
2518       else:
2519         filenodes = filter(fn, nodeinfo)
2520       nodefiles.update((filename,
2521                         frozenset(map(operator.attrgetter("name"), filenodes)))
2522                        for filename in files)
2523
2524     assert set(nodefiles) == (files_all | files_mc | files_vm)
2525
2526     fileinfo = dict((filename, {}) for filename in nodefiles)
2527     ignore_nodes = set()
2528
2529     for node in nodeinfo:
2530       if node.offline:
2531         ignore_nodes.add(node.name)
2532         continue
2533
2534       nresult = all_nvinfo[node.name]
2535
2536       if nresult.fail_msg or not nresult.payload:
2537         node_files = None
2538       else:
2539         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2540
2541       test = not (node_files and isinstance(node_files, dict))
2542       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2543               "Node did not return file checksum data")
2544       if test:
2545         ignore_nodes.add(node.name)
2546         continue
2547
2548       # Build per-checksum mapping from filename to nodes having it
2549       for (filename, checksum) in node_files.items():
2550         assert filename in nodefiles
2551         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2552
2553     for (filename, checksums) in fileinfo.items():
2554       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2555
2556       # Nodes having the file
2557       with_file = frozenset(node_name
2558                             for nodes in fileinfo[filename].values()
2559                             for node_name in nodes) - ignore_nodes
2560
2561       expected_nodes = nodefiles[filename] - ignore_nodes
2562
2563       # Nodes missing file
2564       missing_file = expected_nodes - with_file
2565
2566       if filename in files_opt:
2567         # All or no nodes
2568         errorif(missing_file and missing_file != expected_nodes,
2569                 constants.CV_ECLUSTERFILECHECK, None,
2570                 "File %s is optional, but it must exist on all or no"
2571                 " nodes (not found on %s)",
2572                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2573       else:
2574         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2575                 "File %s is missing from node(s) %s", filename,
2576                 utils.CommaJoin(utils.NiceSort(missing_file)))
2577
2578         # Warn if a node has a file it shouldn't
2579         unexpected = with_file - expected_nodes
2580         errorif(unexpected,
2581                 constants.CV_ECLUSTERFILECHECK, None,
2582                 "File %s should not exist on node(s) %s",
2583                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2584
2585       # See if there are multiple versions of the file
2586       test = len(checksums) > 1
2587       if test:
2588         variants = ["variant %s on %s" %
2589                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2590                     for (idx, (checksum, nodes)) in
2591                       enumerate(sorted(checksums.items()))]
2592       else:
2593         variants = []
2594
2595       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2596               "File %s found with %s different checksums (%s)",
2597               filename, len(checksums), "; ".join(variants))
2598
2599   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2600                       drbd_map):
2601     """Verifies and the node DRBD status.
2602
2603     @type ninfo: L{objects.Node}
2604     @param ninfo: the node to check
2605     @param nresult: the remote results for the node
2606     @param instanceinfo: the dict of instances
2607     @param drbd_helper: the configured DRBD usermode helper
2608     @param drbd_map: the DRBD map as returned by
2609         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2610
2611     """
2612     node = ninfo.name
2613     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2614
2615     if drbd_helper:
2616       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2617       test = (helper_result == None)
2618       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2619                "no drbd usermode helper returned")
2620       if helper_result:
2621         status, payload = helper_result
2622         test = not status
2623         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2624                  "drbd usermode helper check unsuccessful: %s", payload)
2625         test = status and (payload != drbd_helper)
2626         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2627                  "wrong drbd usermode helper: %s", payload)
2628
2629     # compute the DRBD minors
2630     node_drbd = {}
2631     for minor, instance in drbd_map[node].items():
2632       test = instance not in instanceinfo
2633       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2634                "ghost instance '%s' in temporary DRBD map", instance)
2635         # ghost instance should not be running, but otherwise we
2636         # don't give double warnings (both ghost instance and
2637         # unallocated minor in use)
2638       if test:
2639         node_drbd[minor] = (instance, False)
2640       else:
2641         instance = instanceinfo[instance]
2642         node_drbd[minor] = (instance.name,
2643                             instance.admin_state == constants.ADMINST_UP)
2644
2645     # and now check them
2646     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2647     test = not isinstance(used_minors, (tuple, list))
2648     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2649              "cannot parse drbd status file: %s", str(used_minors))
2650     if test:
2651       # we cannot check drbd status
2652       return
2653
2654     for minor, (iname, must_exist) in node_drbd.items():
2655       test = minor not in used_minors and must_exist
2656       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2657                "drbd minor %d of instance %s is not active", minor, iname)
2658     for minor in used_minors:
2659       test = minor not in node_drbd
2660       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2661                "unallocated drbd minor %d is in use", minor)
2662
2663   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2664     """Builds the node OS structures.
2665
2666     @type ninfo: L{objects.Node}
2667     @param ninfo: the node to check
2668     @param nresult: the remote results for the node
2669     @param nimg: the node image object
2670
2671     """
2672     node = ninfo.name
2673     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2674
2675     remote_os = nresult.get(constants.NV_OSLIST, None)
2676     test = (not isinstance(remote_os, list) or
2677             not compat.all(isinstance(v, list) and len(v) == 7
2678                            for v in remote_os))
2679
2680     _ErrorIf(test, constants.CV_ENODEOS, node,
2681              "node hasn't returned valid OS data")
2682
2683     nimg.os_fail = test
2684
2685     if test:
2686       return
2687
2688     os_dict = {}
2689
2690     for (name, os_path, status, diagnose,
2691          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2692
2693       if name not in os_dict:
2694         os_dict[name] = []
2695
2696       # parameters is a list of lists instead of list of tuples due to
2697       # JSON lacking a real tuple type, fix it:
2698       parameters = [tuple(v) for v in parameters]
2699       os_dict[name].append((os_path, status, diagnose,
2700                             set(variants), set(parameters), set(api_ver)))
2701
2702     nimg.oslist = os_dict
2703
2704   def _VerifyNodeOS(self, ninfo, nimg, base):
2705     """Verifies the node OS list.
2706
2707     @type ninfo: L{objects.Node}
2708     @param ninfo: the node to check
2709     @param nimg: the node image object
2710     @param base: the 'template' node we match against (e.g. from the master)
2711
2712     """
2713     node = ninfo.name
2714     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2715
2716     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2717
2718     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2719     for os_name, os_data in nimg.oslist.items():
2720       assert os_data, "Empty OS status for OS %s?!" % os_name
2721       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2722       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2723                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2724       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2725                "OS '%s' has multiple entries (first one shadows the rest): %s",
2726                os_name, utils.CommaJoin([v[0] for v in os_data]))
2727       # comparisons with the 'base' image
2728       test = os_name not in base.oslist
2729       _ErrorIf(test, constants.CV_ENODEOS, node,
2730                "Extra OS %s not present on reference node (%s)",
2731                os_name, base.name)
2732       if test:
2733         continue
2734       assert base.oslist[os_name], "Base node has empty OS status?"
2735       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2736       if not b_status:
2737         # base OS is invalid, skipping
2738         continue
2739       for kind, a, b in [("API version", f_api, b_api),
2740                          ("variants list", f_var, b_var),
2741                          ("parameters", beautify_params(f_param),
2742                           beautify_params(b_param))]:
2743         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2744                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2745                  kind, os_name, base.name,
2746                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2747
2748     # check any missing OSes
2749     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2750     _ErrorIf(missing, constants.CV_ENODEOS, node,
2751              "OSes present on reference node %s but missing on this node: %s",
2752              base.name, utils.CommaJoin(missing))
2753
2754   def _VerifyOob(self, ninfo, nresult):
2755     """Verifies out of band functionality of a node.
2756
2757     @type ninfo: L{objects.Node}
2758     @param ninfo: the node to check
2759     @param nresult: the remote results for the node
2760
2761     """
2762     node = ninfo.name
2763     # We just have to verify the paths on master and/or master candidates
2764     # as the oob helper is invoked on the master
2765     if ((ninfo.master_candidate or ninfo.master_capable) and
2766         constants.NV_OOB_PATHS in nresult):
2767       for path_result in nresult[constants.NV_OOB_PATHS]:
2768         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2769
2770   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2771     """Verifies and updates the node volume data.
2772
2773     This function will update a L{NodeImage}'s internal structures
2774     with data from the remote call.
2775
2776     @type ninfo: L{objects.Node}
2777     @param ninfo: the node to check
2778     @param nresult: the remote results for the node
2779     @param nimg: the node image object
2780     @param vg_name: the configured VG name
2781
2782     """
2783     node = ninfo.name
2784     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2785
2786     nimg.lvm_fail = True
2787     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2788     if vg_name is None:
2789       pass
2790     elif isinstance(lvdata, basestring):
2791       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2792                utils.SafeEncode(lvdata))
2793     elif not isinstance(lvdata, dict):
2794       _ErrorIf(True, constants.CV_ENODELVM, node,
2795                "rpc call to node failed (lvlist)")
2796     else:
2797       nimg.volumes = lvdata
2798       nimg.lvm_fail = False
2799
2800   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2801     """Verifies and updates the node instance list.
2802
2803     If the listing was successful, then updates this node's instance
2804     list. Otherwise, it marks the RPC call as failed for the instance
2805     list key.
2806
2807     @type ninfo: L{objects.Node}
2808     @param ninfo: the node to check
2809     @param nresult: the remote results for the node
2810     @param nimg: the node image object
2811
2812     """
2813     idata = nresult.get(constants.NV_INSTANCELIST, None)
2814     test = not isinstance(idata, list)
2815     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2816                   "rpc call to node failed (instancelist): %s",
2817                   utils.SafeEncode(str(idata)))
2818     if test:
2819       nimg.hyp_fail = True
2820     else:
2821       nimg.instances = idata
2822
2823   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2824     """Verifies and computes a node information map
2825
2826     @type ninfo: L{objects.Node}
2827     @param ninfo: the node to check
2828     @param nresult: the remote results for the node
2829     @param nimg: the node image object
2830     @param vg_name: the configured VG name
2831
2832     """
2833     node = ninfo.name
2834     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2835
2836     # try to read free memory (from the hypervisor)
2837     hv_info = nresult.get(constants.NV_HVINFO, None)
2838     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2839     _ErrorIf(test, constants.CV_ENODEHV, node,
2840              "rpc call to node failed (hvinfo)")
2841     if not test:
2842       try:
2843         nimg.mfree = int(hv_info["memory_free"])
2844       except (ValueError, TypeError):
2845         _ErrorIf(True, constants.CV_ENODERPC, node,
2846                  "node returned invalid nodeinfo, check hypervisor")
2847
2848     # FIXME: devise a free space model for file based instances as well
2849     if vg_name is not None:
2850       test = (constants.NV_VGLIST not in nresult or
2851               vg_name not in nresult[constants.NV_VGLIST])
2852       _ErrorIf(test, constants.CV_ENODELVM, node,
2853                "node didn't return data for the volume group '%s'"
2854                " - it is either missing or broken", vg_name)
2855       if not test:
2856         try:
2857           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2858         except (ValueError, TypeError):
2859           _ErrorIf(True, constants.CV_ENODERPC, node,
2860                    "node returned invalid LVM info, check LVM status")
2861
2862   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2863     """Gets per-disk status information for all instances.
2864
2865     @type nodelist: list of strings
2866     @param nodelist: Node names
2867     @type node_image: dict of (name, L{objects.Node})
2868     @param node_image: Node objects
2869     @type instanceinfo: dict of (name, L{objects.Instance})
2870     @param instanceinfo: Instance objects
2871     @rtype: {instance: {node: [(succes, payload)]}}
2872     @return: a dictionary of per-instance dictionaries with nodes as
2873         keys and disk information as values; the disk information is a
2874         list of tuples (success, payload)
2875
2876     """
2877     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2878
2879     node_disks = {}
2880     node_disks_devonly = {}
2881     diskless_instances = set()
2882     diskless = constants.DT_DISKLESS
2883
2884     for nname in nodelist:
2885       node_instances = list(itertools.chain(node_image[nname].pinst,
2886                                             node_image[nname].sinst))
2887       diskless_instances.update(inst for inst in node_instances
2888                                 if instanceinfo[inst].disk_template == diskless)
2889       disks = [(inst, disk)
2890                for inst in node_instances
2891                for disk in instanceinfo[inst].disks]
2892
2893       if not disks:
2894         # No need to collect data
2895         continue
2896
2897       node_disks[nname] = disks
2898
2899       # Creating copies as SetDiskID below will modify the objects and that can
2900       # lead to incorrect data returned from nodes
2901       devonly = [dev.Copy() for (_, dev) in disks]
2902
2903       for dev in devonly:
2904         self.cfg.SetDiskID(dev, nname)
2905
2906       node_disks_devonly[nname] = devonly
2907
2908     assert len(node_disks) == len(node_disks_devonly)
2909
2910     # Collect data from all nodes with disks
2911     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2912                                                           node_disks_devonly)
2913
2914     assert len(result) == len(node_disks)
2915
2916     instdisk = {}
2917
2918     for (nname, nres) in result.items():
2919       disks = node_disks[nname]
2920
2921       if nres.offline:
2922         # No data from this node
2923         data = len(disks) * [(False, "node offline")]
2924       else:
2925         msg = nres.fail_msg
2926         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2927                  "while getting disk information: %s", msg)
2928         if msg:
2929           # No data from this node
2930           data = len(disks) * [(False, msg)]
2931         else:
2932           data = []
2933           for idx, i in enumerate(nres.payload):
2934             if isinstance(i, (tuple, list)) and len(i) == 2:
2935               data.append(i)
2936             else:
2937               logging.warning("Invalid result from node %s, entry %d: %s",
2938                               nname, idx, i)
2939               data.append((False, "Invalid result from the remote node"))
2940
2941       for ((inst, _), status) in zip(disks, data):
2942         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2943
2944     # Add empty entries for diskless instances.
2945     for inst in diskless_instances:
2946       assert inst not in instdisk
2947       instdisk[inst] = {}
2948
2949     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2950                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2951                       compat.all(isinstance(s, (tuple, list)) and
2952                                  len(s) == 2 for s in statuses)
2953                       for inst, nnames in instdisk.items()
2954                       for nname, statuses in nnames.items())
2955     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2956
2957     return instdisk
2958
2959   @staticmethod
2960   def _SshNodeSelector(group_uuid, all_nodes):
2961     """Create endless iterators for all potential SSH check hosts.
2962
2963     """
2964     nodes = [node for node in all_nodes
2965              if (node.group != group_uuid and
2966                  not node.offline)]
2967     keyfunc = operator.attrgetter("group")
2968
2969     return map(itertools.cycle,
2970                [sorted(map(operator.attrgetter("name"), names))
2971                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2972                                                   keyfunc)])
2973
2974   @classmethod
2975   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2976     """Choose which nodes should talk to which other nodes.
2977
2978     We will make nodes contact all nodes in their group, and one node from
2979     every other group.
2980
2981     @warning: This algorithm has a known issue if one node group is much
2982       smaller than others (e.g. just one node). In such a case all other
2983       nodes will talk to the single node.
2984
2985     """
2986     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2987     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2988
2989     return (online_nodes,
2990             dict((name, sorted([i.next() for i in sel]))
2991                  for name in online_nodes))
2992
2993   def BuildHooksEnv(self):
2994     """Build hooks env.
2995
2996     Cluster-Verify hooks just ran in the post phase and their failure makes
2997     the output be logged in the verify output and the verification to fail.
2998
2999     """
3000     env = {
3001       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3002       }
3003
3004     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3005                for node in self.my_node_info.values())
3006
3007     return env
3008
3009   def BuildHooksNodes(self):
3010     """Build hooks nodes.
3011
3012     """
3013     return ([], self.my_node_names)
3014
3015   def Exec(self, feedback_fn):
3016     """Verify integrity of the node group, performing various test on nodes.
3017
3018     """
3019     # This method has too many local variables. pylint: disable=R0914
3020     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3021
3022     if not self.my_node_names:
3023       # empty node group
3024       feedback_fn("* Empty node group, skipping verification")
3025       return True
3026
3027     self.bad = False
3028     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3029     verbose = self.op.verbose
3030     self._feedback_fn = feedback_fn
3031
3032     vg_name = self.cfg.GetVGName()
3033     drbd_helper = self.cfg.GetDRBDHelper()
3034     cluster = self.cfg.GetClusterInfo()
3035     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3036     hypervisors = cluster.enabled_hypervisors
3037     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3038
3039     i_non_redundant = [] # Non redundant instances
3040     i_non_a_balanced = [] # Non auto-balanced instances
3041     i_offline = 0 # Count of offline instances
3042     n_offline = 0 # Count of offline nodes
3043     n_drained = 0 # Count of nodes being drained
3044     node_vol_should = {}
3045
3046     # FIXME: verify OS list
3047
3048     # File verification
3049     filemap = _ComputeAncillaryFiles(cluster, False)
3050
3051     # do local checksums
3052     master_node = self.master_node = self.cfg.GetMasterNode()
3053     master_ip = self.cfg.GetMasterIP()
3054
3055     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3056
3057     user_scripts = []
3058     if self.cfg.GetUseExternalMipScript():
3059       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3060
3061     node_verify_param = {
3062       constants.NV_FILELIST:
3063         utils.UniqueSequence(filename
3064                              for files in filemap
3065                              for filename in files),
3066       constants.NV_NODELIST:
3067         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3068                                   self.all_node_info.values()),
3069       constants.NV_HYPERVISOR: hypervisors,
3070       constants.NV_HVPARAMS:
3071         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3072       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3073                                  for node in node_data_list
3074                                  if not node.offline],
3075       constants.NV_INSTANCELIST: hypervisors,
3076       constants.NV_VERSION: None,
3077       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3078       constants.NV_NODESETUP: None,
3079       constants.NV_TIME: None,
3080       constants.NV_MASTERIP: (master_node, master_ip),
3081       constants.NV_OSLIST: None,
3082       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3083       constants.NV_USERSCRIPTS: user_scripts,
3084       }
3085
3086     if vg_name is not None:
3087       node_verify_param[constants.NV_VGLIST] = None
3088       node_verify_param[constants.NV_LVLIST] = vg_name
3089       node_verify_param[constants.NV_PVLIST] = [vg_name]
3090       node_verify_param[constants.NV_DRBDLIST] = None
3091
3092     if drbd_helper:
3093       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3094
3095     # bridge checks
3096     # FIXME: this needs to be changed per node-group, not cluster-wide
3097     bridges = set()
3098     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3099     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3100       bridges.add(default_nicpp[constants.NIC_LINK])
3101     for instance in self.my_inst_info.values():
3102       for nic in instance.nics:
3103         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3104         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3105           bridges.add(full_nic[constants.NIC_LINK])
3106
3107     if bridges:
3108       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3109
3110     # Build our expected cluster state
3111     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3112                                                  name=node.name,
3113                                                  vm_capable=node.vm_capable))
3114                       for node in node_data_list)
3115
3116     # Gather OOB paths
3117     oob_paths = []
3118     for node in self.all_node_info.values():
3119       path = _SupportsOob(self.cfg, node)
3120       if path and path not in oob_paths:
3121         oob_paths.append(path)
3122
3123     if oob_paths:
3124       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3125
3126     for instance in self.my_inst_names:
3127       inst_config = self.my_inst_info[instance]
3128
3129       for nname in inst_config.all_nodes:
3130         if nname not in node_image:
3131           gnode = self.NodeImage(name=nname)
3132           gnode.ghost = (nname not in self.all_node_info)
3133           node_image[nname] = gnode
3134
3135       inst_config.MapLVsByNode(node_vol_should)
3136
3137       pnode = inst_config.primary_node
3138       node_image[pnode].pinst.append(instance)
3139
3140       for snode in inst_config.secondary_nodes:
3141         nimg = node_image[snode]
3142         nimg.sinst.append(instance)
3143         if pnode not in nimg.sbp:
3144           nimg.sbp[pnode] = []
3145         nimg.sbp[pnode].append(instance)
3146
3147     # At this point, we have the in-memory data structures complete,
3148     # except for the runtime information, which we'll gather next
3149
3150     # Due to the way our RPC system works, exact response times cannot be
3151     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3152     # time before and after executing the request, we can at least have a time
3153     # window.
3154     nvinfo_starttime = time.time()
3155     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3156                                            node_verify_param,
3157                                            self.cfg.GetClusterName())
3158     nvinfo_endtime = time.time()
3159
3160     if self.extra_lv_nodes and vg_name is not None:
3161       extra_lv_nvinfo = \
3162           self.rpc.call_node_verify(self.extra_lv_nodes,
3163                                     {constants.NV_LVLIST: vg_name},
3164                                     self.cfg.GetClusterName())
3165     else:
3166       extra_lv_nvinfo = {}
3167
3168     all_drbd_map = self.cfg.ComputeDRBDMap()
3169
3170     feedback_fn("* Gathering disk information (%s nodes)" %
3171                 len(self.my_node_names))
3172     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3173                                      self.my_inst_info)
3174
3175     feedback_fn("* Verifying configuration file consistency")
3176
3177     # If not all nodes are being checked, we need to make sure the master node
3178     # and a non-checked vm_capable node are in the list.
3179     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3180     if absent_nodes:
3181       vf_nvinfo = all_nvinfo.copy()
3182       vf_node_info = list(self.my_node_info.values())
3183       additional_nodes = []
3184       if master_node not in self.my_node_info:
3185         additional_nodes.append(master_node)
3186         vf_node_info.append(self.all_node_info[master_node])
3187       # Add the first vm_capable node we find which is not included
3188       for node in absent_nodes:
3189         nodeinfo = self.all_node_info[node]
3190         if nodeinfo.vm_capable and not nodeinfo.offline:
3191           additional_nodes.append(node)
3192           vf_node_info.append(self.all_node_info[node])
3193           break
3194       key = constants.NV_FILELIST
3195       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3196                                                  {key: node_verify_param[key]},
3197                                                  self.cfg.GetClusterName()))
3198     else:
3199       vf_nvinfo = all_nvinfo
3200       vf_node_info = self.my_node_info.values()
3201
3202     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3203
3204     feedback_fn("* Verifying node status")
3205
3206     refos_img = None
3207
3208     for node_i in node_data_list:
3209       node = node_i.name
3210       nimg = node_image[node]
3211
3212       if node_i.offline:
3213         if verbose:
3214           feedback_fn("* Skipping offline node %s" % (node,))
3215         n_offline += 1
3216         continue
3217
3218       if node == master_node:
3219         ntype = "master"
3220       elif node_i.master_candidate:
3221         ntype = "master candidate"
3222       elif node_i.drained:
3223         ntype = "drained"
3224         n_drained += 1
3225       else:
3226         ntype = "regular"
3227       if verbose:
3228         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3229
3230       msg = all_nvinfo[node].fail_msg
3231       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3232                msg)
3233       if msg:
3234         nimg.rpc_fail = True
3235         continue
3236
3237       nresult = all_nvinfo[node].payload
3238
3239       nimg.call_ok = self._VerifyNode(node_i, nresult)
3240       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3241       self._VerifyNodeNetwork(node_i, nresult)
3242       self._VerifyNodeUserScripts(node_i, nresult)
3243       self._VerifyOob(node_i, nresult)
3244
3245       if nimg.vm_capable:
3246         self._VerifyNodeLVM(node_i, nresult, vg_name)
3247         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3248                              all_drbd_map)
3249
3250         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3251         self._UpdateNodeInstances(node_i, nresult, nimg)
3252         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3253         self._UpdateNodeOS(node_i, nresult, nimg)
3254
3255         if not nimg.os_fail:
3256           if refos_img is None:
3257             refos_img = nimg
3258           self._VerifyNodeOS(node_i, nimg, refos_img)
3259         self._VerifyNodeBridges(node_i, nresult, bridges)
3260
3261         # Check whether all running instancies are primary for the node. (This
3262         # can no longer be done from _VerifyInstance below, since some of the
3263         # wrong instances could be from other node groups.)
3264         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3265
3266         for inst in non_primary_inst:
3267           # FIXME: investigate best way to handle offline insts
3268           if inst.admin_state == constants.ADMINST_OFFLINE:
3269             if verbose:
3270               feedback_fn("* Skipping offline instance %s" % inst.name)
3271             i_offline += 1
3272             continue
3273           test = inst in self.all_inst_info
3274           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3275                    "instance should not run on node %s", node_i.name)
3276           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3277                    "node is running unknown instance %s", inst)
3278
3279     for node, result in extra_lv_nvinfo.items():
3280       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3281                               node_image[node], vg_name)
3282
3283     feedback_fn("* Verifying instance status")
3284     for instance in self.my_inst_names:
3285       if verbose:
3286         feedback_fn("* Verifying instance %s" % instance)
3287       inst_config = self.my_inst_info[instance]
3288       self._VerifyInstance(instance, inst_config, node_image,
3289                            instdisk[instance])
3290       inst_nodes_offline = []
3291
3292       pnode = inst_config.primary_node
3293       pnode_img = node_image[pnode]
3294       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3295                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3296                " primary node failed", instance)
3297
3298       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3299                pnode_img.offline,
3300                constants.CV_EINSTANCEBADNODE, instance,
3301                "instance is marked as running and lives on offline node %s",
3302                inst_config.primary_node)
3303
3304       # If the instance is non-redundant we cannot survive losing its primary
3305       # node, so we are not N+1 compliant. On the other hand we have no disk
3306       # templates with more than one secondary so that situation is not well
3307       # supported either.
3308       # FIXME: does not support file-backed instances
3309       if not inst_config.secondary_nodes:
3310         i_non_redundant.append(instance)
3311
3312       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3313                constants.CV_EINSTANCELAYOUT,
3314                instance, "instance has multiple secondary nodes: %s",
3315                utils.CommaJoin(inst_config.secondary_nodes),
3316                code=self.ETYPE_WARNING)
3317
3318       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3319         pnode = inst_config.primary_node
3320         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3321         instance_groups = {}
3322
3323         for node in instance_nodes:
3324           instance_groups.setdefault(self.all_node_info[node].group,
3325                                      []).append(node)
3326
3327         pretty_list = [
3328           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3329           # Sort so that we always list the primary node first.
3330           for group, nodes in sorted(instance_groups.items(),
3331                                      key=lambda (_, nodes): pnode in nodes,
3332                                      reverse=True)]
3333
3334         self._ErrorIf(len(instance_groups) > 1,
3335                       constants.CV_EINSTANCESPLITGROUPS,
3336                       instance, "instance has primary and secondary nodes in"
3337                       " different groups: %s", utils.CommaJoin(pretty_list),
3338                       code=self.ETYPE_WARNING)
3339
3340       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3341         i_non_a_balanced.append(instance)
3342
3343       for snode in inst_config.secondary_nodes:
3344         s_img = node_image[snode]
3345         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3346                  snode, "instance %s, connection to secondary node failed",
3347                  instance)
3348
3349         if s_img.offline:
3350           inst_nodes_offline.append(snode)
3351
3352       # warn that the instance lives on offline nodes
3353       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3354                "instance has offline secondary node(s) %s",
3355                utils.CommaJoin(inst_nodes_offline))
3356       # ... or ghost/non-vm_capable nodes
3357       for node in inst_config.all_nodes:
3358         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3359                  instance, "instance lives on ghost node %s", node)
3360         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3361                  instance, "instance lives on non-vm_capable node %s", node)
3362
3363     feedback_fn("* Verifying orphan volumes")
3364     reserved = utils.FieldSet(*cluster.reserved_lvs)
3365
3366     # We will get spurious "unknown volume" warnings if any node of this group
3367     # is secondary for an instance whose primary is in another group. To avoid
3368     # them, we find these instances and add their volumes to node_vol_should.
3369     for inst in self.all_inst_info.values():
3370       for secondary in inst.secondary_nodes:
3371         if (secondary in self.my_node_info
3372             and inst.name not in self.my_inst_info):
3373           inst.MapLVsByNode(node_vol_should)
3374           break
3375
3376     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3377
3378     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3379       feedback_fn("* Verifying N+1 Memory redundancy")
3380       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3381
3382     feedback_fn("* Other Notes")
3383     if i_non_redundant:
3384       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3385                   % len(i_non_redundant))
3386
3387     if i_non_a_balanced:
3388       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3389                   % len(i_non_a_balanced))
3390
3391     if i_offline:
3392       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3393
3394     if n_offline:
3395       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3396
3397     if n_drained:
3398       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3399
3400     return not self.bad
3401
3402   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3403     """Analyze the post-hooks' result
3404
3405     This method analyses the hook result, handles it, and sends some
3406     nicely-formatted feedback back to the user.
3407
3408     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3409         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3410     @param hooks_results: the results of the multi-node hooks rpc call
3411     @param feedback_fn: function used send feedback back to the caller
3412     @param lu_result: previous Exec result
3413     @return: the new Exec result, based on the previous result
3414         and hook results
3415
3416     """
3417     # We only really run POST phase hooks, only for non-empty groups,
3418     # and are only interested in their results
3419     if not self.my_node_names:
3420       # empty node group
3421       pass
3422     elif phase == constants.HOOKS_PHASE_POST:
3423       # Used to change hooks' output to proper indentation
3424       feedback_fn("* Hooks Results")
3425       assert hooks_results, "invalid result from hooks"
3426
3427       for node_name in hooks_results:
3428         res = hooks_results[node_name]
3429         msg = res.fail_msg
3430         test = msg and not res.offline
3431         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3432                       "Communication failure in hooks execution: %s", msg)
3433         if res.offline or msg:
3434           # No need to investigate payload if node is offline or gave
3435           # an error.
3436           continue
3437         for script, hkr, output in res.payload:
3438           test = hkr == constants.HKR_FAIL
3439           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3440                         "Script %s failed, output:", script)
3441           if test:
3442             output = self._HOOKS_INDENT_RE.sub("      ", output)
3443             feedback_fn("%s" % output)
3444             lu_result = False
3445
3446     return lu_result
3447
3448
3449 class LUClusterVerifyDisks(NoHooksLU):
3450   """Verifies the cluster disks status.
3451
3452   """
3453   REQ_BGL = False
3454
3455   def ExpandNames(self):
3456     self.share_locks = _ShareAll()
3457     self.needed_locks = {
3458       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3459       }
3460
3461   def Exec(self, feedback_fn):
3462     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3463
3464     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3465     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3466                            for group in group_names])
3467
3468
3469 class LUGroupVerifyDisks(NoHooksLU):
3470   """Verifies the status of all disks in a node group.
3471
3472   """
3473   REQ_BGL = False
3474
3475   def ExpandNames(self):
3476     # Raises errors.OpPrereqError on its own if group can't be found
3477     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3478
3479     self.share_locks = _ShareAll()
3480     self.needed_locks = {
3481       locking.LEVEL_INSTANCE: [],
3482       locking.LEVEL_NODEGROUP: [],
3483       locking.LEVEL_NODE: [],
3484       }
3485
3486   def DeclareLocks(self, level):
3487     if level == locking.LEVEL_INSTANCE:
3488       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3489
3490       # Lock instances optimistically, needs verification once node and group
3491       # locks have been acquired
3492       self.needed_locks[locking.LEVEL_INSTANCE] = \
3493         self.cfg.GetNodeGroupInstances(self.group_uuid)
3494
3495     elif level == locking.LEVEL_NODEGROUP:
3496       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3497
3498       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3499         set([self.group_uuid] +
3500             # Lock all groups used by instances optimistically; this requires
3501             # going via the node before it's locked, requiring verification
3502             # later on
3503             [group_uuid
3504              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3505              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3506
3507     elif level == locking.LEVEL_NODE:
3508       # This will only lock the nodes in the group to be verified which contain
3509       # actual instances
3510       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3511       self._LockInstancesNodes()
3512
3513       # Lock all nodes in group to be verified
3514       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3515       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3516       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3517
3518   def CheckPrereq(self):
3519     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3520     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3521     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3522
3523     assert self.group_uuid in owned_groups
3524
3525     # Check if locked instances are still correct
3526     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3527
3528     # Get instance information
3529     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3530
3531     # Check if node groups for locked instances are still correct
3532     _CheckInstancesNodeGroups(self.cfg, self.instances,
3533                               owned_groups, owned_nodes, self.group_uuid)
3534
3535   def Exec(self, feedback_fn):
3536     """Verify integrity of cluster disks.
3537
3538     @rtype: tuple of three items
3539     @return: a tuple of (dict of node-to-node_error, list of instances
3540         which need activate-disks, dict of instance: (node, volume) for
3541         missing volumes
3542
3543     """
3544     res_nodes = {}
3545     res_instances = set()
3546     res_missing = {}
3547
3548     nv_dict = _MapInstanceDisksToNodes([inst
3549             for inst in self.instances.values()
3550             if inst.admin_state == constants.ADMINST_UP])
3551
3552     if nv_dict:
3553       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3554                              set(self.cfg.GetVmCapableNodeList()))
3555
3556       node_lvs = self.rpc.call_lv_list(nodes, [])
3557
3558       for (node, node_res) in node_lvs.items():
3559         if node_res.offline:
3560           continue
3561
3562         msg = node_res.fail_msg
3563         if msg:
3564           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3565           res_nodes[node] = msg
3566           continue
3567
3568         for lv_name, (_, _, lv_online) in node_res.payload.items():
3569           inst = nv_dict.pop((node, lv_name), None)
3570           if not (lv_online or inst is None):
3571             res_instances.add(inst)
3572
3573       # any leftover items in nv_dict are missing LVs, let's arrange the data
3574       # better
3575       for key, inst in nv_dict.iteritems():
3576         res_missing.setdefault(inst, []).append(list(key))
3577
3578     return (res_nodes, list(res_instances), res_missing)
3579
3580
3581 class LUClusterRepairDiskSizes(NoHooksLU):
3582   """Verifies the cluster disks sizes.
3583
3584   """
3585   REQ_BGL = False
3586
3587   def ExpandNames(self):
3588     if self.op.instances:
3589       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3590       self.needed_locks = {
3591         locking.LEVEL_NODE_RES: [],
3592         locking.LEVEL_INSTANCE: self.wanted_names,
3593         }
3594       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3595     else:
3596       self.wanted_names = None
3597       self.needed_locks = {
3598         locking.LEVEL_NODE_RES: locking.ALL_SET,
3599         locking.LEVEL_INSTANCE: locking.ALL_SET,
3600         }
3601     self.share_locks = {
3602       locking.LEVEL_NODE_RES: 1,
3603       locking.LEVEL_INSTANCE: 0,
3604       }
3605
3606   def DeclareLocks(self, level):
3607     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3608       self._LockInstancesNodes(primary_only=True, level=level)
3609
3610   def CheckPrereq(self):
3611     """Check prerequisites.
3612
3613     This only checks the optional instance list against the existing names.
3614
3615     """
3616     if self.wanted_names is None:
3617       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3618
3619     self.wanted_instances = \
3620         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3621
3622   def _EnsureChildSizes(self, disk):
3623     """Ensure children of the disk have the needed disk size.
3624
3625     This is valid mainly for DRBD8 and fixes an issue where the
3626     children have smaller disk size.
3627
3628     @param disk: an L{ganeti.objects.Disk} object
3629
3630     """
3631     if disk.dev_type == constants.LD_DRBD8:
3632       assert disk.children, "Empty children for DRBD8?"
3633       fchild = disk.children[0]
3634       mismatch = fchild.size < disk.size
3635       if mismatch:
3636         self.LogInfo("Child disk has size %d, parent %d, fixing",
3637                      fchild.size, disk.size)
3638         fchild.size = disk.size
3639
3640       # and we recurse on this child only, not on the metadev
3641       return self._EnsureChildSizes(fchild) or mismatch
3642     else:
3643       return False
3644
3645   def Exec(self, feedback_fn):
3646     """Verify the size of cluster disks.
3647
3648     """
3649     # TODO: check child disks too
3650     # TODO: check differences in size between primary/secondary nodes
3651     per_node_disks = {}
3652     for instance in self.wanted_instances:
3653       pnode = instance.primary_node
3654       if pnode not in per_node_disks:
3655         per_node_disks[pnode] = []
3656       for idx, disk in enumerate(instance.disks):
3657         per_node_disks[pnode].append((instance, idx, disk))
3658
3659     assert not (frozenset(per_node_disks.keys()) -
3660                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3661       "Not owning correct locks"
3662     assert not self.owned_locks(locking.LEVEL_NODE)
3663
3664     changed = []
3665     for node, dskl in per_node_disks.items():
3666       newl = [v[2].Copy() for v in dskl]
3667       for dsk in newl:
3668         self.cfg.SetDiskID(dsk, node)
3669       result = self.rpc.call_blockdev_getsize(node, newl)
3670       if result.fail_msg:
3671         self.LogWarning("Failure in blockdev_getsize call to node"
3672                         " %s, ignoring", node)
3673         continue
3674       if len(result.payload) != len(dskl):
3675         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3676                         " result.payload=%s", node, len(dskl), result.payload)
3677         self.LogWarning("Invalid result from node %s, ignoring node results",
3678                         node)
3679         continue
3680       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3681         if size is None:
3682           self.LogWarning("Disk %d of instance %s did not return size"
3683                           " information, ignoring", idx, instance.name)
3684           continue
3685         if not isinstance(size, (int, long)):
3686           self.LogWarning("Disk %d of instance %s did not return valid"
3687                           " size information, ignoring", idx, instance.name)
3688           continue
3689         size = size >> 20
3690         if size != disk.size:
3691           self.LogInfo("Disk %d of instance %s has mismatched size,"
3692                        " correcting: recorded %d, actual %d", idx,
3693                        instance.name, disk.size, size)
3694           disk.size = size
3695           self.cfg.Update(instance, feedback_fn)
3696           changed.append((instance.name, idx, size))
3697         if self._EnsureChildSizes(disk):
3698           self.cfg.Update(instance, feedback_fn)
3699           changed.append((instance.name, idx, disk.size))
3700     return changed
3701
3702
3703 class LUClusterRename(LogicalUnit):
3704   """Rename the cluster.
3705
3706   """
3707   HPATH = "cluster-rename"
3708   HTYPE = constants.HTYPE_CLUSTER
3709
3710   def BuildHooksEnv(self):
3711     """Build hooks env.
3712
3713     """
3714     return {
3715       "OP_TARGET": self.cfg.GetClusterName(),
3716       "NEW_NAME": self.op.name,
3717       }
3718
3719   def BuildHooksNodes(self):
3720     """Build hooks nodes.
3721
3722     """
3723     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3724
3725   def CheckPrereq(self):
3726     """Verify that the passed name is a valid one.
3727
3728     """
3729     hostname = netutils.GetHostname(name=self.op.name,
3730                                     family=self.cfg.GetPrimaryIPFamily())
3731
3732     new_name = hostname.name
3733     self.ip = new_ip = hostname.ip
3734     old_name = self.cfg.GetClusterName()
3735     old_ip = self.cfg.GetMasterIP()
3736     if new_name == old_name and new_ip == old_ip:
3737       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3738                                  " cluster has changed",
3739                                  errors.ECODE_INVAL)
3740     if new_ip != old_ip:
3741       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3742         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3743                                    " reachable on the network" %
3744                                    new_ip, errors.ECODE_NOTUNIQUE)
3745
3746     self.op.name = new_name
3747
3748   def Exec(self, feedback_fn):
3749     """Rename the cluster.
3750
3751     """
3752     clustername = self.op.name
3753     new_ip = self.ip
3754
3755     # shutdown the master IP
3756     master_params = self.cfg.GetMasterNetworkParameters()
3757     ems = self.cfg.GetUseExternalMipScript()
3758     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3759                                                      master_params, ems)
3760     result.Raise("Could not disable the master role")
3761
3762     try:
3763       cluster = self.cfg.GetClusterInfo()
3764       cluster.cluster_name = clustername
3765       cluster.master_ip = new_ip
3766       self.cfg.Update(cluster, feedback_fn)
3767
3768       # update the known hosts file
3769       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3770       node_list = self.cfg.GetOnlineNodeList()
3771       try:
3772         node_list.remove(master_params.name)
3773       except ValueError:
3774         pass
3775       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3776     finally:
3777       master_params.ip = new_ip
3778       result = self.rpc.call_node_activate_master_ip(master_params.name,
3779                                                      master_params, ems)
3780       msg = result.fail_msg
3781       if msg:
3782         self.LogWarning("Could not re-enable the master role on"
3783                         " the master, please restart manually: %s", msg)
3784
3785     return clustername
3786
3787
3788 def _ValidateNetmask(cfg, netmask):
3789   """Checks if a netmask is valid.
3790
3791   @type cfg: L{config.ConfigWriter}
3792   @param cfg: The cluster configuration
3793   @type netmask: int
3794   @param netmask: the netmask to be verified
3795   @raise errors.OpPrereqError: if the validation fails
3796
3797   """
3798   ip_family = cfg.GetPrimaryIPFamily()
3799   try:
3800     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3801   except errors.ProgrammerError:
3802     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3803                                ip_family)
3804   if not ipcls.ValidateNetmask(netmask):
3805     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3806                                 (netmask))
3807
3808
3809 class LUClusterSetParams(LogicalUnit):
3810   """Change the parameters of the cluster.
3811
3812   """
3813   HPATH = "cluster-modify"
3814   HTYPE = constants.HTYPE_CLUSTER
3815   REQ_BGL = False
3816
3817   def CheckArguments(self):
3818     """Check parameters
3819
3820     """
3821     if self.op.uid_pool:
3822       uidpool.CheckUidPool(self.op.uid_pool)
3823
3824     if self.op.add_uids:
3825       uidpool.CheckUidPool(self.op.add_uids)
3826
3827     if self.op.remove_uids:
3828       uidpool.CheckUidPool(self.op.remove_uids)
3829
3830     if self.op.master_netmask is not None:
3831       _ValidateNetmask(self.cfg, self.op.master_netmask)
3832
3833     if self.op.diskparams:
3834       for dt_params in self.op.diskparams.values():
3835         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3836
3837   def ExpandNames(self):
3838     # FIXME: in the future maybe other cluster params won't require checking on
3839     # all nodes to be modified.
3840     self.needed_locks = {
3841       locking.LEVEL_NODE: locking.ALL_SET,
3842       locking.LEVEL_INSTANCE: locking.ALL_SET,
3843       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3844     }
3845     self.share_locks = {
3846         locking.LEVEL_NODE: 1,
3847         locking.LEVEL_INSTANCE: 1,
3848         locking.LEVEL_NODEGROUP: 1,
3849     }
3850
3851   def BuildHooksEnv(self):
3852     """Build hooks env.
3853
3854     """
3855     return {
3856       "OP_TARGET": self.cfg.GetClusterName(),
3857       "NEW_VG_NAME": self.op.vg_name,
3858       }
3859
3860   def BuildHooksNodes(self):
3861     """Build hooks nodes.
3862
3863     """
3864     mn = self.cfg.GetMasterNode()
3865     return ([mn], [mn])
3866
3867   def CheckPrereq(self):
3868     """Check prerequisites.
3869
3870     This checks whether the given params don't conflict and
3871     if the given volume group is valid.
3872
3873     """
3874     if self.op.vg_name is not None and not self.op.vg_name:
3875       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3876         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3877                                    " instances exist", errors.ECODE_INVAL)
3878
3879     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3880       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3881         raise errors.OpPrereqError("Cannot disable drbd helper while"
3882                                    " drbd-based instances exist",
3883                                    errors.ECODE_INVAL)
3884
3885     node_list = self.owned_locks(locking.LEVEL_NODE)
3886
3887     # if vg_name not None, checks given volume group on all nodes
3888     if self.op.vg_name:
3889       vglist = self.rpc.call_vg_list(node_list)
3890       for node in node_list:
3891         msg = vglist[node].fail_msg
3892         if msg:
3893           # ignoring down node
3894           self.LogWarning("Error while gathering data on node %s"
3895                           " (ignoring node): %s", node, msg)
3896           continue
3897         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3898                                               self.op.vg_name,
3899                                               constants.MIN_VG_SIZE)
3900         if vgstatus:
3901           raise errors.OpPrereqError("Error on node '%s': %s" %
3902                                      (node, vgstatus), errors.ECODE_ENVIRON)
3903
3904     if self.op.drbd_helper:
3905       # checks given drbd helper on all nodes
3906       helpers = self.rpc.call_drbd_helper(node_list)
3907       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3908         if ninfo.offline:
3909           self.LogInfo("Not checking drbd helper on offline node %s", node)
3910           continue
3911         msg = helpers[node].fail_msg
3912         if msg:
3913           raise errors.OpPrereqError("Error checking drbd helper on node"
3914                                      " '%s': %s" % (node, msg),
3915                                      errors.ECODE_ENVIRON)
3916         node_helper = helpers[node].payload
3917         if node_helper != self.op.drbd_helper:
3918           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3919                                      (node, node_helper), errors.ECODE_ENVIRON)
3920
3921     self.cluster = cluster = self.cfg.GetClusterInfo()
3922     # validate params changes
3923     if self.op.beparams:
3924       objects.UpgradeBeParams(self.op.beparams)
3925       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3926       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3927
3928     if self.op.ndparams:
3929       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3930       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3931
3932       # TODO: we need a more general way to handle resetting
3933       # cluster-level parameters to default values
3934       if self.new_ndparams["oob_program"] == "":
3935         self.new_ndparams["oob_program"] = \
3936             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3937
3938     if self.op.hv_state:
3939       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3940                                             self.cluster.hv_state_static)
3941       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3942                                for hv, values in new_hv_state.items())
3943
3944     if self.op.disk_state:
3945       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3946                                                 self.cluster.disk_state_static)
3947       self.new_disk_state = \
3948         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3949                             for name, values in svalues.items()))
3950              for storage, svalues in new_disk_state.items())
3951
3952     if self.op.ipolicy:
3953       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3954                                             group_policy=False)
3955
3956       all_instances = self.cfg.GetAllInstancesInfo().values()
3957       violations = set()
3958       for group in self.cfg.GetAllNodeGroupsInfo().values():
3959         instances = frozenset([inst for inst in all_instances
3960                                if compat.any(node in group.members
3961                                              for node in inst.all_nodes)])
3962         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3963         new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3964                                                                    group),
3965                                             new_ipolicy, instances)
3966         if new:
3967           violations.update(new)
3968
3969       if violations:
3970         self.LogWarning("After the ipolicy change the following instances"
3971                         " violate them: %s",
3972                         utils.CommaJoin(violations))
3973
3974     if self.op.nicparams:
3975       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3976       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3977       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3978       nic_errors = []
3979
3980       # check all instances for consistency
3981       for instance in self.cfg.GetAllInstancesInfo().values():
3982         for nic_idx, nic in enumerate(instance.nics):
3983           params_copy = copy.deepcopy(nic.nicparams)
3984           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3985
3986           # check parameter syntax
3987           try:
3988             objects.NIC.CheckParameterSyntax(params_filled)
3989           except errors.ConfigurationError, err:
3990             nic_errors.append("Instance %s, nic/%d: %s" %
3991                               (instance.name, nic_idx, err))
3992
3993           # if we're moving instances to routed, check that they have an ip
3994           target_mode = params_filled[constants.NIC_MODE]
3995           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3996             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3997                               " address" % (instance.name, nic_idx))
3998       if nic_errors:
3999         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4000                                    "\n".join(nic_errors))
4001
4002     # hypervisor list/parameters
4003     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4004     if self.op.hvparams:
4005       for hv_name, hv_dict in self.op.hvparams.items():
4006         if hv_name not in self.new_hvparams:
4007           self.new_hvparams[hv_name] = hv_dict
4008         else:
4009           self.new_hvparams[hv_name].update(hv_dict)
4010
4011     # disk template parameters
4012     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4013     if self.op.diskparams:
4014       for dt_name, dt_params in self.op.diskparams.items():
4015         if dt_name not in self.op.diskparams:
4016           self.new_diskparams[dt_name] = dt_params
4017         else:
4018           self.new_diskparams[dt_name].update(dt_params)
4019
4020     # os hypervisor parameters
4021     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4022     if self.op.os_hvp:
4023       for os_name, hvs in self.op.os_hvp.items():
4024         if os_name not in self.new_os_hvp:
4025           self.new_os_hvp[os_name] = hvs
4026         else:
4027           for hv_name, hv_dict in hvs.items():
4028             if hv_name not in self.new_os_hvp[os_name]:
4029               self.new_os_hvp[os_name][hv_name] = hv_dict
4030             else:
4031               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4032
4033     # os parameters
4034     self.new_osp = objects.FillDict(cluster.osparams, {})
4035     if self.op.osparams:
4036       for os_name, osp in self.op.osparams.items():
4037         if os_name not in self.new_osp:
4038           self.new_osp[os_name] = {}
4039
4040         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4041                                                   use_none=True)
4042
4043         if not self.new_osp[os_name]:
4044           # we removed all parameters
4045           del self.new_osp[os_name]
4046         else:
4047           # check the parameter validity (remote check)
4048           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4049                          os_name, self.new_osp[os_name])
4050
4051     # changes to the hypervisor list
4052     if self.op.enabled_hypervisors is not None:
4053       self.hv_list = self.op.enabled_hypervisors
4054       for hv in self.hv_list:
4055         # if the hypervisor doesn't already exist in the cluster
4056         # hvparams, we initialize it to empty, and then (in both
4057         # cases) we make sure to fill the defaults, as we might not
4058         # have a complete defaults list if the hypervisor wasn't
4059         # enabled before
4060         if hv not in new_hvp:
4061           new_hvp[hv] = {}
4062         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4063         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4064     else:
4065       self.hv_list = cluster.enabled_hypervisors
4066
4067     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4068       # either the enabled list has changed, or the parameters have, validate
4069       for hv_name, hv_params in self.new_hvparams.items():
4070         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4071             (self.op.enabled_hypervisors and
4072              hv_name in self.op.enabled_hypervisors)):
4073           # either this is a new hypervisor, or its parameters have changed
4074           hv_class = hypervisor.GetHypervisor(hv_name)
4075           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4076           hv_class.CheckParameterSyntax(hv_params)
4077           _CheckHVParams(self, node_list, hv_name, hv_params)
4078
4079     if self.op.os_hvp:
4080       # no need to check any newly-enabled hypervisors, since the
4081       # defaults have already been checked in the above code-block
4082       for os_name, os_hvp in self.new_os_hvp.items():
4083         for hv_name, hv_params in os_hvp.items():
4084           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4085           # we need to fill in the new os_hvp on top of the actual hv_p
4086           cluster_defaults = self.new_hvparams.get(hv_name, {})
4087           new_osp = objects.FillDict(cluster_defaults, hv_params)
4088           hv_class = hypervisor.GetHypervisor(hv_name)
4089           hv_class.CheckParameterSyntax(new_osp)
4090           _CheckHVParams(self, node_list, hv_name, new_osp)
4091
4092     if self.op.default_iallocator:
4093       alloc_script = utils.FindFile(self.op.default_iallocator,
4094                                     constants.IALLOCATOR_SEARCH_PATH,
4095                                     os.path.isfile)
4096       if alloc_script is None:
4097         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4098                                    " specified" % self.op.default_iallocator,
4099                                    errors.ECODE_INVAL)
4100
4101   def Exec(self, feedback_fn):
4102     """Change the parameters of the cluster.
4103
4104     """
4105     if self.op.vg_name is not None:
4106       new_volume = self.op.vg_name
4107       if not new_volume:
4108         new_volume = None
4109       if new_volume != self.cfg.GetVGName():
4110         self.cfg.SetVGName(new_volume)
4111       else:
4112         feedback_fn("Cluster LVM configuration already in desired"
4113                     " state, not changing")
4114     if self.op.drbd_helper is not None:
4115       new_helper = self.op.drbd_helper
4116       if not new_helper:
4117         new_helper = None
4118       if new_helper != self.cfg.GetDRBDHelper():
4119         self.cfg.SetDRBDHelper(new_helper)
4120       else:
4121         feedback_fn("Cluster DRBD helper already in desired state,"
4122                     " not changing")
4123     if self.op.hvparams:
4124       self.cluster.hvparams = self.new_hvparams
4125     if self.op.os_hvp:
4126       self.cluster.os_hvp = self.new_os_hvp
4127     if self.op.enabled_hypervisors is not None:
4128       self.cluster.hvparams = self.new_hvparams
4129       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4130     if self.op.beparams:
4131       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4132     if self.op.nicparams:
4133       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4134     if self.op.ipolicy:
4135       self.cluster.ipolicy = self.new_ipolicy
4136     if self.op.osparams:
4137       self.cluster.osparams = self.new_osp
4138     if self.op.ndparams:
4139       self.cluster.ndparams = self.new_ndparams
4140     if self.op.diskparams:
4141       self.cluster.diskparams = self.new_diskparams
4142     if self.op.hv_state:
4143       self.cluster.hv_state_static = self.new_hv_state
4144     if self.op.disk_state:
4145       self.cluster.disk_state_static = self.new_disk_state
4146
4147     if self.op.candidate_pool_size is not None:
4148       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4149       # we need to update the pool size here, otherwise the save will fail
4150       _AdjustCandidatePool(self, [])
4151
4152     if self.op.maintain_node_health is not None:
4153       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4154         feedback_fn("Note: CONFD was disabled at build time, node health"
4155                     " maintenance is not useful (still enabling it)")
4156       self.cluster.maintain_node_health = self.op.maintain_node_health
4157
4158     if self.op.prealloc_wipe_disks is not None:
4159       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4160
4161     if self.op.add_uids is not None:
4162       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4163
4164     if self.op.remove_uids is not None:
4165       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4166
4167     if self.op.uid_pool is not None:
4168       self.cluster.uid_pool = self.op.uid_pool
4169
4170     if self.op.default_iallocator is not None:
4171       self.cluster.default_iallocator = self.op.default_iallocator
4172
4173     if self.op.reserved_lvs is not None:
4174       self.cluster.reserved_lvs = self.op.reserved_lvs
4175
4176     if self.op.use_external_mip_script is not None:
4177       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4178
4179     def helper_os(aname, mods, desc):
4180       desc += " OS list"
4181       lst = getattr(self.cluster, aname)
4182       for key, val in mods:
4183         if key == constants.DDM_ADD:
4184           if val in lst:
4185             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4186           else:
4187             lst.append(val)
4188         elif key == constants.DDM_REMOVE:
4189           if val in lst:
4190             lst.remove(val)
4191           else:
4192             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4193         else:
4194           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4195
4196     if self.op.hidden_os:
4197       helper_os("hidden_os", self.op.hidden_os, "hidden")
4198
4199     if self.op.blacklisted_os:
4200       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4201
4202     if self.op.master_netdev:
4203       master_params = self.cfg.GetMasterNetworkParameters()
4204       ems = self.cfg.GetUseExternalMipScript()
4205       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4206                   self.cluster.master_netdev)
4207       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4208                                                        master_params, ems)
4209       result.Raise("Could not disable the master ip")
4210       feedback_fn("Changing master_netdev from %s to %s" %
4211                   (master_params.netdev, self.op.master_netdev))
4212       self.cluster.master_netdev = self.op.master_netdev
4213
4214     if self.op.master_netmask:
4215       master_params = self.cfg.GetMasterNetworkParameters()
4216       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4217       result = self.rpc.call_node_change_master_netmask(master_params.name,
4218                                                         master_params.netmask,
4219                                                         self.op.master_netmask,
4220                                                         master_params.ip,
4221                                                         master_params.netdev)
4222       if result.fail_msg:
4223         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4224         feedback_fn(msg)
4225
4226       self.cluster.master_netmask = self.op.master_netmask
4227
4228     self.cfg.Update(self.cluster, feedback_fn)
4229
4230     if self.op.master_netdev:
4231       master_params = self.cfg.GetMasterNetworkParameters()
4232       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4233                   self.op.master_netdev)
4234       ems = self.cfg.GetUseExternalMipScript()
4235       result = self.rpc.call_node_activate_master_ip(master_params.name,
4236                                                      master_params, ems)
4237       if result.fail_msg:
4238         self.LogWarning("Could not re-enable the master ip on"
4239                         " the master, please restart manually: %s",
4240                         result.fail_msg)
4241
4242
4243 def _UploadHelper(lu, nodes, fname):
4244   """Helper for uploading a file and showing warnings.
4245
4246   """
4247   if os.path.exists(fname):
4248     result = lu.rpc.call_upload_file(nodes, fname)
4249     for to_node, to_result in result.items():
4250       msg = to_result.fail_msg
4251       if msg:
4252         msg = ("Copy of file %s to node %s failed: %s" %
4253                (fname, to_node, msg))
4254         lu.proc.LogWarning(msg)
4255
4256
4257 def _ComputeAncillaryFiles(cluster, redist):
4258   """Compute files external to Ganeti which need to be consistent.
4259
4260   @type redist: boolean
4261   @param redist: Whether to include files which need to be redistributed
4262
4263   """
4264   # Compute files for all nodes
4265   files_all = set([
4266     constants.SSH_KNOWN_HOSTS_FILE,
4267     constants.CONFD_HMAC_KEY,
4268     constants.CLUSTER_DOMAIN_SECRET_FILE,
4269     constants.SPICE_CERT_FILE,
4270     constants.SPICE_CACERT_FILE,
4271     constants.RAPI_USERS_FILE,
4272     ])
4273
4274   if not redist:
4275     files_all.update(constants.ALL_CERT_FILES)
4276     files_all.update(ssconf.SimpleStore().GetFileList())
4277   else:
4278     # we need to ship at least the RAPI certificate
4279     files_all.add(constants.RAPI_CERT_FILE)
4280
4281   if cluster.modify_etc_hosts:
4282     files_all.add(constants.ETC_HOSTS)
4283
4284   # Files which are optional, these must:
4285   # - be present in one other category as well
4286   # - either exist or not exist on all nodes of that category (mc, vm all)
4287   files_opt = set([
4288     constants.RAPI_USERS_FILE,
4289     ])
4290
4291   # Files which should only be on master candidates
4292   files_mc = set()
4293
4294   if not redist:
4295     files_mc.add(constants.CLUSTER_CONF_FILE)
4296
4297     # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4298     # replication
4299     files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4300
4301   # Files which should only be on VM-capable nodes
4302   files_vm = set(filename
4303     for hv_name in cluster.enabled_hypervisors
4304     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4305
4306   files_opt |= set(filename
4307     for hv_name in cluster.enabled_hypervisors
4308     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4309
4310   # Filenames in each category must be unique
4311   all_files_set = files_all | files_mc | files_vm
4312   assert (len(all_files_set) ==
4313           sum(map(len, [files_all, files_mc, files_vm]))), \
4314          "Found file listed in more than one file list"
4315
4316   # Optional files must be present in one other category
4317   assert all_files_set.issuperset(files_opt), \
4318          "Optional file not in a different required list"
4319
4320   return (files_all, files_opt, files_mc, files_vm)
4321
4322
4323 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4324   """Distribute additional files which are part of the cluster configuration.
4325
4326   ConfigWriter takes care of distributing the config and ssconf files, but
4327   there are more files which should be distributed to all nodes. This function
4328   makes sure those are copied.
4329
4330   @param lu: calling logical unit
4331   @param additional_nodes: list of nodes not in the config to distribute to
4332   @type additional_vm: boolean
4333   @param additional_vm: whether the additional nodes are vm-capable or not
4334
4335   """
4336   # Gather target nodes
4337   cluster = lu.cfg.GetClusterInfo()
4338   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4339
4340   online_nodes = lu.cfg.GetOnlineNodeList()
4341   vm_nodes = lu.cfg.GetVmCapableNodeList()
4342
4343   if additional_nodes is not None:
4344     online_nodes.extend(additional_nodes)
4345     if additional_vm:
4346       vm_nodes.extend(additional_nodes)
4347
4348   # Never distribute to master node
4349   for nodelist in [online_nodes, vm_nodes]:
4350     if master_info.name in nodelist:
4351       nodelist.remove(master_info.name)
4352
4353   # Gather file lists
4354   (files_all, _, files_mc, files_vm) = \
4355     _ComputeAncillaryFiles(cluster, True)
4356
4357   # Never re-distribute configuration file from here
4358   assert not (constants.CLUSTER_CONF_FILE in files_all or
4359               constants.CLUSTER_CONF_FILE in files_vm)
4360   assert not files_mc, "Master candidates not handled in this function"
4361
4362   filemap = [
4363     (online_nodes, files_all),
4364     (vm_nodes, files_vm),
4365     ]
4366
4367   # Upload the files
4368   for (node_list, files) in filemap:
4369     for fname in files:
4370       _UploadHelper(lu, node_list, fname)
4371
4372
4373 class LUClusterRedistConf(NoHooksLU):
4374   """Force the redistribution of cluster configuration.
4375
4376   This is a very simple LU.
4377
4378   """
4379   REQ_BGL = False
4380
4381   def ExpandNames(self):
4382     self.needed_locks = {
4383       locking.LEVEL_NODE: locking.ALL_SET,
4384     }
4385     self.share_locks[locking.LEVEL_NODE] = 1
4386
4387   def Exec(self, feedback_fn):
4388     """Redistribute the configuration.
4389
4390     """
4391     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4392     _RedistributeAncillaryFiles(self)
4393
4394
4395 class LUClusterActivateMasterIp(NoHooksLU):
4396   """Activate the master IP on the master node.
4397
4398   """
4399   def Exec(self, feedback_fn):
4400     """Activate the master IP.
4401
4402     """
4403     master_params = self.cfg.GetMasterNetworkParameters()
4404     ems = self.cfg.GetUseExternalMipScript()
4405     result = self.rpc.call_node_activate_master_ip(master_params.name,
4406                                                    master_params, ems)
4407     result.Raise("Could not activate the master IP")
4408
4409
4410 class LUClusterDeactivateMasterIp(NoHooksLU):
4411   """Deactivate the master IP on the master node.
4412
4413   """
4414   def Exec(self, feedback_fn):
4415     """Deactivate the master IP.
4416
4417     """
4418     master_params = self.cfg.GetMasterNetworkParameters()
4419     ems = self.cfg.GetUseExternalMipScript()
4420     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4421                                                      master_params, ems)
4422     result.Raise("Could not deactivate the master IP")
4423
4424
4425 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4426   """Sleep and poll for an instance's disk to sync.
4427
4428   """
4429   if not instance.disks or disks is not None and not disks:
4430     return True
4431
4432   disks = _ExpandCheckDisks(instance, disks)
4433
4434   if not oneshot:
4435     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4436
4437   node = instance.primary_node
4438
4439   for dev in disks:
4440     lu.cfg.SetDiskID(dev, node)
4441
4442   # TODO: Convert to utils.Retry
4443
4444   retries = 0
4445   degr_retries = 10 # in seconds, as we sleep 1 second each time
4446   while True:
4447     max_time = 0
4448     done = True
4449     cumul_degraded = False
4450     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4451     msg = rstats.fail_msg
4452     if msg:
4453       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4454       retries += 1
4455       if retries >= 10:
4456         raise errors.RemoteError("Can't contact node %s for mirror data,"
4457                                  " aborting." % node)
4458       time.sleep(6)
4459       continue
4460     rstats = rstats.payload
4461     retries = 0
4462     for i, mstat in enumerate(rstats):
4463       if mstat is None:
4464         lu.LogWarning("Can't compute data for node %s/%s",
4465                            node, disks[i].iv_name)
4466         continue
4467
4468       cumul_degraded = (cumul_degraded or
4469                         (mstat.is_degraded and mstat.sync_percent is None))
4470       if mstat.sync_percent is not None:
4471         done = False
4472         if mstat.estimated_time is not None:
4473           rem_time = ("%s remaining (estimated)" %
4474                       utils.FormatSeconds(mstat.estimated_time))
4475           max_time = mstat.estimated_time
4476         else:
4477           rem_time = "no time estimate"
4478         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4479                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4480
4481     # if we're done but degraded, let's do a few small retries, to
4482     # make sure we see a stable and not transient situation; therefore
4483     # we force restart of the loop
4484     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4485       logging.info("Degraded disks found, %d retries left", degr_retries)
4486       degr_retries -= 1
4487       time.sleep(1)
4488       continue
4489
4490     if done or oneshot:
4491       break
4492
4493     time.sleep(min(60, max_time))
4494
4495   if done:
4496     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4497   return not cumul_degraded
4498
4499
4500 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4501   """Check that mirrors are not degraded.
4502
4503   The ldisk parameter, if True, will change the test from the
4504   is_degraded attribute (which represents overall non-ok status for
4505   the device(s)) to the ldisk (representing the local storage status).
4506
4507   """
4508   lu.cfg.SetDiskID(dev, node)
4509
4510   result = True
4511
4512   if on_primary or dev.AssembleOnSecondary():
4513     rstats = lu.rpc.call_blockdev_find(node, dev)
4514     msg = rstats.fail_msg
4515     if msg:
4516       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4517       result = False
4518     elif not rstats.payload:
4519       lu.LogWarning("Can't find disk on node %s", node)
4520       result = False
4521     else:
4522       if ldisk:
4523         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4524       else:
4525         result = result and not rstats.payload.is_degraded
4526
4527   if dev.children:
4528     for child in dev.children:
4529       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4530
4531   return result
4532
4533
4534 class LUOobCommand(NoHooksLU):
4535   """Logical unit for OOB handling.
4536
4537   """
4538   REQ_BGL = False
4539   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4540
4541   def ExpandNames(self):
4542     """Gather locks we need.
4543
4544     """
4545     if self.op.node_names:
4546       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4547       lock_names = self.op.node_names
4548     else:
4549       lock_names = locking.ALL_SET
4550
4551     self.needed_locks = {
4552       locking.LEVEL_NODE: lock_names,
4553       }
4554
4555   def CheckPrereq(self):
4556     """Check prerequisites.
4557
4558     This checks:
4559      - the node exists in the configuration
4560      - OOB is supported
4561
4562     Any errors are signaled by raising errors.OpPrereqError.
4563
4564     """
4565     self.nodes = []
4566     self.master_node = self.cfg.GetMasterNode()
4567
4568     assert self.op.power_delay >= 0.0
4569
4570     if self.op.node_names:
4571       if (self.op.command in self._SKIP_MASTER and
4572           self.master_node in self.op.node_names):
4573         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4574         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4575
4576         if master_oob_handler:
4577           additional_text = ("run '%s %s %s' if you want to operate on the"
4578                              " master regardless") % (master_oob_handler,
4579                                                       self.op.command,
4580                                                       self.master_node)
4581         else:
4582           additional_text = "it does not support out-of-band operations"
4583
4584         raise errors.OpPrereqError(("Operating on the master node %s is not"
4585                                     " allowed for %s; %s") %
4586                                    (self.master_node, self.op.command,
4587                                     additional_text), errors.ECODE_INVAL)
4588     else:
4589       self.op.node_names = self.cfg.GetNodeList()
4590       if self.op.command in self._SKIP_MASTER:
4591         self.op.node_names.remove(self.master_node)
4592
4593     if self.op.command in self._SKIP_MASTER:
4594       assert self.master_node not in self.op.node_names
4595
4596     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4597       if node is None:
4598         raise errors.OpPrereqError("Node %s not found" % node_name,
4599                                    errors.ECODE_NOENT)
4600       else:
4601         self.nodes.append(node)
4602
4603       if (not self.op.ignore_status and
4604           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4605         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4606                                     " not marked offline") % node_name,
4607                                    errors.ECODE_STATE)
4608
4609   def Exec(self, feedback_fn):
4610     """Execute OOB and return result if we expect any.
4611
4612     """
4613     master_node = self.master_node
4614     ret = []
4615
4616     for idx, node in enumerate(utils.NiceSort(self.nodes,
4617                                               key=lambda node: node.name)):
4618       node_entry = [(constants.RS_NORMAL, node.name)]
4619       ret.append(node_entry)
4620
4621       oob_program = _SupportsOob(self.cfg, node)
4622
4623       if not oob_program:
4624         node_entry.append((constants.RS_UNAVAIL, None))
4625         continue
4626
4627       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4628                    self.op.command, oob_program, node.name)
4629       result = self.rpc.call_run_oob(master_node, oob_program,
4630                                      self.op.command, node.name,
4631                                      self.op.timeout)
4632
4633       if result.fail_msg:
4634         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4635                         node.name, result.fail_msg)
4636         node_entry.append((constants.RS_NODATA, None))
4637       else:
4638         try:
4639           self._CheckPayload(result)
4640         except errors.OpExecError, err:
4641           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4642                           node.name, err)
4643           node_entry.append((constants.RS_NODATA, None))
4644         else:
4645           if self.op.command == constants.OOB_HEALTH:
4646             # For health we should log important events
4647             for item, status in result.payload:
4648               if status in [constants.OOB_STATUS_WARNING,
4649                             constants.OOB_STATUS_CRITICAL]:
4650                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4651                                 item, node.name, status)
4652
4653           if self.op.command == constants.OOB_POWER_ON:
4654             node.powered = True
4655           elif self.op.command == constants.OOB_POWER_OFF:
4656             node.powered = False
4657           elif self.op.command == constants.OOB_POWER_STATUS:
4658             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4659             if powered != node.powered:
4660               logging.warning(("Recorded power state (%s) of node '%s' does not"
4661                                " match actual power state (%s)"), node.powered,
4662                               node.name, powered)
4663
4664           # For configuration changing commands we should update the node
4665           if self.op.command in (constants.OOB_POWER_ON,
4666                                  constants.OOB_POWER_OFF):
4667             self.cfg.Update(node, feedback_fn)
4668
4669           node_entry.append((constants.RS_NORMAL, result.payload))
4670
4671           if (self.op.command == constants.OOB_POWER_ON and
4672               idx < len(self.nodes) - 1):
4673             time.sleep(self.op.power_delay)
4674
4675     return ret
4676
4677   def _CheckPayload(self, result):
4678     """Checks if the payload is valid.
4679
4680     @param result: RPC result
4681     @raises errors.OpExecError: If payload is not valid
4682
4683     """
4684     errs = []
4685     if self.op.command == constants.OOB_HEALTH:
4686       if not isinstance(result.payload, list):
4687         errs.append("command 'health' is expected to return a list but got %s" %
4688                     type(result.payload))
4689       else:
4690         for item, status in result.payload:
4691           if status not in constants.OOB_STATUSES:
4692             errs.append("health item '%s' has invalid status '%s'" %
4693                         (item, status))
4694
4695     if self.op.command == constants.OOB_POWER_STATUS:
4696       if not isinstance(result.payload, dict):
4697         errs.append("power-status is expected to return a dict but got %s" %
4698                     type(result.payload))
4699
4700     if self.op.command in [
4701         constants.OOB_POWER_ON,
4702         constants.OOB_POWER_OFF,
4703         constants.OOB_POWER_CYCLE,
4704         ]:
4705       if result.payload is not None:
4706         errs.append("%s is expected to not return payload but got '%s'" %
4707                     (self.op.command, result.payload))
4708
4709     if errs:
4710       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4711                                utils.CommaJoin(errs))
4712
4713
4714 class _OsQuery(_QueryBase):
4715   FIELDS = query.OS_FIELDS
4716
4717   def ExpandNames(self, lu):
4718     # Lock all nodes in shared mode
4719     # Temporary removal of locks, should be reverted later
4720     # TODO: reintroduce locks when they are lighter-weight
4721     lu.needed_locks = {}
4722     #self.share_locks[locking.LEVEL_NODE] = 1
4723     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4724
4725     # The following variables interact with _QueryBase._GetNames
4726     if self.names:
4727       self.wanted = self.names
4728     else:
4729       self.wanted = locking.ALL_SET
4730
4731     self.do_locking = self.use_locking
4732
4733   def DeclareLocks(self, lu, level):
4734     pass
4735
4736   @staticmethod
4737   def _DiagnoseByOS(rlist):
4738     """Remaps a per-node return list into an a per-os per-node dictionary
4739
4740     @param rlist: a map with node names as keys and OS objects as values
4741
4742     @rtype: dict
4743     @return: a dictionary with osnames as keys and as value another
4744         map, with nodes as keys and tuples of (path, status, diagnose,
4745         variants, parameters, api_versions) as values, eg::
4746
4747           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4748                                      (/srv/..., False, "invalid api")],
4749                            "node2": [(/srv/..., True, "", [], [])]}
4750           }
4751
4752     """
4753     all_os = {}
4754     # we build here the list of nodes that didn't fail the RPC (at RPC
4755     # level), so that nodes with a non-responding node daemon don't
4756     # make all OSes invalid
4757     good_nodes = [node_name for node_name in rlist
4758                   if not rlist[node_name].fail_msg]
4759     for node_name, nr in rlist.items():
4760       if nr.fail_msg or not nr.payload:
4761         continue
4762       for (name, path, status, diagnose, variants,
4763            params, api_versions) in nr.payload:
4764         if name not in all_os:
4765           # build a list of nodes for this os containing empty lists
4766           # for each node in node_list
4767           all_os[name] = {}
4768           for nname in good_nodes:
4769             all_os[name][nname] = []
4770         # convert params from [name, help] to (name, help)
4771         params = [tuple(v) for v in params]
4772         all_os[name][node_name].append((path, status, diagnose,
4773                                         variants, params, api_versions))
4774     return all_os
4775
4776   def _GetQueryData(self, lu):
4777     """Computes the list of nodes and their attributes.
4778
4779     """
4780     # Locking is not used
4781     assert not (compat.any(lu.glm.is_owned(level)
4782                            for level in locking.LEVELS
4783                            if level != locking.LEVEL_CLUSTER) or
4784                 self.do_locking or self.use_locking)
4785
4786     valid_nodes = [node.name
4787                    for node in lu.cfg.GetAllNodesInfo().values()
4788                    if not node.offline and node.vm_capable]
4789     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4790     cluster = lu.cfg.GetClusterInfo()
4791
4792     data = {}
4793
4794     for (os_name, os_data) in pol.items():
4795       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4796                           hidden=(os_name in cluster.hidden_os),
4797                           blacklisted=(os_name in cluster.blacklisted_os))
4798
4799       variants = set()
4800       parameters = set()
4801       api_versions = set()
4802
4803       for idx, osl in enumerate(os_data.values()):
4804         info.valid = bool(info.valid and osl and osl[0][1])
4805         if not info.valid:
4806           break
4807
4808         (node_variants, node_params, node_api) = osl[0][3:6]
4809         if idx == 0:
4810           # First entry
4811           variants.update(node_variants)
4812           parameters.update(node_params)
4813           api_versions.update(node_api)
4814         else:
4815           # Filter out inconsistent values
4816           variants.intersection_update(node_variants)
4817           parameters.intersection_update(node_params)
4818           api_versions.intersection_update(node_api)
4819
4820       info.variants = list(variants)
4821       info.parameters = list(parameters)
4822       info.api_versions = list(api_versions)
4823
4824       data[os_name] = info
4825
4826     # Prepare data in requested order
4827     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4828             if name in data]
4829
4830
4831 class LUOsDiagnose(NoHooksLU):
4832   """Logical unit for OS diagnose/query.
4833
4834   """
4835   REQ_BGL = False
4836
4837   @staticmethod
4838   def _BuildFilter(fields, names):
4839     """Builds a filter for querying OSes.
4840
4841     """
4842     name_filter = qlang.MakeSimpleFilter("name", names)
4843
4844     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4845     # respective field is not requested
4846     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4847                      for fname in ["hidden", "blacklisted"]
4848                      if fname not in fields]
4849     if "valid" not in fields:
4850       status_filter.append([qlang.OP_TRUE, "valid"])
4851
4852     if status_filter:
4853       status_filter.insert(0, qlang.OP_AND)
4854     else:
4855       status_filter = None
4856
4857     if name_filter and status_filter:
4858       return [qlang.OP_AND, name_filter, status_filter]
4859     elif name_filter:
4860       return name_filter
4861     else:
4862       return status_filter
4863
4864   def CheckArguments(self):
4865     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4866                        self.op.output_fields, False)
4867
4868   def ExpandNames(self):
4869     self.oq.ExpandNames(self)
4870
4871   def Exec(self, feedback_fn):
4872     return self.oq.OldStyleQuery(self)
4873
4874
4875 class LUNodeRemove(LogicalUnit):
4876   """Logical unit for removing a node.
4877
4878   """
4879   HPATH = "node-remove"
4880   HTYPE = constants.HTYPE_NODE
4881
4882   def BuildHooksEnv(self):
4883     """Build hooks env.
4884
4885     """
4886     return {
4887       "OP_TARGET": self.op.node_name,
4888       "NODE_NAME": self.op.node_name,
4889       }
4890
4891   def BuildHooksNodes(self):
4892     """Build hooks nodes.
4893
4894     This doesn't run on the target node in the pre phase as a failed
4895     node would then be impossible to remove.
4896
4897     """
4898     all_nodes = self.cfg.GetNodeList()
4899     try:
4900       all_nodes.remove(self.op.node_name)
4901     except ValueError:
4902       pass
4903     return (all_nodes, all_nodes)
4904
4905   def CheckPrereq(self):
4906     """Check prerequisites.
4907
4908     This checks:
4909      - the node exists in the configuration
4910      - it does not have primary or secondary instances
4911      - it's not the master
4912
4913     Any errors are signaled by raising errors.OpPrereqError.
4914
4915     """
4916     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4917     node = self.cfg.GetNodeInfo(self.op.node_name)
4918     assert node is not None
4919
4920     masternode = self.cfg.GetMasterNode()
4921     if node.name == masternode:
4922       raise errors.OpPrereqError("Node is the master node, failover to another"
4923                                  " node is required", errors.ECODE_INVAL)
4924
4925     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4926       if node.name in instance.all_nodes:
4927         raise errors.OpPrereqError("Instance %s is still running on the node,"
4928                                    " please remove first" % instance_name,
4929                                    errors.ECODE_INVAL)
4930     self.op.node_name = node.name
4931     self.node = node
4932
4933   def Exec(self, feedback_fn):
4934     """Removes the node from the cluster.
4935
4936     """
4937     node = self.node
4938     logging.info("Stopping the node daemon and removing configs from node %s",
4939                  node.name)
4940
4941     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4942
4943     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4944       "Not owning BGL"
4945
4946     # Promote nodes to master candidate as needed
4947     _AdjustCandidatePool(self, exceptions=[node.name])
4948     self.context.RemoveNode(node.name)
4949
4950     # Run post hooks on the node before it's removed
4951     _RunPostHook(self, node.name)
4952
4953     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4954     msg = result.fail_msg
4955     if msg:
4956       self.LogWarning("Errors encountered on the remote node while leaving"
4957                       " the cluster: %s", msg)
4958
4959     # Remove node from our /etc/hosts
4960     if self.cfg.GetClusterInfo().modify_etc_hosts:
4961       master_node = self.cfg.GetMasterNode()
4962       result = self.rpc.call_etc_hosts_modify(master_node,
4963                                               constants.ETC_HOSTS_REMOVE,
4964                                               node.name, None)
4965       result.Raise("Can't update hosts file with new host data")
4966       _RedistributeAncillaryFiles(self)
4967
4968
4969 class _NodeQuery(_QueryBase):
4970   FIELDS = query.NODE_FIELDS
4971
4972   def ExpandNames(self, lu):
4973     lu.needed_locks = {}
4974     lu.share_locks = _ShareAll()
4975
4976     if self.names:
4977       self.wanted = _GetWantedNodes(lu, self.names)
4978     else:
4979       self.wanted = locking.ALL_SET
4980
4981     self.do_locking = (self.use_locking and
4982                        query.NQ_LIVE in self.requested_data)
4983
4984     if self.do_locking:
4985       # If any non-static field is requested we need to lock the nodes
4986       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4987
4988   def DeclareLocks(self, lu, level):
4989     pass
4990
4991   def _GetQueryData(self, lu):
4992     """Computes the list of nodes and their attributes.
4993
4994     """
4995     all_info = lu.cfg.GetAllNodesInfo()
4996
4997     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4998
4999     # Gather data as requested
5000     if query.NQ_LIVE in self.requested_data:
5001       # filter out non-vm_capable nodes
5002       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5003
5004       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5005                                         [lu.cfg.GetHypervisorType()])
5006       live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5007                        for (name, nresult) in node_data.items()
5008                        if not nresult.fail_msg and nresult.payload)
5009     else:
5010       live_data = None
5011
5012     if query.NQ_INST in self.requested_data:
5013       node_to_primary = dict([(name, set()) for name in nodenames])
5014       node_to_secondary = dict([(name, set()) for name in nodenames])
5015
5016       inst_data = lu.cfg.GetAllInstancesInfo()
5017
5018       for inst in inst_data.values():
5019         if inst.primary_node in node_to_primary:
5020           node_to_primary[inst.primary_node].add(inst.name)
5021         for secnode in inst.secondary_nodes:
5022           if secnode in node_to_secondary:
5023             node_to_secondary[secnode].add(inst.name)
5024     else:
5025       node_to_primary = None
5026       node_to_secondary = None
5027
5028     if query.NQ_OOB in self.requested_data:
5029       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5030                          for name, node in all_info.iteritems())
5031     else:
5032       oob_support = None
5033
5034     if query.NQ_GROUP in self.requested_data:
5035       groups = lu.cfg.GetAllNodeGroupsInfo()
5036     else:
5037       groups = {}
5038
5039     return query.NodeQueryData([all_info[name] for name in nodenames],
5040                                live_data, lu.cfg.GetMasterNode(),
5041                                node_to_primary, node_to_secondary, groups,
5042                                oob_support, lu.cfg.GetClusterInfo())
5043
5044
5045 class LUNodeQuery(NoHooksLU):
5046   """Logical unit for querying nodes.
5047
5048   """
5049   # pylint: disable=W0142
5050   REQ_BGL = False
5051
5052   def CheckArguments(self):
5053     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5054                          self.op.output_fields, self.op.use_locking)
5055
5056   def ExpandNames(self):
5057     self.nq.ExpandNames(self)
5058
5059   def DeclareLocks(self, level):
5060     self.nq.DeclareLocks(self, level)
5061
5062   def Exec(self, feedback_fn):
5063     return self.nq.OldStyleQuery(self)
5064
5065
5066 class LUNodeQueryvols(NoHooksLU):
5067   """Logical unit for getting volumes on node(s).
5068
5069   """
5070   REQ_BGL = False
5071   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5072   _FIELDS_STATIC = utils.FieldSet("node")
5073
5074   def CheckArguments(self):
5075     _CheckOutputFields(static=self._FIELDS_STATIC,
5076                        dynamic=self._FIELDS_DYNAMIC,
5077                        selected=self.op.output_fields)
5078
5079   def ExpandNames(self):
5080     self.share_locks = _ShareAll()
5081     self.needed_locks = {}
5082
5083     if not self.op.nodes:
5084       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5085     else:
5086       self.needed_locks[locking.LEVEL_NODE] = \
5087         _GetWantedNodes(self, self.op.nodes)
5088
5089   def Exec(self, feedback_fn):
5090     """Computes the list of nodes and their attributes.
5091
5092     """
5093     nodenames = self.owned_locks(locking.LEVEL_NODE)
5094     volumes = self.rpc.call_node_volumes(nodenames)
5095
5096     ilist = self.cfg.GetAllInstancesInfo()
5097     vol2inst = _MapInstanceDisksToNodes(ilist.values())
5098
5099     output = []
5100     for node in nodenames:
5101       nresult = volumes[node]
5102       if nresult.offline:
5103         continue
5104       msg = nresult.fail_msg
5105       if msg:
5106         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5107         continue
5108
5109       node_vols = sorted(nresult.payload,
5110                          key=operator.itemgetter("dev"))
5111
5112       for vol in node_vols:
5113         node_output = []
5114         for field in self.op.output_fields:
5115           if field == "node":
5116             val = node
5117           elif field == "phys":
5118             val = vol["dev"]
5119           elif field == "vg":
5120             val = vol["vg"]
5121           elif field == "name":
5122             val = vol["name"]
5123           elif field == "size":
5124             val = int(float(vol["size"]))
5125           elif field == "instance":
5126             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5127           else:
5128             raise errors.ParameterError(field)
5129           node_output.append(str(val))
5130
5131         output.append(node_output)
5132
5133     return output
5134
5135
5136 class LUNodeQueryStorage(NoHooksLU):
5137   """Logical unit for getting information on storage units on node(s).
5138
5139   """
5140   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5141   REQ_BGL = False
5142
5143   def CheckArguments(self):
5144     _CheckOutputFields(static=self._FIELDS_STATIC,
5145                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5146                        selected=self.op.output_fields)
5147
5148   def ExpandNames(self):
5149     self.share_locks = _ShareAll()
5150     self.needed_locks = {}
5151
5152     if self.op.nodes:
5153       self.needed_locks[locking.LEVEL_NODE] = \
5154         _GetWantedNodes(self, self.op.nodes)
5155     else:
5156       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5157
5158   def Exec(self, feedback_fn):
5159     """Computes the list of nodes and their attributes.
5160
5161     """
5162     self.nodes = self.owned_locks(locking.LEVEL_NODE)
5163
5164     # Always get name to sort by
5165     if constants.SF_NAME in self.op.output_fields:
5166       fields = self.op.output_fields[:]
5167     else:
5168       fields = [constants.SF_NAME] + self.op.output_fields
5169
5170     # Never ask for node or type as it's only known to the LU
5171     for extra in [constants.SF_NODE, constants.SF_TYPE]:
5172       while extra in fields:
5173         fields.remove(extra)
5174
5175     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5176     name_idx = field_idx[constants.SF_NAME]
5177
5178     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5179     data = self.rpc.call_storage_list(self.nodes,
5180                                       self.op.storage_type, st_args,
5181                                       self.op.name, fields)
5182
5183     result = []
5184
5185     for node in utils.NiceSort(self.nodes):
5186       nresult = data[node]
5187       if nresult.offline:
5188         continue
5189
5190       msg = nresult.fail_msg
5191       if msg:
5192         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5193         continue
5194
5195       rows = dict([(row[name_idx], row) for row in nresult.payload])
5196
5197       for name in utils.NiceSort(rows.keys()):
5198         row = rows[name]
5199
5200         out = []
5201
5202         for field in self.op.output_fields:
5203           if field == constants.SF_NODE:
5204             val = node
5205           elif field == constants.SF_TYPE:
5206             val = self.op.storage_type
5207           elif field in field_idx:
5208             val = row[field_idx[field]]
5209           else:
5210             raise errors.ParameterError(field)
5211
5212           out.append(val)
5213
5214         result.append(out)
5215
5216     return result
5217
5218
5219 class _InstanceQuery(_QueryBase):
5220   FIELDS = query.INSTANCE_FIELDS
5221
5222   def ExpandNames(self, lu):
5223     lu.needed_locks = {}
5224     lu.share_locks = _ShareAll()
5225
5226     if self.names:
5227       self.wanted = _GetWantedInstances(lu, self.names)
5228     else:
5229       self.wanted = locking.ALL_SET
5230
5231     self.do_locking = (self.use_locking and
5232                        query.IQ_LIVE in self.requested_data)
5233     if self.do_locking:
5234       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5235       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5236       lu.needed_locks[locking.LEVEL_NODE] = []
5237       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5238
5239     self.do_grouplocks = (self.do_locking and
5240                           query.IQ_NODES in self.requested_data)
5241
5242   def DeclareLocks(self, lu, level):
5243     if self.do_locking:
5244       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5245         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5246
5247         # Lock all groups used by instances optimistically; this requires going
5248         # via the node before it's locked, requiring verification later on
5249         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5250           set(group_uuid
5251               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5252               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5253       elif level == locking.LEVEL_NODE:
5254         lu._LockInstancesNodes() # pylint: disable=W0212
5255
5256   @staticmethod
5257   def _CheckGroupLocks(lu):
5258     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5259     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5260
5261     # Check if node groups for locked instances are still correct
5262     for instance_name in owned_instances:
5263       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5264
5265   def _GetQueryData(self, lu):
5266     """Computes the list of instances and their attributes.
5267
5268     """
5269     if self.do_grouplocks:
5270       self._CheckGroupLocks(lu)
5271
5272     cluster = lu.cfg.GetClusterInfo()
5273     all_info = lu.cfg.GetAllInstancesInfo()
5274
5275     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5276
5277     instance_list = [all_info[name] for name in instance_names]
5278     nodes = frozenset(itertools.chain(*(inst.all_nodes
5279                                         for inst in instance_list)))
5280     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5281     bad_nodes = []
5282     offline_nodes = []
5283     wrongnode_inst = set()
5284
5285     # Gather data as requested
5286     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5287       live_data = {}
5288       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5289       for name in nodes:
5290         result = node_data[name]
5291         if result.offline:
5292           # offline nodes will be in both lists
5293           assert result.fail_msg
5294           offline_nodes.append(name)
5295         if result.fail_msg:
5296           bad_nodes.append(name)
5297         elif result.payload:
5298           for inst in result.payload:
5299             if inst in all_info:
5300               if all_info[inst].primary_node == name:
5301                 live_data.update(result.payload)
5302               else:
5303                 wrongnode_inst.add(inst)
5304             else:
5305               # orphan instance; we don't list it here as we don't
5306               # handle this case yet in the output of instance listing
5307               logging.warning("Orphan instance '%s' found on node %s",
5308                               inst, name)
5309         # else no instance is alive
5310     else:
5311       live_data = {}
5312
5313     if query.IQ_DISKUSAGE in self.requested_data:
5314       disk_usage = dict((inst.name,
5315                          _ComputeDiskSize(inst.disk_template,
5316                                           [{constants.IDISK_SIZE: disk.size}
5317                                            for disk in inst.disks]))
5318                         for inst in instance_list)
5319     else:
5320       disk_usage = None
5321
5322     if query.IQ_CONSOLE in self.requested_data:
5323       consinfo = {}
5324       for inst in instance_list:
5325         if inst.name in live_data:
5326           # Instance is running
5327           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5328         else:
5329           consinfo[inst.name] = None
5330       assert set(consinfo.keys()) == set(instance_names)
5331     else:
5332       consinfo = None
5333
5334     if query.IQ_NODES in self.requested_data:
5335       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5336                                             instance_list)))
5337       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5338       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5339                     for uuid in set(map(operator.attrgetter("group"),
5340                                         nodes.values())))
5341     else:
5342       nodes = None
5343       groups = None
5344
5345     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5346                                    disk_usage, offline_nodes, bad_nodes,
5347                                    live_data, wrongnode_inst, consinfo,
5348                                    nodes, groups)
5349
5350
5351 class LUQuery(NoHooksLU):
5352   """Query for resources/items of a certain kind.
5353
5354   """
5355   # pylint: disable=W0142
5356   REQ_BGL = False
5357
5358   def CheckArguments(self):
5359     qcls = _GetQueryImplementation(self.op.what)
5360
5361     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5362
5363   def ExpandNames(self):
5364     self.impl.ExpandNames(self)
5365
5366   def DeclareLocks(self, level):
5367     self.impl.DeclareLocks(self, level)
5368
5369   def Exec(self, feedback_fn):
5370     return self.impl.NewStyleQuery(self)
5371
5372
5373 class LUQueryFields(NoHooksLU):
5374   """Query for resources/items of a certain kind.
5375
5376   """
5377   # pylint: disable=W0142
5378   REQ_BGL = False
5379
5380   def CheckArguments(self):
5381     self.qcls = _GetQueryImplementation(self.op.what)
5382
5383   def ExpandNames(self):
5384     self.needed_locks = {}
5385
5386   def Exec(self, feedback_fn):
5387     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5388
5389
5390 class LUNodeModifyStorage(NoHooksLU):
5391   """Logical unit for modifying a storage volume on a node.
5392
5393   """
5394   REQ_BGL = False
5395
5396   def CheckArguments(self):
5397     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5398
5399     storage_type = self.op.storage_type
5400
5401     try:
5402       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5403     except KeyError:
5404       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5405                                  " modified" % storage_type,
5406                                  errors.ECODE_INVAL)
5407
5408     diff = set(self.op.changes.keys()) - modifiable
5409     if diff:
5410       raise errors.OpPrereqError("The following fields can not be modified for"
5411                                  " storage units of type '%s': %r" %
5412                                  (storage_type, list(diff)),
5413                                  errors.ECODE_INVAL)
5414
5415   def ExpandNames(self):
5416     self.needed_locks = {
5417       locking.LEVEL_NODE: self.op.node_name,
5418       }
5419
5420   def Exec(self, feedback_fn):
5421     """Computes the list of nodes and their attributes.
5422
5423     """
5424     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5425     result = self.rpc.call_storage_modify(self.op.node_name,
5426                                           self.op.storage_type, st_args,
5427                                           self.op.name, self.op.changes)
5428     result.Raise("Failed to modify storage unit '%s' on %s" %
5429                  (self.op.name, self.op.node_name))
5430
5431
5432 class LUNodeAdd(LogicalUnit):
5433   """Logical unit for adding node to the cluster.
5434
5435   """
5436   HPATH = "node-add"
5437   HTYPE = constants.HTYPE_NODE
5438   _NFLAGS = ["master_capable", "vm_capable"]
5439
5440   def CheckArguments(self):
5441     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5442     # validate/normalize the node name
5443     self.hostname = netutils.GetHostname(name=self.op.node_name,
5444                                          family=self.primary_ip_family)
5445     self.op.node_name = self.hostname.name
5446
5447     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5448       raise errors.OpPrereqError("Cannot readd the master node",
5449                                  errors.ECODE_STATE)
5450
5451     if self.op.readd and self.op.group:
5452       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5453                                  " being readded", errors.ECODE_INVAL)
5454
5455   def BuildHooksEnv(self):
5456     """Build hooks env.
5457
5458     This will run on all nodes before, and on all nodes + the new node after.
5459
5460     """
5461     return {
5462       "OP_TARGET": self.op.node_name,
5463       "NODE_NAME": self.op.node_name,
5464       "NODE_PIP": self.op.primary_ip,
5465       "NODE_SIP": self.op.secondary_ip,
5466       "MASTER_CAPABLE": str(self.op.master_capable),
5467       "VM_CAPABLE": str(self.op.vm_capable),
5468       }
5469
5470   def BuildHooksNodes(self):
5471     """Build hooks nodes.
5472
5473     """
5474     # Exclude added node
5475     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5476     post_nodes = pre_nodes + [self.op.node_name, ]
5477
5478     return (pre_nodes, post_nodes)
5479
5480   def CheckPrereq(self):
5481     """Check prerequisites.
5482
5483     This checks:
5484      - the new node is not already in the config
5485      - it is resolvable
5486      - its parameters (single/dual homed) matches the cluster
5487
5488     Any errors are signaled by raising errors.OpPrereqError.
5489
5490     """
5491     cfg = self.cfg
5492     hostname = self.hostname
5493     node = hostname.name
5494     primary_ip = self.op.primary_ip = hostname.ip
5495     if self.op.secondary_ip is None:
5496       if self.primary_ip_family == netutils.IP6Address.family:
5497         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5498                                    " IPv4 address must be given as secondary",
5499                                    errors.ECODE_INVAL)
5500       self.op.secondary_ip = primary_ip
5501
5502     secondary_ip = self.op.secondary_ip
5503     if not netutils.IP4Address.IsValid(secondary_ip):
5504       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5505                                  " address" % secondary_ip, errors.ECODE_INVAL)
5506
5507     node_list = cfg.GetNodeList()
5508     if not self.op.readd and node in node_list:
5509       raise errors.OpPrereqError("Node %s is already in the configuration" %
5510                                  node, errors.ECODE_EXISTS)
5511     elif self.op.readd and node not in node_list:
5512       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5513                                  errors.ECODE_NOENT)
5514
5515     self.changed_primary_ip = False
5516
5517     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5518       if self.op.readd and node == existing_node_name:
5519         if existing_node.secondary_ip != secondary_ip:
5520           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5521                                      " address configuration as before",
5522                                      errors.ECODE_INVAL)
5523         if existing_node.primary_ip != primary_ip:
5524           self.changed_primary_ip = True
5525
5526         continue
5527
5528       if (existing_node.primary_ip == primary_ip or
5529           existing_node.secondary_ip == primary_ip or
5530           existing_node.primary_ip == secondary_ip or
5531           existing_node.secondary_ip == secondary_ip):
5532         raise errors.OpPrereqError("New node ip address(es) conflict with"
5533                                    " existing node %s" % existing_node.name,
5534                                    errors.ECODE_NOTUNIQUE)
5535
5536     # After this 'if' block, None is no longer a valid value for the
5537     # _capable op attributes
5538     if self.op.readd:
5539       old_node = self.cfg.GetNodeInfo(node)
5540       assert old_node is not None, "Can't retrieve locked node %s" % node
5541       for attr in self._NFLAGS:
5542         if getattr(self.op, attr) is None:
5543           setattr(self.op, attr, getattr(old_node, attr))
5544     else:
5545       for attr in self._NFLAGS:
5546         if getattr(self.op, attr) is None:
5547           setattr(self.op, attr, True)
5548
5549     if self.op.readd and not self.op.vm_capable:
5550       pri, sec = cfg.GetNodeInstances(node)
5551       if pri or sec:
5552         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5553                                    " flag set to false, but it already holds"
5554                                    " instances" % node,
5555                                    errors.ECODE_STATE)
5556
5557     # check that the type of the node (single versus dual homed) is the
5558     # same as for the master
5559     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5560     master_singlehomed = myself.secondary_ip == myself.primary_ip
5561     newbie_singlehomed = secondary_ip == primary_ip
5562     if master_singlehomed != newbie_singlehomed:
5563       if master_singlehomed:
5564         raise errors.OpPrereqError("The master has no secondary ip but the"
5565                                    " new node has one",
5566                                    errors.ECODE_INVAL)
5567       else:
5568         raise errors.OpPrereqError("The master has a secondary ip but the"
5569                                    " new node doesn't have one",
5570                                    errors.ECODE_INVAL)
5571
5572     # checks reachability
5573     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5574       raise errors.OpPrereqError("Node not reachable by ping",
5575                                  errors.ECODE_ENVIRON)
5576
5577     if not newbie_singlehomed:
5578       # check reachability from my secondary ip to newbie's secondary ip
5579       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5580                            source=myself.secondary_ip):
5581         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5582                                    " based ping to node daemon port",
5583                                    errors.ECODE_ENVIRON)
5584
5585     if self.op.readd:
5586       exceptions = [node]
5587     else:
5588       exceptions = []
5589
5590     if self.op.master_capable:
5591       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5592     else:
5593       self.master_candidate = False
5594
5595     if self.op.readd:
5596       self.new_node = old_node
5597     else:
5598       node_group = cfg.LookupNodeGroup(self.op.group)
5599       self.new_node = objects.Node(name=node,
5600                                    primary_ip=primary_ip,
5601                                    secondary_ip=secondary_ip,
5602                                    master_candidate=self.master_candidate,
5603                                    offline=False, drained=False,
5604                                    group=node_group)
5605
5606     if self.op.ndparams:
5607       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5608
5609     if self.op.hv_state:
5610       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5611
5612     if self.op.disk_state:
5613       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5614
5615     # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5616     #       it a property on the base class.
5617     result = rpc.DnsOnlyRunner().call_version([node])[node]
5618     result.Raise("Can't get version information from node %s" % node)
5619     if constants.PROTOCOL_VERSION == result.payload:
5620       logging.info("Communication to node %s fine, sw version %s match",
5621                    node, result.payload)
5622     else:
5623       raise errors.OpPrereqError("Version mismatch master version %s,"
5624                                  " node version %s" %
5625                                  (constants.PROTOCOL_VERSION, result.payload),
5626                                  errors.ECODE_ENVIRON)
5627
5628   def Exec(self, feedback_fn):
5629     """Adds the new node to the cluster.
5630
5631     """
5632     new_node = self.new_node
5633     node = new_node.name
5634
5635     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5636       "Not owning BGL"
5637
5638     # We adding a new node so we assume it's powered
5639     new_node.powered = True
5640
5641     # for re-adds, reset the offline/drained/master-candidate flags;
5642     # we need to reset here, otherwise offline would prevent RPC calls
5643     # later in the procedure; this also means that if the re-add
5644     # fails, we are left with a non-offlined, broken node
5645     if self.op.readd:
5646       new_node.drained = new_node.offline = False # pylint: disable=W0201
5647       self.LogInfo("Readding a node, the offline/drained flags were reset")
5648       # if we demote the node, we do cleanup later in the procedure
5649       new_node.master_candidate = self.master_candidate
5650       if self.changed_primary_ip:
5651         new_node.primary_ip = self.op.primary_ip
5652
5653     # copy the master/vm_capable flags
5654     for attr in self._NFLAGS:
5655       setattr(new_node, attr, getattr(self.op, attr))
5656
5657     # notify the user about any possible mc promotion
5658     if new_node.master_candidate:
5659       self.LogInfo("Node will be a master candidate")
5660
5661     if self.op.ndparams:
5662       new_node.ndparams = self.op.ndparams
5663     else:
5664       new_node.ndparams = {}
5665
5666     if self.op.hv_state:
5667       new_node.hv_state_static = self.new_hv_state
5668
5669     if self.op.disk_state:
5670       new_node.disk_state_static = self.new_disk_state
5671
5672     # Add node to our /etc/hosts, and add key to known_hosts
5673     if self.cfg.GetClusterInfo().modify_etc_hosts:
5674       master_node = self.cfg.GetMasterNode()
5675       result = self.rpc.call_etc_hosts_modify(master_node,
5676                                               constants.ETC_HOSTS_ADD,
5677                                               self.hostname.name,
5678                                               self.hostname.ip)
5679       result.Raise("Can't update hosts file with new host data")
5680
5681     if new_node.secondary_ip != new_node.primary_ip:
5682       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5683                                False)
5684
5685     node_verify_list = [self.cfg.GetMasterNode()]
5686     node_verify_param = {
5687       constants.NV_NODELIST: ([node], {}),
5688       # TODO: do a node-net-test as well?
5689     }
5690
5691     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5692                                        self.cfg.GetClusterName())
5693     for verifier in node_verify_list:
5694       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5695       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5696       if nl_payload:
5697         for failed in nl_payload:
5698           feedback_fn("ssh/hostname verification failed"
5699                       " (checking from %s): %s" %
5700                       (verifier, nl_payload[failed]))
5701         raise errors.OpExecError("ssh/hostname verification failed")
5702
5703     if self.op.readd:
5704       _RedistributeAncillaryFiles(self)
5705       self.context.ReaddNode(new_node)
5706       # make sure we redistribute the config
5707       self.cfg.Update(new_node, feedback_fn)
5708       # and make sure the new node will not have old files around
5709       if not new_node.master_candidate:
5710         result = self.rpc.call_node_demote_from_mc(new_node.name)
5711         msg = result.fail_msg
5712         if msg:
5713           self.LogWarning("Node failed to demote itself from master"
5714                           " candidate status: %s" % msg)
5715     else:
5716       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5717                                   additional_vm=self.op.vm_capable)
5718       self.context.AddNode(new_node, self.proc.GetECId())
5719
5720
5721 class LUNodeSetParams(LogicalUnit):
5722   """Modifies the parameters of a node.
5723
5724   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5725       to the node role (as _ROLE_*)
5726   @cvar _R2F: a dictionary from node role to tuples of flags
5727   @cvar _FLAGS: a list of attribute names corresponding to the flags
5728
5729   """
5730   HPATH = "node-modify"
5731   HTYPE = constants.HTYPE_NODE
5732   REQ_BGL = False
5733   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5734   _F2R = {
5735     (True, False, False): _ROLE_CANDIDATE,
5736     (False, True, False): _ROLE_DRAINED,
5737     (False, False, True): _ROLE_OFFLINE,
5738     (False, False, False): _ROLE_REGULAR,
5739     }
5740   _R2F = dict((v, k) for k, v in _F2R.items())
5741   _FLAGS = ["master_candidate", "drained", "offline"]
5742
5743   def CheckArguments(self):
5744     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5745     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5746                 self.op.master_capable, self.op.vm_capable,
5747                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5748                 self.op.disk_state]
5749     if all_mods.count(None) == len(all_mods):
5750       raise errors.OpPrereqError("Please pass at least one modification",
5751                                  errors.ECODE_INVAL)
5752     if all_mods.count(True) > 1:
5753       raise errors.OpPrereqError("Can't set the node into more than one"
5754                                  " state at the same time",
5755                                  errors.ECODE_INVAL)
5756
5757     # Boolean value that tells us whether we might be demoting from MC
5758     self.might_demote = (self.op.master_candidate == False or
5759                          self.op.offline == True or
5760                          self.op.drained == True or
5761                          self.op.master_capable == False)
5762
5763     if self.op.secondary_ip:
5764       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5765         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5766                                    " address" % self.op.secondary_ip,
5767                                    errors.ECODE_INVAL)
5768
5769     self.lock_all = self.op.auto_promote and self.might_demote
5770     self.lock_instances = self.op.secondary_ip is not None
5771
5772   def _InstanceFilter(self, instance):
5773     """Filter for getting affected instances.
5774
5775     """
5776     return (instance.disk_template in constants.DTS_INT_MIRROR and
5777             self.op.node_name in instance.all_nodes)
5778
5779   def ExpandNames(self):
5780     if self.lock_all:
5781       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5782     else:
5783       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5784
5785     # Since modifying a node can have severe effects on currently running
5786     # operations the resource lock is at least acquired in shared mode
5787     self.needed_locks[locking.LEVEL_NODE_RES] = \
5788       self.needed_locks[locking.LEVEL_NODE]
5789
5790     # Get node resource and instance locks in shared mode; they are not used
5791     # for anything but read-only access
5792     self.share_locks[locking.LEVEL_NODE_RES] = 1
5793     self.share_locks[locking.LEVEL_INSTANCE] = 1
5794
5795     if self.lock_instances:
5796       self.needed_locks[locking.LEVEL_INSTANCE] = \
5797         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5798
5799   def BuildHooksEnv(self):
5800     """Build hooks env.
5801
5802     This runs on the master node.
5803
5804     """
5805     return {
5806       "OP_TARGET": self.op.node_name,
5807       "MASTER_CANDIDATE": str(self.op.master_candidate),
5808       "OFFLINE": str(self.op.offline),
5809       "DRAINED": str(self.op.drained),
5810       "MASTER_CAPABLE": str(self.op.master_capable),
5811       "VM_CAPABLE": str(self.op.vm_capable),
5812       }
5813
5814   def BuildHooksNodes(self):
5815     """Build hooks nodes.
5816
5817     """
5818     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5819     return (nl, nl)
5820
5821   def CheckPrereq(self):
5822     """Check prerequisites.
5823
5824     This only checks the instance list against the existing names.
5825
5826     """
5827     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5828
5829     if self.lock_instances:
5830       affected_instances = \
5831         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5832
5833       # Verify instance locks
5834       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5835       wanted_instances = frozenset(affected_instances.keys())
5836       if wanted_instances - owned_instances:
5837         raise errors.OpPrereqError("Instances affected by changing node %s's"
5838                                    " secondary IP address have changed since"
5839                                    " locks were acquired, wanted '%s', have"
5840                                    " '%s'; retry the operation" %
5841                                    (self.op.node_name,
5842                                     utils.CommaJoin(wanted_instances),
5843                                     utils.CommaJoin(owned_instances)),
5844                                    errors.ECODE_STATE)
5845     else:
5846       affected_instances = None
5847
5848     if (self.op.master_candidate is not None or
5849         self.op.drained is not None or
5850         self.op.offline is not None):
5851       # we can't change the master's node flags
5852       if self.op.node_name == self.cfg.GetMasterNode():
5853         raise errors.OpPrereqError("The master role can be changed"
5854                                    " only via master-failover",
5855                                    errors.ECODE_INVAL)
5856
5857     if self.op.master_candidate and not node.master_capable:
5858       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5859                                  " it a master candidate" % node.name,
5860                                  errors.ECODE_STATE)
5861
5862     if self.op.vm_capable == False:
5863       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5864       if ipri or isec:
5865         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5866                                    " the vm_capable flag" % node.name,
5867                                    errors.ECODE_STATE)
5868
5869     if node.master_candidate and self.might_demote and not self.lock_all:
5870       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5871       # check if after removing the current node, we're missing master
5872       # candidates
5873       (mc_remaining, mc_should, _) = \
5874           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5875       if mc_remaining < mc_should:
5876         raise errors.OpPrereqError("Not enough master candidates, please"
5877                                    " pass auto promote option to allow"
5878                                    " promotion", errors.ECODE_STATE)
5879
5880     self.old_flags = old_flags = (node.master_candidate,
5881                                   node.drained, node.offline)
5882     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5883     self.old_role = old_role = self._F2R[old_flags]
5884
5885     # Check for ineffective changes
5886     for attr in self._FLAGS:
5887       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5888         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5889         setattr(self.op, attr, None)
5890
5891     # Past this point, any flag change to False means a transition
5892     # away from the respective state, as only real changes are kept
5893
5894     # TODO: We might query the real power state if it supports OOB
5895     if _SupportsOob(self.cfg, node):
5896       if self.op.offline is False and not (node.powered or
5897                                            self.op.powered == True):
5898         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5899                                     " offline status can be reset") %
5900                                    self.op.node_name)
5901     elif self.op.powered is not None:
5902       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5903                                   " as it does not support out-of-band"
5904                                   " handling") % self.op.node_name)
5905
5906     # If we're being deofflined/drained, we'll MC ourself if needed
5907     if (self.op.drained == False or self.op.offline == False or
5908         (self.op.master_capable and not node.master_capable)):
5909       if _DecideSelfPromotion(self):
5910         self.op.master_candidate = True
5911         self.LogInfo("Auto-promoting node to master candidate")
5912
5913     # If we're no longer master capable, we'll demote ourselves from MC
5914     if self.op.master_capable == False and node.master_candidate:
5915       self.LogInfo("Demoting from master candidate")
5916       self.op.master_candidate = False
5917
5918     # Compute new role
5919     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5920     if self.op.master_candidate:
5921       new_role = self._ROLE_CANDIDATE
5922     elif self.op.drained:
5923       new_role = self._ROLE_DRAINED
5924     elif self.op.offline:
5925       new_role = self._ROLE_OFFLINE
5926     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5927       # False is still in new flags, which means we're un-setting (the
5928       # only) True flag
5929       new_role = self._ROLE_REGULAR
5930     else: # no new flags, nothing, keep old role
5931       new_role = old_role
5932
5933     self.new_role = new_role
5934
5935     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5936       # Trying to transition out of offline status
5937       result = self.rpc.call_version([node.name])[node.name]
5938       if result.fail_msg:
5939         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5940                                    " to report its version: %s" %
5941                                    (node.name, result.fail_msg),
5942                                    errors.ECODE_STATE)
5943       else:
5944         self.LogWarning("Transitioning node from offline to online state"
5945                         " without using re-add. Please make sure the node"
5946                         " is healthy!")
5947
5948     if self.op.secondary_ip:
5949       # Ok even without locking, because this can't be changed by any LU
5950       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5951       master_singlehomed = master.secondary_ip == master.primary_ip
5952       if master_singlehomed and self.op.secondary_ip:
5953         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5954                                    " homed cluster", errors.ECODE_INVAL)
5955
5956       assert not (frozenset(affected_instances) -
5957                   self.owned_locks(locking.LEVEL_INSTANCE))
5958
5959       if node.offline:
5960         if affected_instances:
5961           raise errors.OpPrereqError("Cannot change secondary IP address:"
5962                                      " offline node has instances (%s)"
5963                                      " configured to use it" %
5964                                      utils.CommaJoin(affected_instances.keys()))
5965       else:
5966         # On online nodes, check that no instances are running, and that
5967         # the node has the new ip and we can reach it.
5968         for instance in affected_instances.values():
5969           _CheckInstanceState(self, instance, INSTANCE_DOWN,
5970                               msg="cannot change secondary ip")
5971
5972         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5973         if master.name != node.name:
5974           # check reachability from master secondary ip to new secondary ip
5975           if not netutils.TcpPing(self.op.secondary_ip,
5976                                   constants.DEFAULT_NODED_PORT,
5977                                   source=master.secondary_ip):
5978             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5979                                        " based ping to node daemon port",
5980                                        errors.ECODE_ENVIRON)
5981
5982     if self.op.ndparams:
5983       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5984       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5985       self.new_ndparams = new_ndparams
5986
5987     if self.op.hv_state:
5988       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5989                                                  self.node.hv_state_static)
5990
5991     if self.op.disk_state:
5992       self.new_disk_state = \
5993         _MergeAndVerifyDiskState(self.op.disk_state,
5994                                  self.node.disk_state_static)
5995
5996   def Exec(self, feedback_fn):
5997     """Modifies a node.
5998
5999     """
6000     node = self.node
6001     old_role = self.old_role
6002     new_role = self.new_role
6003
6004     result = []
6005
6006     if self.op.ndparams:
6007       node.ndparams = self.new_ndparams
6008
6009     if self.op.powered is not None:
6010       node.powered = self.op.powered
6011
6012     if self.op.hv_state:
6013       node.hv_state_static = self.new_hv_state
6014
6015     if self.op.disk_state:
6016       node.disk_state_static = self.new_disk_state
6017
6018     for attr in ["master_capable", "vm_capable"]:
6019       val = getattr(self.op, attr)
6020       if val is not None:
6021         setattr(node, attr, val)
6022         result.append((attr, str(val)))
6023
6024     if new_role != old_role:
6025       # Tell the node to demote itself, if no longer MC and not offline
6026       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6027         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6028         if msg:
6029           self.LogWarning("Node failed to demote itself: %s", msg)
6030
6031       new_flags = self._R2F[new_role]
6032       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6033         if of != nf:
6034           result.append((desc, str(nf)))
6035       (node.master_candidate, node.drained, node.offline) = new_flags
6036
6037       # we locked all nodes, we adjust the CP before updating this node
6038       if self.lock_all:
6039         _AdjustCandidatePool(self, [node.name])
6040
6041     if self.op.secondary_ip:
6042       node.secondary_ip = self.op.secondary_ip
6043       result.append(("secondary_ip", self.op.secondary_ip))
6044
6045     # this will trigger configuration file update, if needed
6046     self.cfg.Update(node, feedback_fn)
6047
6048     # this will trigger job queue propagation or cleanup if the mc
6049     # flag changed
6050     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6051       self.context.ReaddNode(node)
6052
6053     return result
6054
6055
6056 class LUNodePowercycle(NoHooksLU):
6057   """Powercycles a node.
6058
6059   """
6060   REQ_BGL = False
6061
6062   def CheckArguments(self):
6063     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6064     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6065       raise errors.OpPrereqError("The node is the master and the force"
6066                                  " parameter was not set",
6067                                  errors.ECODE_INVAL)
6068
6069   def ExpandNames(self):
6070     """Locking for PowercycleNode.
6071
6072     This is a last-resort option and shouldn't block on other
6073     jobs. Therefore, we grab no locks.
6074
6075     """
6076     self.needed_locks = {}
6077
6078   def Exec(self, feedback_fn):
6079     """Reboots a node.
6080
6081     """
6082     result = self.rpc.call_node_powercycle(self.op.node_name,
6083                                            self.cfg.GetHypervisorType())
6084     result.Raise("Failed to schedule the reboot")
6085     return result.payload
6086
6087
6088 class LUClusterQuery(NoHooksLU):
6089   """Query cluster configuration.
6090
6091   """
6092   REQ_BGL = False
6093
6094   def ExpandNames(self):
6095     self.needed_locks = {}
6096
6097   def Exec(self, feedback_fn):
6098     """Return cluster config.
6099
6100     """
6101     cluster = self.cfg.GetClusterInfo()
6102     os_hvp = {}
6103
6104     # Filter just for enabled hypervisors
6105     for os_name, hv_dict in cluster.os_hvp.items():
6106       os_hvp[os_name] = {}
6107       for hv_name, hv_params in hv_dict.items():
6108         if hv_name in cluster.enabled_hypervisors:
6109           os_hvp[os_name][hv_name] = hv_params
6110
6111     # Convert ip_family to ip_version
6112     primary_ip_version = constants.IP4_VERSION
6113     if cluster.primary_ip_family == netutils.IP6Address.family:
6114       primary_ip_version = constants.IP6_VERSION
6115
6116     result = {
6117       "software_version": constants.RELEASE_VERSION,
6118       "protocol_version": constants.PROTOCOL_VERSION,
6119       "config_version": constants.CONFIG_VERSION,
6120       "os_api_version": max(constants.OS_API_VERSIONS),
6121       "export_version": constants.EXPORT_VERSION,
6122       "architecture": runtime.GetArchInfo(),
6123       "name": cluster.cluster_name,
6124       "master": cluster.master_node,
6125       "default_hypervisor": cluster.primary_hypervisor,
6126       "enabled_hypervisors": cluster.enabled_hypervisors,
6127       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6128                         for hypervisor_name in cluster.enabled_hypervisors]),
6129       "os_hvp": os_hvp,
6130       "beparams": cluster.beparams,
6131       "osparams": cluster.osparams,
6132       "ipolicy": cluster.ipolicy,
6133       "nicparams": cluster.nicparams,
6134       "ndparams": cluster.ndparams,
6135       "candidate_pool_size": cluster.candidate_pool_size,
6136       "master_netdev": cluster.master_netdev,
6137       "master_netmask": cluster.master_netmask,
6138       "use_external_mip_script": cluster.use_external_mip_script,
6139       "volume_group_name": cluster.volume_group_name,
6140       "drbd_usermode_helper": cluster.drbd_usermode_helper,
6141       "file_storage_dir": cluster.file_storage_dir,
6142       "shared_file_storage_dir": cluster.shared_file_storage_dir,
6143       "maintain_node_health": cluster.maintain_node_health,
6144       "ctime": cluster.ctime,
6145       "mtime": cluster.mtime,
6146       "uuid": cluster.uuid,
6147       "tags": list(cluster.GetTags()),
6148       "uid_pool": cluster.uid_pool,
6149       "default_iallocator": cluster.default_iallocator,
6150       "reserved_lvs": cluster.reserved_lvs,
6151       "primary_ip_version": primary_ip_version,
6152       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6153       "hidden_os": cluster.hidden_os,
6154       "blacklisted_os": cluster.blacklisted_os,
6155       }
6156
6157     return result
6158
6159
6160 class LUClusterConfigQuery(NoHooksLU):
6161   """Return configuration values.
6162
6163   """
6164   REQ_BGL = False
6165   _FIELDS_DYNAMIC = utils.FieldSet()
6166   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6167                                   "watcher_pause", "volume_group_name")
6168
6169   def CheckArguments(self):
6170     _CheckOutputFields(static=self._FIELDS_STATIC,
6171                        dynamic=self._FIELDS_DYNAMIC,
6172                        selected=self.op.output_fields)
6173
6174   def ExpandNames(self):
6175     self.needed_locks = {}
6176
6177   def Exec(self, feedback_fn):
6178     """Dump a representation of the cluster config to the standard output.
6179
6180     """
6181     values = []
6182     for field in self.op.output_fields:
6183       if field == "cluster_name":
6184         entry = self.cfg.GetClusterName()
6185       elif field == "master_node":
6186         entry = self.cfg.GetMasterNode()
6187       elif field == "drain_flag":
6188         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6189       elif field == "watcher_pause":
6190         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6191       elif field == "volume_group_name":
6192         entry = self.cfg.GetVGName()
6193       else:
6194         raise errors.ParameterError(field)
6195       values.append(entry)
6196     return values
6197
6198
6199 class LUInstanceActivateDisks(NoHooksLU):
6200   """Bring up an instance's disks.
6201
6202   """
6203   REQ_BGL = False
6204
6205   def ExpandNames(self):
6206     self._ExpandAndLockInstance()
6207     self.needed_locks[locking.LEVEL_NODE] = []
6208     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6209
6210   def DeclareLocks(self, level):
6211     if level == locking.LEVEL_NODE:
6212       self._LockInstancesNodes()
6213
6214   def CheckPrereq(self):
6215     """Check prerequisites.
6216
6217     This checks that the instance is in the cluster.
6218
6219     """
6220     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6221     assert self.instance is not None, \
6222       "Cannot retrieve locked instance %s" % self.op.instance_name
6223     _CheckNodeOnline(self, self.instance.primary_node)
6224
6225   def Exec(self, feedback_fn):
6226     """Activate the disks.
6227
6228     """
6229     disks_ok, disks_info = \
6230               _AssembleInstanceDisks(self, self.instance,
6231                                      ignore_size=self.op.ignore_size)
6232     if not disks_ok:
6233       raise errors.OpExecError("Cannot activate block devices")
6234
6235     return disks_info
6236
6237
6238 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6239                            ignore_size=False):
6240   """Prepare the block devices for an instance.
6241
6242   This sets up the block devices on all nodes.
6243
6244   @type lu: L{LogicalUnit}
6245   @param lu: the logical unit on whose behalf we execute
6246   @type instance: L{objects.Instance}
6247   @param instance: the instance for whose disks we assemble
6248   @type disks: list of L{objects.Disk} or None
6249   @param disks: which disks to assemble (or all, if None)
6250   @type ignore_secondaries: boolean
6251   @param ignore_secondaries: if true, errors on secondary nodes
6252       won't result in an error return from the function
6253   @type ignore_size: boolean
6254   @param ignore_size: if true, the current known size of the disk
6255       will not be used during the disk activation, useful for cases
6256       when the size is wrong
6257   @return: False if the operation failed, otherwise a list of
6258       (host, instance_visible_name, node_visible_name)
6259       with the mapping from node devices to instance devices
6260
6261   """
6262   device_info = []
6263   disks_ok = True
6264   iname = instance.name
6265   disks = _ExpandCheckDisks(instance, disks)
6266
6267   # With the two passes mechanism we try to reduce the window of
6268   # opportunity for the race condition of switching DRBD to primary
6269   # before handshaking occured, but we do not eliminate it
6270
6271   # The proper fix would be to wait (with some limits) until the
6272   # connection has been made and drbd transitions from WFConnection
6273   # into any other network-connected state (Connected, SyncTarget,
6274   # SyncSource, etc.)
6275
6276   # 1st pass, assemble on all nodes in secondary mode
6277   for idx, inst_disk in enumerate(disks):
6278     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6279       if ignore_size:
6280         node_disk = node_disk.Copy()
6281         node_disk.UnsetSize()
6282       lu.cfg.SetDiskID(node_disk, node)
6283       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6284       msg = result.fail_msg
6285       if msg:
6286         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6287                            " (is_primary=False, pass=1): %s",
6288                            inst_disk.iv_name, node, msg)
6289         if not ignore_secondaries:
6290           disks_ok = False
6291
6292   # FIXME: race condition on drbd migration to primary
6293
6294   # 2nd pass, do only the primary node
6295   for idx, inst_disk in enumerate(disks):
6296     dev_path = None
6297
6298     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6299       if node != instance.primary_node:
6300         continue
6301       if ignore_size:
6302         node_disk = node_disk.Copy()
6303         node_disk.UnsetSize()
6304       lu.cfg.SetDiskID(node_disk, node)
6305       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6306       msg = result.fail_msg
6307       if msg:
6308         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6309                            " (is_primary=True, pass=2): %s",
6310                            inst_disk.iv_name, node, msg)
6311         disks_ok = False
6312       else:
6313         dev_path = result.payload
6314
6315     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6316
6317   # leave the disks configured for the primary node
6318   # this is a workaround that would be fixed better by
6319   # improving the logical/physical id handling
6320   for disk in disks:
6321     lu.cfg.SetDiskID(disk, instance.primary_node)
6322
6323   return disks_ok, device_info
6324
6325
6326 def _StartInstanceDisks(lu, instance, force):
6327   """Start the disks of an instance.
6328
6329   """
6330   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6331                                            ignore_secondaries=force)
6332   if not disks_ok:
6333     _ShutdownInstanceDisks(lu, instance)
6334     if force is not None and not force:
6335       lu.proc.LogWarning("", hint="If the message above refers to a"
6336                          " secondary node,"
6337                          " you can retry the operation using '--force'.")
6338     raise errors.OpExecError("Disk consistency error")
6339
6340
6341 class LUInstanceDeactivateDisks(NoHooksLU):
6342   """Shutdown an instance's disks.
6343
6344   """
6345   REQ_BGL = False
6346
6347   def ExpandNames(self):
6348     self._ExpandAndLockInstance()
6349     self.needed_locks[locking.LEVEL_NODE] = []
6350     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6351
6352   def DeclareLocks(self, level):
6353     if level == locking.LEVEL_NODE:
6354       self._LockInstancesNodes()
6355
6356   def CheckPrereq(self):
6357     """Check prerequisites.
6358
6359     This checks that the instance is in the cluster.
6360
6361     """
6362     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6363     assert self.instance is not None, \
6364       "Cannot retrieve locked instance %s" % self.op.instance_name
6365
6366   def Exec(self, feedback_fn):
6367     """Deactivate the disks
6368
6369     """
6370     instance = self.instance
6371     if self.op.force:
6372       _ShutdownInstanceDisks(self, instance)
6373     else:
6374       _SafeShutdownInstanceDisks(self, instance)
6375
6376
6377 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6378   """Shutdown block devices of an instance.
6379
6380   This function checks if an instance is running, before calling
6381   _ShutdownInstanceDisks.
6382
6383   """
6384   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6385   _ShutdownInstanceDisks(lu, instance, disks=disks)
6386
6387
6388 def _ExpandCheckDisks(instance, disks):
6389   """Return the instance disks selected by the disks list
6390
6391   @type disks: list of L{objects.Disk} or None
6392   @param disks: selected disks
6393   @rtype: list of L{objects.Disk}
6394   @return: selected instance disks to act on
6395
6396   """
6397   if disks is None:
6398     return instance.disks
6399   else:
6400     if not set(disks).issubset(instance.disks):
6401       raise errors.ProgrammerError("Can only act on disks belonging to the"
6402                                    " target instance")
6403     return disks
6404
6405
6406 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6407   """Shutdown block devices of an instance.
6408
6409   This does the shutdown on all nodes of the instance.
6410
6411   If the ignore_primary is false, errors on the primary node are
6412   ignored.
6413
6414   """
6415   all_result = True
6416   disks = _ExpandCheckDisks(instance, disks)
6417
6418   for disk in disks:
6419     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6420       lu.cfg.SetDiskID(top_disk, node)
6421       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6422       msg = result.fail_msg
6423       if msg:
6424         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6425                       disk.iv_name, node, msg)
6426         if ((node == instance.primary_node and not ignore_primary) or
6427             (node != instance.primary_node and not result.offline)):
6428           all_result = False
6429   return all_result
6430
6431
6432 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6433   """Checks if a node has enough free memory.
6434
6435   This function check if a given node has the needed amount of free
6436   memory. In case the node has less memory or we cannot get the
6437   information from the node, this function raise an OpPrereqError
6438   exception.
6439
6440   @type lu: C{LogicalUnit}
6441   @param lu: a logical unit from which we get configuration data
6442   @type node: C{str}
6443   @param node: the node to check
6444   @type reason: C{str}
6445   @param reason: string to use in the error message
6446   @type requested: C{int}
6447   @param requested: the amount of memory in MiB to check for
6448   @type hypervisor_name: C{str}
6449   @param hypervisor_name: the hypervisor to ask for memory stats
6450   @rtype: integer
6451   @return: node current free memory
6452   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6453       we cannot check the node
6454
6455   """
6456   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6457   nodeinfo[node].Raise("Can't get data from node %s" % node,
6458                        prereq=True, ecode=errors.ECODE_ENVIRON)
6459   (_, _, (hv_info, )) = nodeinfo[node].payload
6460
6461   free_mem = hv_info.get("memory_free", None)
6462   if not isinstance(free_mem, int):
6463     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6464                                " was '%s'" % (node, free_mem),
6465                                errors.ECODE_ENVIRON)
6466   if requested > free_mem:
6467     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6468                                " needed %s MiB, available %s MiB" %
6469                                (node, reason, requested, free_mem),
6470                                errors.ECODE_NORES)
6471   return free_mem
6472
6473
6474 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6475   """Checks if nodes have enough free disk space in the all VGs.
6476
6477   This function check if all given nodes have the needed amount of
6478   free disk. In case any node has less disk or we cannot get the
6479   information from the node, this function raise an OpPrereqError
6480   exception.
6481
6482   @type lu: C{LogicalUnit}
6483   @param lu: a logical unit from which we get configuration data
6484   @type nodenames: C{list}
6485   @param nodenames: the list of node names to check
6486   @type req_sizes: C{dict}
6487   @param req_sizes: the hash of vg and corresponding amount of disk in
6488       MiB to check for
6489   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6490       or we cannot check the node
6491
6492   """
6493   for vg, req_size in req_sizes.items():
6494     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6495
6496
6497 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6498   """Checks if nodes have enough free disk space in the specified VG.
6499
6500   This function check if all given nodes have the needed amount of
6501   free disk. In case any node has less disk or we cannot get the
6502   information from the node, this function raise an OpPrereqError
6503   exception.
6504
6505   @type lu: C{LogicalUnit}
6506   @param lu: a logical unit from which we get configuration data
6507   @type nodenames: C{list}
6508   @param nodenames: the list of node names to check
6509   @type vg: C{str}
6510   @param vg: the volume group to check
6511   @type requested: C{int}
6512   @param requested: the amount of disk in MiB to check for
6513   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6514       or we cannot check the node
6515
6516   """
6517   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6518   for node in nodenames:
6519     info = nodeinfo[node]
6520     info.Raise("Cannot get current information from node %s" % node,
6521                prereq=True, ecode=errors.ECODE_ENVIRON)
6522     (_, (vg_info, ), _) = info.payload
6523     vg_free = vg_info.get("vg_free", None)
6524     if not isinstance(vg_free, int):
6525       raise errors.OpPrereqError("Can't compute free disk space on node"
6526                                  " %s for vg %s, result was '%s'" %
6527                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6528     if requested > vg_free:
6529       raise errors.OpPrereqError("Not enough disk space on target node %s"
6530                                  " vg %s: required %d MiB, available %d MiB" %
6531                                  (node, vg, requested, vg_free),
6532                                  errors.ECODE_NORES)
6533
6534
6535 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6536   """Checks if nodes have enough physical CPUs
6537
6538   This function checks if all given nodes have the needed number of
6539   physical CPUs. In case any node has less CPUs or we cannot get the
6540   information from the node, this function raises an OpPrereqError
6541   exception.
6542
6543   @type lu: C{LogicalUnit}
6544   @param lu: a logical unit from which we get configuration data
6545   @type nodenames: C{list}
6546   @param nodenames: the list of node names to check
6547   @type requested: C{int}
6548   @param requested: the minimum acceptable number of physical CPUs
6549   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6550       or we cannot check the node
6551
6552   """
6553   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6554   for node in nodenames:
6555     info = nodeinfo[node]
6556     info.Raise("Cannot get current information from node %s" % node,
6557                prereq=True, ecode=errors.ECODE_ENVIRON)
6558     (_, _, (hv_info, )) = info.payload
6559     num_cpus = hv_info.get("cpu_total", None)
6560     if not isinstance(num_cpus, int):
6561       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6562                                  " on node %s, result was '%s'" %
6563                                  (node, num_cpus), errors.ECODE_ENVIRON)
6564     if requested > num_cpus:
6565       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6566                                  "required" % (node, num_cpus, requested),
6567                                  errors.ECODE_NORES)
6568
6569
6570 class LUInstanceStartup(LogicalUnit):
6571   """Starts an instance.
6572
6573   """
6574   HPATH = "instance-start"
6575   HTYPE = constants.HTYPE_INSTANCE
6576   REQ_BGL = False
6577
6578   def CheckArguments(self):
6579     # extra beparams
6580     if self.op.beparams:
6581       # fill the beparams dict
6582       objects.UpgradeBeParams(self.op.beparams)
6583       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6584
6585   def ExpandNames(self):
6586     self._ExpandAndLockInstance()
6587     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6588
6589   def DeclareLocks(self, level):
6590     if level == locking.LEVEL_NODE_RES:
6591       self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6592
6593   def BuildHooksEnv(self):
6594     """Build hooks env.
6595
6596     This runs on master, primary and secondary nodes of the instance.
6597
6598     """
6599     env = {
6600       "FORCE": self.op.force,
6601       }
6602
6603     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6604
6605     return env
6606
6607   def BuildHooksNodes(self):
6608     """Build hooks nodes.
6609
6610     """
6611     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6612     return (nl, nl)
6613
6614   def CheckPrereq(self):
6615     """Check prerequisites.
6616
6617     This checks that the instance is in the cluster.
6618
6619     """
6620     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6621     assert self.instance is not None, \
6622       "Cannot retrieve locked instance %s" % self.op.instance_name
6623
6624     # extra hvparams
6625     if self.op.hvparams:
6626       # check hypervisor parameter syntax (locally)
6627       cluster = self.cfg.GetClusterInfo()
6628       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6629       filled_hvp = cluster.FillHV(instance)
6630       filled_hvp.update(self.op.hvparams)
6631       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6632       hv_type.CheckParameterSyntax(filled_hvp)
6633       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6634
6635     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6636
6637     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6638
6639     if self.primary_offline and self.op.ignore_offline_nodes:
6640       self.proc.LogWarning("Ignoring offline primary node")
6641
6642       if self.op.hvparams or self.op.beparams:
6643         self.proc.LogWarning("Overridden parameters are ignored")
6644     else:
6645       _CheckNodeOnline(self, instance.primary_node)
6646
6647       bep = self.cfg.GetClusterInfo().FillBE(instance)
6648       bep.update(self.op.beparams)
6649
6650       # check bridges existence
6651       _CheckInstanceBridgesExist(self, instance)
6652
6653       remote_info = self.rpc.call_instance_info(instance.primary_node,
6654                                                 instance.name,
6655                                                 instance.hypervisor)
6656       remote_info.Raise("Error checking node %s" % instance.primary_node,
6657                         prereq=True, ecode=errors.ECODE_ENVIRON)
6658       if not remote_info.payload: # not running already
6659         _CheckNodeFreeMemory(self, instance.primary_node,
6660                              "starting instance %s" % instance.name,
6661                              bep[constants.BE_MINMEM], instance.hypervisor)
6662
6663   def Exec(self, feedback_fn):
6664     """Start the instance.
6665
6666     """
6667     instance = self.instance
6668     force = self.op.force
6669
6670     if not self.op.no_remember:
6671       self.cfg.MarkInstanceUp(instance.name)
6672
6673     if self.primary_offline:
6674       assert self.op.ignore_offline_nodes
6675       self.proc.LogInfo("Primary node offline, marked instance as started")
6676     else:
6677       node_current = instance.primary_node
6678
6679       _StartInstanceDisks(self, instance, force)
6680
6681       result = \
6682         self.rpc.call_instance_start(node_current,
6683                                      (instance, self.op.hvparams,
6684                                       self.op.beparams),
6685                                      self.op.startup_paused)
6686       msg = result.fail_msg
6687       if msg:
6688         _ShutdownInstanceDisks(self, instance)
6689         raise errors.OpExecError("Could not start instance: %s" % msg)
6690
6691
6692 class LUInstanceReboot(LogicalUnit):
6693   """Reboot an instance.
6694
6695   """
6696   HPATH = "instance-reboot"
6697   HTYPE = constants.HTYPE_INSTANCE
6698   REQ_BGL = False
6699
6700   def ExpandNames(self):
6701     self._ExpandAndLockInstance()
6702
6703   def BuildHooksEnv(self):
6704     """Build hooks env.
6705
6706     This runs on master, primary and secondary nodes of the instance.
6707
6708     """
6709     env = {
6710       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6711       "REBOOT_TYPE": self.op.reboot_type,
6712       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6713       }
6714
6715     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6716
6717     return env
6718
6719   def BuildHooksNodes(self):
6720     """Build hooks nodes.
6721
6722     """
6723     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6724     return (nl, nl)
6725
6726   def CheckPrereq(self):
6727     """Check prerequisites.
6728
6729     This checks that the instance is in the cluster.
6730
6731     """
6732     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6733     assert self.instance is not None, \
6734       "Cannot retrieve locked instance %s" % self.op.instance_name
6735     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6736     _CheckNodeOnline(self, instance.primary_node)
6737
6738     # check bridges existence
6739     _CheckInstanceBridgesExist(self, instance)
6740
6741   def Exec(self, feedback_fn):
6742     """Reboot the instance.
6743
6744     """
6745     instance = self.instance
6746     ignore_secondaries = self.op.ignore_secondaries
6747     reboot_type = self.op.reboot_type
6748
6749     remote_info = self.rpc.call_instance_info(instance.primary_node,
6750                                               instance.name,
6751                                               instance.hypervisor)
6752     remote_info.Raise("Error checking node %s" % instance.primary_node)
6753     instance_running = bool(remote_info.payload)
6754
6755     node_current = instance.primary_node
6756
6757     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6758                                             constants.INSTANCE_REBOOT_HARD]:
6759       for disk in instance.disks:
6760         self.cfg.SetDiskID(disk, node_current)
6761       result = self.rpc.call_instance_reboot(node_current, instance,
6762                                              reboot_type,
6763                                              self.op.shutdown_timeout)
6764       result.Raise("Could not reboot instance")
6765     else:
6766       if instance_running:
6767         result = self.rpc.call_instance_shutdown(node_current, instance,
6768                                                  self.op.shutdown_timeout)
6769         result.Raise("Could not shutdown instance for full reboot")
6770         _ShutdownInstanceDisks(self, instance)
6771       else:
6772         self.LogInfo("Instance %s was already stopped, starting now",
6773                      instance.name)
6774       _StartInstanceDisks(self, instance, ignore_secondaries)
6775       result = self.rpc.call_instance_start(node_current,
6776                                             (instance, None, None), False)
6777       msg = result.fail_msg
6778       if msg:
6779         _ShutdownInstanceDisks(self, instance)
6780         raise errors.OpExecError("Could not start instance for"
6781                                  " full reboot: %s" % msg)
6782
6783     self.cfg.MarkInstanceUp(instance.name)
6784
6785
6786 class LUInstanceShutdown(LogicalUnit):
6787   """Shutdown an instance.
6788
6789   """
6790   HPATH = "instance-stop"
6791   HTYPE = constants.HTYPE_INSTANCE
6792   REQ_BGL = False
6793
6794   def ExpandNames(self):
6795     self._ExpandAndLockInstance()
6796
6797   def BuildHooksEnv(self):
6798     """Build hooks env.
6799
6800     This runs on master, primary and secondary nodes of the instance.
6801
6802     """
6803     env = _BuildInstanceHookEnvByObject(self, self.instance)
6804     env["TIMEOUT"] = self.op.timeout
6805     return env
6806
6807   def BuildHooksNodes(self):
6808     """Build hooks nodes.
6809
6810     """
6811     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6812     return (nl, nl)
6813
6814   def CheckPrereq(self):
6815     """Check prerequisites.
6816
6817     This checks that the instance is in the cluster.
6818
6819     """
6820     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6821     assert self.instance is not None, \
6822       "Cannot retrieve locked instance %s" % self.op.instance_name
6823
6824     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6825
6826     self.primary_offline = \
6827       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6828
6829     if self.primary_offline and self.op.ignore_offline_nodes:
6830       self.proc.LogWarning("Ignoring offline primary node")
6831     else:
6832       _CheckNodeOnline(self, self.instance.primary_node)
6833
6834   def Exec(self, feedback_fn):
6835     """Shutdown the instance.
6836
6837     """
6838     instance = self.instance
6839     node_current = instance.primary_node
6840     timeout = self.op.timeout
6841
6842     if not self.op.no_remember:
6843       self.cfg.MarkInstanceDown(instance.name)
6844
6845     if self.primary_offline:
6846       assert self.op.ignore_offline_nodes
6847       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6848     else:
6849       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6850       msg = result.fail_msg
6851       if msg:
6852         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6853
6854       _ShutdownInstanceDisks(self, instance)
6855
6856
6857 class LUInstanceReinstall(LogicalUnit):
6858   """Reinstall an instance.
6859
6860   """
6861   HPATH = "instance-reinstall"
6862   HTYPE = constants.HTYPE_INSTANCE
6863   REQ_BGL = False
6864
6865   def ExpandNames(self):
6866     self._ExpandAndLockInstance()
6867
6868   def BuildHooksEnv(self):
6869     """Build hooks env.
6870
6871     This runs on master, primary and secondary nodes of the instance.
6872
6873     """
6874     return _BuildInstanceHookEnvByObject(self, self.instance)
6875
6876   def BuildHooksNodes(self):
6877     """Build hooks nodes.
6878
6879     """
6880     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6881     return (nl, nl)
6882
6883   def CheckPrereq(self):
6884     """Check prerequisites.
6885
6886     This checks that the instance is in the cluster and is not running.
6887
6888     """
6889     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6890     assert instance is not None, \
6891       "Cannot retrieve locked instance %s" % self.op.instance_name
6892     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6893                      " offline, cannot reinstall")
6894     for node in instance.secondary_nodes:
6895       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6896                        " cannot reinstall")
6897
6898     if instance.disk_template == constants.DT_DISKLESS:
6899       raise errors.OpPrereqError("Instance '%s' has no disks" %
6900                                  self.op.instance_name,
6901                                  errors.ECODE_INVAL)
6902     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6903
6904     if self.op.os_type is not None:
6905       # OS verification
6906       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6907       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6908       instance_os = self.op.os_type
6909     else:
6910       instance_os = instance.os
6911
6912     nodelist = list(instance.all_nodes)
6913
6914     if self.op.osparams:
6915       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6916       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6917       self.os_inst = i_osdict # the new dict (without defaults)
6918     else:
6919       self.os_inst = None
6920
6921     self.instance = instance
6922
6923   def Exec(self, feedback_fn):
6924     """Reinstall the instance.
6925
6926     """
6927     inst = self.instance
6928
6929     if self.op.os_type is not None:
6930       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6931       inst.os = self.op.os_type
6932       # Write to configuration
6933       self.cfg.Update(inst, feedback_fn)
6934
6935     _StartInstanceDisks(self, inst, None)
6936     try:
6937       feedback_fn("Running the instance OS create scripts...")
6938       # FIXME: pass debug option from opcode to backend
6939       result = self.rpc.call_instance_os_add(inst.primary_node,
6940                                              (inst, self.os_inst), True,
6941                                              self.op.debug_level)
6942       result.Raise("Could not install OS for instance %s on node %s" %
6943                    (inst.name, inst.primary_node))
6944     finally:
6945       _ShutdownInstanceDisks(self, inst)
6946
6947
6948 class LUInstanceRecreateDisks(LogicalUnit):
6949   """Recreate an instance's missing disks.
6950
6951   """
6952   HPATH = "instance-recreate-disks"
6953   HTYPE = constants.HTYPE_INSTANCE
6954   REQ_BGL = False
6955
6956   _MODIFYABLE = frozenset([
6957     constants.IDISK_SIZE,
6958     constants.IDISK_MODE,
6959     ])
6960
6961   # New or changed disk parameters may have different semantics
6962   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6963     constants.IDISK_ADOPT,
6964
6965     # TODO: Implement support changing VG while recreating
6966     constants.IDISK_VG,
6967     constants.IDISK_METAVG,
6968     ]))
6969
6970   def CheckArguments(self):
6971     if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6972       # Normalize and convert deprecated list of disk indices
6973       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
6974
6975     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
6976     if duplicates:
6977       raise errors.OpPrereqError("Some disks have been specified more than"
6978                                  " once: %s" % utils.CommaJoin(duplicates),
6979                                  errors.ECODE_INVAL)
6980
6981     for (idx, params) in self.op.disks:
6982       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6983       unsupported = frozenset(params.keys()) - self._MODIFYABLE
6984       if unsupported:
6985         raise errors.OpPrereqError("Parameters for disk %s try to change"
6986                                    " unmodifyable parameter(s): %s" %
6987                                    (idx, utils.CommaJoin(unsupported)),
6988                                    errors.ECODE_INVAL)
6989
6990   def ExpandNames(self):
6991     self._ExpandAndLockInstance()
6992     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6993     if self.op.nodes:
6994       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6995       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6996     else:
6997       self.needed_locks[locking.LEVEL_NODE] = []
6998     self.needed_locks[locking.LEVEL_NODE_RES] = []
6999
7000   def DeclareLocks(self, level):
7001     if level == locking.LEVEL_NODE:
7002       # if we replace the nodes, we only need to lock the old primary,
7003       # otherwise we need to lock all nodes for disk re-creation
7004       primary_only = bool(self.op.nodes)
7005       self._LockInstancesNodes(primary_only=primary_only)
7006     elif level == locking.LEVEL_NODE_RES:
7007       # Copy node locks
7008       self.needed_locks[locking.LEVEL_NODE_RES] = \
7009         self.needed_locks[locking.LEVEL_NODE][:]
7010
7011   def BuildHooksEnv(self):
7012     """Build hooks env.
7013
7014     This runs on master, primary and secondary nodes of the instance.
7015
7016     """
7017     return _BuildInstanceHookEnvByObject(self, self.instance)
7018
7019   def BuildHooksNodes(self):
7020     """Build hooks nodes.
7021
7022     """
7023     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7024     return (nl, nl)
7025
7026   def CheckPrereq(self):
7027     """Check prerequisites.
7028
7029     This checks that the instance is in the cluster and is not running.
7030
7031     """
7032     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7033     assert instance is not None, \
7034       "Cannot retrieve locked instance %s" % self.op.instance_name
7035     if self.op.nodes:
7036       if len(self.op.nodes) != len(instance.all_nodes):
7037         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7038                                    " %d replacement nodes were specified" %
7039                                    (instance.name, len(instance.all_nodes),
7040                                     len(self.op.nodes)),
7041                                    errors.ECODE_INVAL)
7042       assert instance.disk_template != constants.DT_DRBD8 or \
7043           len(self.op.nodes) == 2
7044       assert instance.disk_template != constants.DT_PLAIN or \
7045           len(self.op.nodes) == 1
7046       primary_node = self.op.nodes[0]
7047     else:
7048       primary_node = instance.primary_node
7049     _CheckNodeOnline(self, primary_node)
7050
7051     if instance.disk_template == constants.DT_DISKLESS:
7052       raise errors.OpPrereqError("Instance '%s' has no disks" %
7053                                  self.op.instance_name, errors.ECODE_INVAL)
7054
7055     # if we replace nodes *and* the old primary is offline, we don't
7056     # check
7057     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7058     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7059     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7060     if not (self.op.nodes and old_pnode.offline):
7061       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7062                           msg="cannot recreate disks")
7063
7064     if self.op.disks:
7065       self.disks = dict(self.op.disks)
7066     else:
7067       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7068
7069     maxidx = max(self.disks.keys())
7070     if maxidx >= len(instance.disks):
7071       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7072                                  errors.ECODE_INVAL)
7073
7074     if (self.op.nodes and
7075         sorted(self.disks.keys()) != range(len(instance.disks))):
7076       raise errors.OpPrereqError("Can't recreate disks partially and"
7077                                  " change the nodes at the same time",
7078                                  errors.ECODE_INVAL)
7079
7080     self.instance = instance
7081
7082   def Exec(self, feedback_fn):
7083     """Recreate the disks.
7084
7085     """
7086     instance = self.instance
7087
7088     assert (self.owned_locks(locking.LEVEL_NODE) ==
7089             self.owned_locks(locking.LEVEL_NODE_RES))
7090
7091     to_skip = []
7092     mods = [] # keeps track of needed changes
7093
7094     for idx, disk in enumerate(instance.disks):
7095       try:
7096         changes = self.disks[idx]
7097       except KeyError:
7098         # Disk should not be recreated
7099         to_skip.append(idx)
7100         continue
7101
7102       # update secondaries for disks, if needed
7103       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7104         # need to update the nodes and minors
7105         assert len(self.op.nodes) == 2
7106         assert len(disk.logical_id) == 6 # otherwise disk internals
7107                                          # have changed
7108         (_, _, old_port, _, _, old_secret) = disk.logical_id
7109         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7110         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7111                   new_minors[0], new_minors[1], old_secret)
7112         assert len(disk.logical_id) == len(new_id)
7113       else:
7114         new_id = None
7115
7116       mods.append((idx, new_id, changes))
7117
7118     # now that we have passed all asserts above, we can apply the mods
7119     # in a single run (to avoid partial changes)
7120     for idx, new_id, changes in mods:
7121       disk = instance.disks[idx]
7122       if new_id is not None:
7123         assert disk.dev_type == constants.LD_DRBD8
7124         disk.logical_id = new_id
7125       if changes:
7126         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7127                     mode=changes.get(constants.IDISK_MODE, None))
7128
7129     # change primary node, if needed
7130     if self.op.nodes:
7131       instance.primary_node = self.op.nodes[0]
7132       self.LogWarning("Changing the instance's nodes, you will have to"
7133                       " remove any disks left on the older nodes manually")
7134
7135     if self.op.nodes:
7136       self.cfg.Update(instance, feedback_fn)
7137
7138     _CreateDisks(self, instance, to_skip=to_skip)
7139
7140
7141 class LUInstanceRename(LogicalUnit):
7142   """Rename an instance.
7143
7144   """
7145   HPATH = "instance-rename"
7146   HTYPE = constants.HTYPE_INSTANCE
7147
7148   def CheckArguments(self):
7149     """Check arguments.
7150
7151     """
7152     if self.op.ip_check and not self.op.name_check:
7153       # TODO: make the ip check more flexible and not depend on the name check
7154       raise errors.OpPrereqError("IP address check requires a name check",
7155                                  errors.ECODE_INVAL)
7156
7157   def BuildHooksEnv(self):
7158     """Build hooks env.
7159
7160     This runs on master, primary and secondary nodes of the instance.
7161
7162     """
7163     env = _BuildInstanceHookEnvByObject(self, self.instance)
7164     env["INSTANCE_NEW_NAME"] = self.op.new_name
7165     return env
7166
7167   def BuildHooksNodes(self):
7168     """Build hooks nodes.
7169
7170     """
7171     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7172     return (nl, nl)
7173
7174   def CheckPrereq(self):
7175     """Check prerequisites.
7176
7177     This checks that the instance is in the cluster and is not running.
7178
7179     """
7180     self.op.instance_name = _ExpandInstanceName(self.cfg,
7181                                                 self.op.instance_name)
7182     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7183     assert instance is not None
7184     _CheckNodeOnline(self, instance.primary_node)
7185     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7186                         msg="cannot rename")
7187     self.instance = instance
7188
7189     new_name = self.op.new_name
7190     if self.op.name_check:
7191       hostname = netutils.GetHostname(name=new_name)
7192       if hostname.name != new_name:
7193         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7194                      hostname.name)
7195       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7196         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7197                                     " same as given hostname '%s'") %
7198                                     (hostname.name, self.op.new_name),
7199                                     errors.ECODE_INVAL)
7200       new_name = self.op.new_name = hostname.name
7201       if (self.op.ip_check and
7202           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7203         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7204                                    (hostname.ip, new_name),
7205                                    errors.ECODE_NOTUNIQUE)
7206
7207     instance_list = self.cfg.GetInstanceList()
7208     if new_name in instance_list and new_name != instance.name:
7209       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7210                                  new_name, errors.ECODE_EXISTS)
7211
7212   def Exec(self, feedback_fn):
7213     """Rename the instance.
7214
7215     """
7216     inst = self.instance
7217     old_name = inst.name
7218
7219     rename_file_storage = False
7220     if (inst.disk_template in constants.DTS_FILEBASED and
7221         self.op.new_name != inst.name):
7222       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7223       rename_file_storage = True
7224
7225     self.cfg.RenameInstance(inst.name, self.op.new_name)
7226     # Change the instance lock. This is definitely safe while we hold the BGL.
7227     # Otherwise the new lock would have to be added in acquired mode.
7228     assert self.REQ_BGL
7229     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7230     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7231
7232     # re-read the instance from the configuration after rename
7233     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7234
7235     if rename_file_storage:
7236       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7237       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7238                                                      old_file_storage_dir,
7239                                                      new_file_storage_dir)
7240       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7241                    " (but the instance has been renamed in Ganeti)" %
7242                    (inst.primary_node, old_file_storage_dir,
7243                     new_file_storage_dir))
7244
7245     _StartInstanceDisks(self, inst, None)
7246     try:
7247       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7248                                                  old_name, self.op.debug_level)
7249       msg = result.fail_msg
7250       if msg:
7251         msg = ("Could not run OS rename script for instance %s on node %s"
7252                " (but the instance has been renamed in Ganeti): %s" %
7253                (inst.name, inst.primary_node, msg))
7254         self.proc.LogWarning(msg)
7255     finally:
7256       _ShutdownInstanceDisks(self, inst)
7257
7258     return inst.name
7259
7260
7261 class LUInstanceRemove(LogicalUnit):
7262   """Remove an instance.
7263
7264   """
7265   HPATH = "instance-remove"
7266   HTYPE = constants.HTYPE_INSTANCE
7267   REQ_BGL = False
7268
7269   def ExpandNames(self):
7270     self._ExpandAndLockInstance()
7271     self.needed_locks[locking.LEVEL_NODE] = []
7272     self.needed_locks[locking.LEVEL_NODE_RES] = []
7273     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7274
7275   def DeclareLocks(self, level):
7276     if level == locking.LEVEL_NODE:
7277       self._LockInstancesNodes()
7278     elif level == locking.LEVEL_NODE_RES:
7279       # Copy node locks
7280       self.needed_locks[locking.LEVEL_NODE_RES] = \
7281         self.needed_locks[locking.LEVEL_NODE][:]
7282
7283   def BuildHooksEnv(self):
7284     """Build hooks env.
7285
7286     This runs on master, primary and secondary nodes of the instance.
7287
7288     """
7289     env = _BuildInstanceHookEnvByObject(self, self.instance)
7290     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7291     return env
7292
7293   def BuildHooksNodes(self):
7294     """Build hooks nodes.
7295
7296     """
7297     nl = [self.cfg.GetMasterNode()]
7298     nl_post = list(self.instance.all_nodes) + nl
7299     return (nl, nl_post)
7300
7301   def CheckPrereq(self):
7302     """Check prerequisites.
7303
7304     This checks that the instance is in the cluster.
7305
7306     """
7307     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7308     assert self.instance is not None, \
7309       "Cannot retrieve locked instance %s" % self.op.instance_name
7310
7311   def Exec(self, feedback_fn):
7312     """Remove the instance.
7313
7314     """
7315     instance = self.instance
7316     logging.info("Shutting down instance %s on node %s",
7317                  instance.name, instance.primary_node)
7318
7319     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7320                                              self.op.shutdown_timeout)
7321     msg = result.fail_msg
7322     if msg:
7323       if self.op.ignore_failures:
7324         feedback_fn("Warning: can't shutdown instance: %s" % msg)
7325       else:
7326         raise errors.OpExecError("Could not shutdown instance %s on"
7327                                  " node %s: %s" %
7328                                  (instance.name, instance.primary_node, msg))
7329
7330     assert (self.owned_locks(locking.LEVEL_NODE) ==
7331             self.owned_locks(locking.LEVEL_NODE_RES))
7332     assert not (set(instance.all_nodes) -
7333                 self.owned_locks(locking.LEVEL_NODE)), \
7334       "Not owning correct locks"
7335
7336     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7337
7338
7339 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7340   """Utility function to remove an instance.
7341
7342   """
7343   logging.info("Removing block devices for instance %s", instance.name)
7344
7345   if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7346     if not ignore_failures:
7347       raise errors.OpExecError("Can't remove instance's disks")
7348     feedback_fn("Warning: can't remove instance's disks")
7349
7350   logging.info("Removing instance %s out of cluster config", instance.name)
7351
7352   lu.cfg.RemoveInstance(instance.name)
7353
7354   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7355     "Instance lock removal conflict"
7356
7357   # Remove lock for the instance
7358   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7359
7360
7361 class LUInstanceQuery(NoHooksLU):
7362   """Logical unit for querying instances.
7363
7364   """
7365   # pylint: disable=W0142
7366   REQ_BGL = False
7367
7368   def CheckArguments(self):
7369     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7370                              self.op.output_fields, self.op.use_locking)
7371
7372   def ExpandNames(self):
7373     self.iq.ExpandNames(self)
7374
7375   def DeclareLocks(self, level):
7376     self.iq.DeclareLocks(self, level)
7377
7378   def Exec(self, feedback_fn):
7379     return self.iq.OldStyleQuery(self)
7380
7381
7382 class LUInstanceFailover(LogicalUnit):
7383   """Failover an instance.
7384
7385   """
7386   HPATH = "instance-failover"
7387   HTYPE = constants.HTYPE_INSTANCE
7388   REQ_BGL = False
7389
7390   def CheckArguments(self):
7391     """Check the arguments.
7392
7393     """
7394     self.iallocator = getattr(self.op, "iallocator", None)
7395     self.target_node = getattr(self.op, "target_node", None)
7396
7397   def ExpandNames(self):
7398     self._ExpandAndLockInstance()
7399
7400     if self.op.target_node is not None:
7401       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7402
7403     self.needed_locks[locking.LEVEL_NODE] = []
7404     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7405
7406     self.needed_locks[locking.LEVEL_NODE_RES] = []
7407     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7408
7409     ignore_consistency = self.op.ignore_consistency
7410     shutdown_timeout = self.op.shutdown_timeout
7411     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7412                                        cleanup=False,
7413                                        failover=True,
7414                                        ignore_consistency=ignore_consistency,
7415                                        shutdown_timeout=shutdown_timeout,
7416                                        ignore_ipolicy=self.op.ignore_ipolicy)
7417     self.tasklets = [self._migrater]
7418
7419   def DeclareLocks(self, level):
7420     if level == locking.LEVEL_NODE:
7421       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7422       if instance.disk_template in constants.DTS_EXT_MIRROR:
7423         if self.op.target_node is None:
7424           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7425         else:
7426           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7427                                                    self.op.target_node]
7428         del self.recalculate_locks[locking.LEVEL_NODE]
7429       else:
7430         self._LockInstancesNodes()
7431     elif level == locking.LEVEL_NODE_RES:
7432       # Copy node locks
7433       self.needed_locks[locking.LEVEL_NODE_RES] = \
7434         self.needed_locks[locking.LEVEL_NODE][:]
7435
7436   def BuildHooksEnv(self):
7437     """Build hooks env.
7438
7439     This runs on master, primary and secondary nodes of the instance.
7440
7441     """
7442     instance = self._migrater.instance
7443     source_node = instance.primary_node
7444     target_node = self.op.target_node
7445     env = {
7446       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7447       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7448       "OLD_PRIMARY": source_node,
7449       "NEW_PRIMARY": target_node,
7450       }
7451
7452     if instance.disk_template in constants.DTS_INT_MIRROR:
7453       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7454       env["NEW_SECONDARY"] = source_node
7455     else:
7456       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7457
7458     env.update(_BuildInstanceHookEnvByObject(self, instance))
7459
7460     return env
7461
7462   def BuildHooksNodes(self):
7463     """Build hooks nodes.
7464
7465     """
7466     instance = self._migrater.instance
7467     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7468     return (nl, nl + [instance.primary_node])
7469
7470
7471 class LUInstanceMigrate(LogicalUnit):
7472   """Migrate an instance.
7473
7474   This is migration without shutting down, compared to the failover,
7475   which is done with shutdown.
7476
7477   """
7478   HPATH = "instance-migrate"
7479   HTYPE = constants.HTYPE_INSTANCE
7480   REQ_BGL = False
7481
7482   def ExpandNames(self):
7483     self._ExpandAndLockInstance()
7484
7485     if self.op.target_node is not None:
7486       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7487
7488     self.needed_locks[locking.LEVEL_NODE] = []
7489     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7490
7491     self.needed_locks[locking.LEVEL_NODE] = []
7492     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7493
7494     self._migrater = \
7495       TLMigrateInstance(self, self.op.instance_name,
7496                         cleanup=self.op.cleanup,
7497                         failover=False,
7498                         fallback=self.op.allow_failover,
7499                         allow_runtime_changes=self.op.allow_runtime_changes,
7500                         ignore_ipolicy=self.op.ignore_ipolicy)
7501     self.tasklets = [self._migrater]
7502
7503   def DeclareLocks(self, level):
7504     if level == locking.LEVEL_NODE:
7505       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7506       if instance.disk_template in constants.DTS_EXT_MIRROR:
7507         if self.op.target_node is None:
7508           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7509         else:
7510           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7511                                                    self.op.target_node]
7512         del self.recalculate_locks[locking.LEVEL_NODE]
7513       else:
7514         self._LockInstancesNodes()
7515     elif level == locking.LEVEL_NODE_RES:
7516       # Copy node locks
7517       self.needed_locks[locking.LEVEL_NODE_RES] = \
7518         self.needed_locks[locking.LEVEL_NODE][:]
7519
7520   def BuildHooksEnv(self):
7521     """Build hooks env.
7522
7523     This runs on master, primary and secondary nodes of the instance.
7524
7525     """
7526     instance = self._migrater.instance
7527     source_node = instance.primary_node
7528     target_node = self.op.target_node
7529     env = _BuildInstanceHookEnvByObject(self, instance)
7530     env.update({
7531       "MIGRATE_LIVE": self._migrater.live,
7532       "MIGRATE_CLEANUP": self.op.cleanup,
7533       "OLD_PRIMARY": source_node,
7534       "NEW_PRIMARY": target_node,
7535       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7536       })
7537
7538     if instance.disk_template in constants.DTS_INT_MIRROR:
7539       env["OLD_SECONDARY"] = target_node
7540       env["NEW_SECONDARY"] = source_node
7541     else:
7542       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7543
7544     return env
7545
7546   def BuildHooksNodes(self):
7547     """Build hooks nodes.
7548
7549     """
7550     instance = self._migrater.instance
7551     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7552     return (nl, nl + [instance.primary_node])
7553
7554
7555 class LUInstanceMove(LogicalUnit):
7556   """Move an instance by data-copying.
7557
7558   """
7559   HPATH = "instance-move"
7560   HTYPE = constants.HTYPE_INSTANCE
7561   REQ_BGL = False
7562
7563   def ExpandNames(self):
7564     self._ExpandAndLockInstance()
7565     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7566     self.op.target_node = target_node
7567     self.needed_locks[locking.LEVEL_NODE] = [target_node]
7568     self.needed_locks[locking.LEVEL_NODE_RES] = []
7569     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7570
7571   def DeclareLocks(self, level):
7572     if level == locking.LEVEL_NODE:
7573       self._LockInstancesNodes(primary_only=True)
7574     elif level == locking.LEVEL_NODE_RES:
7575       # Copy node locks
7576       self.needed_locks[locking.LEVEL_NODE_RES] = \
7577         self.needed_locks[locking.LEVEL_NODE][:]
7578
7579   def BuildHooksEnv(self):
7580     """Build hooks env.
7581
7582     This runs on master, primary and secondary nodes of the instance.
7583
7584     """
7585     env = {
7586       "TARGET_NODE": self.op.target_node,
7587       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7588       }
7589     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7590     return env
7591
7592   def BuildHooksNodes(self):
7593     """Build hooks nodes.
7594
7595     """
7596     nl = [
7597       self.cfg.GetMasterNode(),
7598       self.instance.primary_node,
7599       self.op.target_node,
7600       ]
7601     return (nl, nl)
7602
7603   def CheckPrereq(self):
7604     """Check prerequisites.
7605
7606     This checks that the instance is in the cluster.
7607
7608     """
7609     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7610     assert self.instance is not None, \
7611       "Cannot retrieve locked instance %s" % self.op.instance_name
7612
7613     node = self.cfg.GetNodeInfo(self.op.target_node)
7614     assert node is not None, \
7615       "Cannot retrieve locked node %s" % self.op.target_node
7616
7617     self.target_node = target_node = node.name
7618
7619     if target_node == instance.primary_node:
7620       raise errors.OpPrereqError("Instance %s is already on the node %s" %
7621                                  (instance.name, target_node),
7622                                  errors.ECODE_STATE)
7623
7624     bep = self.cfg.GetClusterInfo().FillBE(instance)
7625
7626     for idx, dsk in enumerate(instance.disks):
7627       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7628         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7629                                    " cannot copy" % idx, errors.ECODE_STATE)
7630
7631     _CheckNodeOnline(self, target_node)
7632     _CheckNodeNotDrained(self, target_node)
7633     _CheckNodeVmCapable(self, target_node)
7634     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7635                                      self.cfg.GetNodeGroup(node.group))
7636     _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7637                             ignore=self.op.ignore_ipolicy)
7638
7639     if instance.admin_state == constants.ADMINST_UP:
7640       # check memory requirements on the secondary node
7641       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7642                            instance.name, bep[constants.BE_MAXMEM],
7643                            instance.hypervisor)
7644     else:
7645       self.LogInfo("Not checking memory on the secondary node as"
7646                    " instance will not be started")
7647
7648     # check bridge existance
7649     _CheckInstanceBridgesExist(self, instance, node=target_node)
7650
7651   def Exec(self, feedback_fn):
7652     """Move an instance.
7653
7654     The move is done by shutting it down on its present node, copying
7655     the data over (slow) and starting it on the new node.
7656
7657     """
7658     instance = self.instance
7659
7660     source_node = instance.primary_node
7661     target_node = self.target_node
7662
7663     self.LogInfo("Shutting down instance %s on source node %s",
7664                  instance.name, source_node)
7665
7666     assert (self.owned_locks(locking.LEVEL_NODE) ==
7667             self.owned_locks(locking.LEVEL_NODE_RES))
7668
7669     result = self.rpc.call_instance_shutdown(source_node, instance,
7670                                              self.op.shutdown_timeout)
7671     msg = result.fail_msg
7672     if msg:
7673       if self.op.ignore_consistency:
7674         self.proc.LogWarning("Could not shutdown instance %s on node %s."
7675                              " Proceeding anyway. Please make sure node"
7676                              " %s is down. Error details: %s",
7677                              instance.name, source_node, source_node, msg)
7678       else:
7679         raise errors.OpExecError("Could not shutdown instance %s on"
7680                                  " node %s: %s" %
7681                                  (instance.name, source_node, msg))
7682
7683     # create the target disks
7684     try:
7685       _CreateDisks(self, instance, target_node=target_node)
7686     except errors.OpExecError:
7687       self.LogWarning("Device creation failed, reverting...")
7688       try:
7689         _RemoveDisks(self, instance, target_node=target_node)
7690       finally:
7691         self.cfg.ReleaseDRBDMinors(instance.name)
7692         raise
7693
7694     cluster_name = self.cfg.GetClusterInfo().cluster_name
7695
7696     errs = []
7697     # activate, get path, copy the data over
7698     for idx, disk in enumerate(instance.disks):
7699       self.LogInfo("Copying data for disk %d", idx)
7700       result = self.rpc.call_blockdev_assemble(target_node, disk,
7701                                                instance.name, True, idx)
7702       if result.fail_msg:
7703         self.LogWarning("Can't assemble newly created disk %d: %s",
7704                         idx, result.fail_msg)
7705         errs.append(result.fail_msg)
7706         break
7707       dev_path = result.payload
7708       result = self.rpc.call_blockdev_export(source_node, disk,
7709                                              target_node, dev_path,
7710                                              cluster_name)
7711       if result.fail_msg:
7712         self.LogWarning("Can't copy data over for disk %d: %s",
7713                         idx, result.fail_msg)
7714         errs.append(result.fail_msg)
7715         break
7716
7717     if errs:
7718       self.LogWarning("Some disks failed to copy, aborting")
7719       try:
7720         _RemoveDisks(self, instance, target_node=target_node)
7721       finally:
7722         self.cfg.ReleaseDRBDMinors(instance.name)
7723         raise errors.OpExecError("Errors during disk copy: %s" %
7724                                  (",".join(errs),))
7725
7726     instance.primary_node = target_node
7727     self.cfg.Update(instance, feedback_fn)
7728
7729     self.LogInfo("Removing the disks on the original node")
7730     _RemoveDisks(self, instance, target_node=source_node)
7731
7732     # Only start the instance if it's marked as up
7733     if instance.admin_state == constants.ADMINST_UP:
7734       self.LogInfo("Starting instance %s on node %s",
7735                    instance.name, target_node)
7736
7737       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7738                                            ignore_secondaries=True)
7739       if not disks_ok:
7740         _ShutdownInstanceDisks(self, instance)
7741         raise errors.OpExecError("Can't activate the instance's disks")
7742
7743       result = self.rpc.call_instance_start(target_node,
7744                                             (instance, None, None), False)
7745       msg = result.fail_msg
7746       if msg:
7747         _ShutdownInstanceDisks(self, instance)
7748         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7749                                  (instance.name, target_node, msg))
7750
7751
7752 class LUNodeMigrate(LogicalUnit):
7753   """Migrate all instances from a node.
7754
7755   """
7756   HPATH = "node-migrate"
7757   HTYPE = constants.HTYPE_NODE
7758   REQ_BGL = False
7759
7760   def CheckArguments(self):
7761     pass
7762
7763   def ExpandNames(self):
7764     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7765
7766     self.share_locks = _ShareAll()
7767     self.needed_locks = {
7768       locking.LEVEL_NODE: [self.op.node_name],
7769       }
7770
7771   def BuildHooksEnv(self):
7772     """Build hooks env.
7773
7774     This runs on the master, the primary and all the secondaries.
7775
7776     """
7777     return {
7778       "NODE_NAME": self.op.node_name,
7779       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7780       }
7781
7782   def BuildHooksNodes(self):
7783     """Build hooks nodes.
7784
7785     """
7786     nl = [self.cfg.GetMasterNode()]
7787     return (nl, nl)
7788
7789   def CheckPrereq(self):
7790     pass
7791
7792   def Exec(self, feedback_fn):
7793     # Prepare jobs for migration instances
7794     allow_runtime_changes = self.op.allow_runtime_changes
7795     jobs = [
7796       [opcodes.OpInstanceMigrate(instance_name=inst.name,
7797                                  mode=self.op.mode,
7798                                  live=self.op.live,
7799                                  iallocator=self.op.iallocator,
7800                                  target_node=self.op.target_node,
7801                                  allow_runtime_changes=allow_runtime_changes,
7802                                  ignore_ipolicy=self.op.ignore_ipolicy)]
7803       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7804       ]
7805
7806     # TODO: Run iallocator in this opcode and pass correct placement options to
7807     # OpInstanceMigrate. Since other jobs can modify the cluster between
7808     # running the iallocator and the actual migration, a good consistency model
7809     # will have to be found.
7810
7811     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7812             frozenset([self.op.node_name]))
7813
7814     return ResultWithJobs(jobs)
7815
7816
7817 class TLMigrateInstance(Tasklet):
7818   """Tasklet class for instance migration.
7819
7820   @type live: boolean
7821   @ivar live: whether the migration will be done live or non-live;
7822       this variable is initalized only after CheckPrereq has run
7823   @type cleanup: boolean
7824   @ivar cleanup: Wheater we cleanup from a failed migration
7825   @type iallocator: string
7826   @ivar iallocator: The iallocator used to determine target_node
7827   @type target_node: string
7828   @ivar target_node: If given, the target_node to reallocate the instance to
7829   @type failover: boolean
7830   @ivar failover: Whether operation results in failover or migration
7831   @type fallback: boolean
7832   @ivar fallback: Whether fallback to failover is allowed if migration not
7833                   possible
7834   @type ignore_consistency: boolean
7835   @ivar ignore_consistency: Wheter we should ignore consistency between source
7836                             and target node
7837   @type shutdown_timeout: int
7838   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7839   @type ignore_ipolicy: bool
7840   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7841
7842   """
7843
7844   # Constants
7845   _MIGRATION_POLL_INTERVAL = 1      # seconds
7846   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7847
7848   def __init__(self, lu, instance_name, cleanup=False,
7849                failover=False, fallback=False,
7850                ignore_consistency=False,
7851                allow_runtime_changes=True,
7852                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7853                ignore_ipolicy=False):
7854     """Initializes this class.
7855
7856     """
7857     Tasklet.__init__(self, lu)
7858
7859     # Parameters
7860     self.instance_name = instance_name
7861     self.cleanup = cleanup
7862     self.live = False # will be overridden later
7863     self.failover = failover
7864     self.fallback = fallback
7865     self.ignore_consistency = ignore_consistency
7866     self.shutdown_timeout = shutdown_timeout
7867     self.ignore_ipolicy = ignore_ipolicy
7868     self.allow_runtime_changes = allow_runtime_changes
7869
7870   def CheckPrereq(self):
7871     """Check prerequisites.
7872
7873     This checks that the instance is in the cluster.
7874
7875     """
7876     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7877     instance = self.cfg.GetInstanceInfo(instance_name)
7878     assert instance is not None
7879     self.instance = instance
7880     cluster = self.cfg.GetClusterInfo()
7881
7882     if (not self.cleanup and
7883         not instance.admin_state == constants.ADMINST_UP and
7884         not self.failover and self.fallback):
7885       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7886                       " switching to failover")
7887       self.failover = True
7888
7889     if instance.disk_template not in constants.DTS_MIRRORED:
7890       if self.failover:
7891         text = "failovers"
7892       else:
7893         text = "migrations"
7894       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7895                                  " %s" % (instance.disk_template, text),
7896                                  errors.ECODE_STATE)
7897
7898     if instance.disk_template in constants.DTS_EXT_MIRROR:
7899       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7900
7901       if self.lu.op.iallocator:
7902         self._RunAllocator()
7903       else:
7904         # We set set self.target_node as it is required by
7905         # BuildHooksEnv
7906         self.target_node = self.lu.op.target_node
7907
7908       # Check that the target node is correct in terms of instance policy
7909       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7910       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7911       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7912       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7913                               ignore=self.ignore_ipolicy)
7914
7915       # self.target_node is already populated, either directly or by the
7916       # iallocator run
7917       target_node = self.target_node
7918       if self.target_node == instance.primary_node:
7919         raise errors.OpPrereqError("Cannot migrate instance %s"
7920                                    " to its primary (%s)" %
7921                                    (instance.name, instance.primary_node))
7922
7923       if len(self.lu.tasklets) == 1:
7924         # It is safe to release locks only when we're the only tasklet
7925         # in the LU
7926         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7927                       keep=[instance.primary_node, self.target_node])
7928
7929     else:
7930       secondary_nodes = instance.secondary_nodes
7931       if not secondary_nodes:
7932         raise errors.ConfigurationError("No secondary node but using"
7933                                         " %s disk template" %
7934                                         instance.disk_template)
7935       target_node = secondary_nodes[0]
7936       if self.lu.op.iallocator or (self.lu.op.target_node and
7937                                    self.lu.op.target_node != target_node):
7938         if self.failover:
7939           text = "failed over"
7940         else:
7941           text = "migrated"
7942         raise errors.OpPrereqError("Instances with disk template %s cannot"
7943                                    " be %s to arbitrary nodes"
7944                                    " (neither an iallocator nor a target"
7945                                    " node can be passed)" %
7946                                    (instance.disk_template, text),
7947                                    errors.ECODE_INVAL)
7948       nodeinfo = self.cfg.GetNodeInfo(target_node)
7949       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7950       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7951       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7952                               ignore=self.ignore_ipolicy)
7953
7954     i_be = cluster.FillBE(instance)
7955
7956     # check memory requirements on the secondary node
7957     if (not self.cleanup and
7958          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7959       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7960                                                "migrating instance %s" %
7961                                                instance.name,
7962                                                i_be[constants.BE_MINMEM],
7963                                                instance.hypervisor)
7964     else:
7965       self.lu.LogInfo("Not checking memory on the secondary node as"
7966                       " instance will not be started")
7967
7968     # check if failover must be forced instead of migration
7969     if (not self.cleanup and not self.failover and
7970         i_be[constants.BE_ALWAYS_FAILOVER]):
7971       if self.fallback:
7972         self.lu.LogInfo("Instance configured to always failover; fallback"
7973                         " to failover")
7974         self.failover = True
7975       else:
7976         raise errors.OpPrereqError("This instance has been configured to"
7977                                    " always failover, please allow failover",
7978                                    errors.ECODE_STATE)
7979
7980     # check bridge existance
7981     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7982
7983     if not self.cleanup:
7984       _CheckNodeNotDrained(self.lu, target_node)
7985       if not self.failover:
7986         result = self.rpc.call_instance_migratable(instance.primary_node,
7987                                                    instance)
7988         if result.fail_msg and self.fallback:
7989           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7990                           " failover")
7991           self.failover = True
7992         else:
7993           result.Raise("Can't migrate, please use failover",
7994                        prereq=True, ecode=errors.ECODE_STATE)
7995
7996     assert not (self.failover and self.cleanup)
7997
7998     if not self.failover:
7999       if self.lu.op.live is not None and self.lu.op.mode is not None:
8000         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8001                                    " parameters are accepted",
8002                                    errors.ECODE_INVAL)
8003       if self.lu.op.live is not None:
8004         if self.lu.op.live:
8005           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8006         else:
8007           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8008         # reset the 'live' parameter to None so that repeated
8009         # invocations of CheckPrereq do not raise an exception
8010         self.lu.op.live = None
8011       elif self.lu.op.mode is None:
8012         # read the default value from the hypervisor
8013         i_hv = cluster.FillHV(self.instance, skip_globals=False)
8014         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8015
8016       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8017     else:
8018       # Failover is never live
8019       self.live = False
8020
8021     if not (self.failover or self.cleanup):
8022       remote_info = self.rpc.call_instance_info(instance.primary_node,
8023                                                 instance.name,
8024                                                 instance.hypervisor)
8025       remote_info.Raise("Error checking instance on node %s" %
8026                         instance.primary_node)
8027       instance_running = bool(remote_info.payload)
8028       if instance_running:
8029         self.current_mem = int(remote_info.payload["memory"])
8030
8031   def _RunAllocator(self):
8032     """Run the allocator based on input opcode.
8033
8034     """
8035     # FIXME: add a self.ignore_ipolicy option
8036     ial = IAllocator(self.cfg, self.rpc,
8037                      mode=constants.IALLOCATOR_MODE_RELOC,
8038                      name=self.instance_name,
8039                      relocate_from=[self.instance.primary_node],
8040                      )
8041
8042     ial.Run(self.lu.op.iallocator)
8043
8044     if not ial.success:
8045       raise errors.OpPrereqError("Can't compute nodes using"
8046                                  " iallocator '%s': %s" %
8047                                  (self.lu.op.iallocator, ial.info),
8048                                  errors.ECODE_NORES)
8049     if len(ial.result) != ial.required_nodes:
8050       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8051                                  " of nodes (%s), required %s" %
8052                                  (self.lu.op.iallocator, len(ial.result),
8053                                   ial.required_nodes), errors.ECODE_FAULT)
8054     self.target_node = ial.result[0]
8055     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8056                  self.instance_name, self.lu.op.iallocator,
8057                  utils.CommaJoin(ial.result))
8058
8059   def _WaitUntilSync(self):
8060     """Poll with custom rpc for disk sync.
8061
8062     This uses our own step-based rpc call.
8063
8064     """
8065     self.feedback_fn("* wait until resync is done")
8066     all_done = False
8067     while not all_done:
8068       all_done = True
8069       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8070                                             self.nodes_ip,
8071                                             self.instance.disks)
8072       min_percent = 100
8073       for node, nres in result.items():
8074         nres.Raise("Cannot resync disks on node %s" % node)
8075         node_done, node_percent = nres.payload
8076         all_done = all_done and node_done
8077         if node_percent is not None:
8078           min_percent = min(min_percent, node_percent)
8079       if not all_done:
8080         if min_percent < 100:
8081           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8082         time.sleep(2)
8083
8084   def _EnsureSecondary(self, node):
8085     """Demote a node to secondary.
8086
8087     """
8088     self.feedback_fn("* switching node %s to secondary mode" % node)
8089
8090     for dev in self.instance.disks:
8091       self.cfg.SetDiskID(dev, node)
8092
8093     result = self.rpc.call_blockdev_close(node, self.instance.name,
8094                                           self.instance.disks)
8095     result.Raise("Cannot change disk to secondary on node %s" % node)
8096
8097   def _GoStandalone(self):
8098     """Disconnect from the network.
8099
8100     """
8101     self.feedback_fn("* changing into standalone mode")
8102     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8103                                                self.instance.disks)
8104     for node, nres in result.items():
8105       nres.Raise("Cannot disconnect disks node %s" % node)
8106
8107   def _GoReconnect(self, multimaster):
8108     """Reconnect to the network.
8109
8110     """
8111     if multimaster:
8112       msg = "dual-master"
8113     else:
8114       msg = "single-master"
8115     self.feedback_fn("* changing disks into %s mode" % msg)
8116     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8117                                            self.instance.disks,
8118                                            self.instance.name, multimaster)
8119     for node, nres in result.items():
8120       nres.Raise("Cannot change disks config on node %s" % node)
8121
8122   def _ExecCleanup(self):
8123     """Try to cleanup after a failed migration.
8124
8125     The cleanup is done by:
8126       - check that the instance is running only on one node
8127         (and update the config if needed)
8128       - change disks on its secondary node to secondary
8129       - wait until disks are fully synchronized
8130       - disconnect from the network
8131       - change disks into single-master mode
8132       - wait again until disks are fully synchronized
8133
8134     """
8135     instance = self.instance
8136     target_node = self.target_node
8137     source_node = self.source_node
8138
8139     # check running on only one node
8140     self.feedback_fn("* checking where the instance actually runs"
8141                      " (if this hangs, the hypervisor might be in"
8142                      " a bad state)")
8143     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8144     for node, result in ins_l.items():
8145       result.Raise("Can't contact node %s" % node)
8146
8147     runningon_source = instance.name in ins_l[source_node].payload
8148     runningon_target = instance.name in ins_l[target_node].payload
8149
8150     if runningon_source and runningon_target:
8151       raise errors.OpExecError("Instance seems to be running on two nodes,"
8152                                " or the hypervisor is confused; you will have"
8153                                " to ensure manually that it runs only on one"
8154                                " and restart this operation")
8155
8156     if not (runningon_source or runningon_target):
8157       raise errors.OpExecError("Instance does not seem to be running at all;"
8158                                " in this case it's safer to repair by"
8159                                " running 'gnt-instance stop' to ensure disk"
8160                                " shutdown, and then restarting it")
8161
8162     if runningon_target:
8163       # the migration has actually succeeded, we need to update the config
8164       self.feedback_fn("* instance running on secondary node (%s),"
8165                        " updating config" % target_node)
8166       instance.primary_node = target_node
8167       self.cfg.Update(instance, self.feedback_fn)
8168       demoted_node = source_node
8169     else:
8170       self.feedback_fn("* instance confirmed to be running on its"
8171                        " primary node (%s)" % source_node)
8172       demoted_node = target_node
8173
8174     if instance.disk_template in constants.DTS_INT_MIRROR:
8175       self._EnsureSecondary(demoted_node)
8176       try:
8177         self._WaitUntilSync()
8178       except errors.OpExecError:
8179         # we ignore here errors, since if the device is standalone, it
8180         # won't be able to sync
8181         pass
8182       self._GoStandalone()
8183       self._GoReconnect(False)
8184       self._WaitUntilSync()
8185
8186     self.feedback_fn("* done")
8187
8188   def _RevertDiskStatus(self):
8189     """Try to revert the disk status after a failed migration.
8190
8191     """
8192     target_node = self.target_node
8193     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8194       return
8195
8196     try:
8197       self._EnsureSecondary(target_node)
8198       self._GoStandalone()
8199       self._GoReconnect(False)
8200       self._WaitUntilSync()
8201     except errors.OpExecError, err:
8202       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8203                          " please try to recover the instance manually;"
8204                          " error '%s'" % str(err))
8205
8206   def _AbortMigration(self):
8207     """Call the hypervisor code to abort a started migration.
8208
8209     """
8210     instance = self.instance
8211     target_node = self.target_node
8212     source_node = self.source_node
8213     migration_info = self.migration_info
8214
8215     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8216                                                                  instance,
8217                                                                  migration_info,
8218                                                                  False)
8219     abort_msg = abort_result.fail_msg
8220     if abort_msg:
8221       logging.error("Aborting migration failed on target node %s: %s",
8222                     target_node, abort_msg)
8223       # Don't raise an exception here, as we stil have to try to revert the
8224       # disk status, even if this step failed.
8225
8226     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8227         instance, False, self.live)
8228     abort_msg = abort_result.fail_msg
8229     if abort_msg:
8230       logging.error("Aborting migration failed on source node %s: %s",
8231                     source_node, abort_msg)
8232
8233   def _ExecMigration(self):
8234     """Migrate an instance.
8235
8236     The migrate is done by:
8237       - change the disks into dual-master mode
8238       - wait until disks are fully synchronized again
8239       - migrate the instance
8240       - change disks on the new secondary node (the old primary) to secondary
8241       - wait until disks are fully synchronized
8242       - change disks into single-master mode
8243
8244     """
8245     instance = self.instance
8246     target_node = self.target_node
8247     source_node = self.source_node
8248
8249     # Check for hypervisor version mismatch and warn the user.
8250     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8251                                        None, [self.instance.hypervisor])
8252     for ninfo in nodeinfo.values():
8253       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8254                   ninfo.node)
8255     (_, _, (src_info, )) = nodeinfo[source_node].payload
8256     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8257
8258     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8259         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8260       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8261       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8262       if src_version != dst_version:
8263         self.feedback_fn("* warning: hypervisor version mismatch between"
8264                          " source (%s) and target (%s) node" %
8265                          (src_version, dst_version))
8266
8267     self.feedback_fn("* checking disk consistency between source and target")
8268     for (idx, dev) in enumerate(instance.disks):
8269       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8270         raise errors.OpExecError("Disk %s is degraded or not fully"
8271                                  " synchronized on target node,"
8272                                  " aborting migration" % idx)
8273
8274     if self.current_mem > self.tgt_free_mem:
8275       if not self.allow_runtime_changes:
8276         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8277                                  " free memory to fit instance %s on target"
8278                                  " node %s (have %dMB, need %dMB)" %
8279                                  (instance.name, target_node,
8280                                   self.tgt_free_mem, self.current_mem))
8281       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8282       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8283                                                      instance,
8284                                                      self.tgt_free_mem)
8285       rpcres.Raise("Cannot modify instance runtime memory")
8286
8287     # First get the migration information from the remote node
8288     result = self.rpc.call_migration_info(source_node, instance)
8289     msg = result.fail_msg
8290     if msg:
8291       log_err = ("Failed fetching source migration information from %s: %s" %
8292                  (source_node, msg))
8293       logging.error(log_err)
8294       raise errors.OpExecError(log_err)
8295
8296     self.migration_info = migration_info = result.payload
8297
8298     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8299       # Then switch the disks to master/master mode
8300       self._EnsureSecondary(target_node)
8301       self._GoStandalone()
8302       self._GoReconnect(True)
8303       self._WaitUntilSync()
8304
8305     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8306     result = self.rpc.call_accept_instance(target_node,
8307                                            instance,
8308                                            migration_info,
8309                                            self.nodes_ip[target_node])
8310
8311     msg = result.fail_msg
8312     if msg:
8313       logging.error("Instance pre-migration failed, trying to revert"
8314                     " disk status: %s", msg)
8315       self.feedback_fn("Pre-migration failed, aborting")
8316       self._AbortMigration()
8317       self._RevertDiskStatus()
8318       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8319                                (instance.name, msg))
8320
8321     self.feedback_fn("* migrating instance to %s" % target_node)
8322     result = self.rpc.call_instance_migrate(source_node, instance,
8323                                             self.nodes_ip[target_node],
8324                                             self.live)
8325     msg = result.fail_msg
8326     if msg:
8327       logging.error("Instance migration failed, trying to revert"
8328                     " disk status: %s", msg)
8329       self.feedback_fn("Migration failed, aborting")
8330       self._AbortMigration()
8331       self._RevertDiskStatus()
8332       raise errors.OpExecError("Could not migrate instance %s: %s" %
8333                                (instance.name, msg))
8334
8335     self.feedback_fn("* starting memory transfer")
8336     last_feedback = time.time()
8337     while True:
8338       result = self.rpc.call_instance_get_migration_status(source_node,
8339                                                            instance)
8340       msg = result.fail_msg
8341       ms = result.payload   # MigrationStatus instance
8342       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8343         logging.error("Instance migration failed, trying to revert"
8344                       " disk status: %s", msg)
8345         self.feedback_fn("Migration failed, aborting")
8346         self._AbortMigration()
8347         self._RevertDiskStatus()
8348         raise errors.OpExecError("Could not migrate instance %s: %s" %
8349                                  (instance.name, msg))
8350
8351       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8352         self.feedback_fn("* memory transfer complete")
8353         break
8354
8355       if (utils.TimeoutExpired(last_feedback,
8356                                self._MIGRATION_FEEDBACK_INTERVAL) and
8357           ms.transferred_ram is not None):
8358         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8359         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8360         last_feedback = time.time()
8361
8362       time.sleep(self._MIGRATION_POLL_INTERVAL)
8363
8364     result = self.rpc.call_instance_finalize_migration_src(source_node,
8365                                                            instance,
8366                                                            True,
8367                                                            self.live)
8368     msg = result.fail_msg
8369     if msg:
8370       logging.error("Instance migration succeeded, but finalization failed"
8371                     " on the source node: %s", msg)
8372       raise errors.OpExecError("Could not finalize instance migration: %s" %
8373                                msg)
8374
8375     instance.primary_node = target_node
8376
8377     # distribute new instance config to the other nodes
8378     self.cfg.Update(instance, self.feedback_fn)
8379
8380     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8381                                                            instance,
8382                                                            migration_info,
8383                                                            True)
8384     msg = result.fail_msg
8385     if msg:
8386       logging.error("Instance migration succeeded, but finalization failed"
8387                     " on the target node: %s", msg)
8388       raise errors.OpExecError("Could not finalize instance migration: %s" %
8389                                msg)
8390
8391     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8392       self._EnsureSecondary(source_node)
8393       self._WaitUntilSync()
8394       self._GoStandalone()
8395       self._GoReconnect(False)
8396       self._WaitUntilSync()
8397
8398     # If the instance's disk template is `rbd' and there was a successful
8399     # migration, unmap the device from the source node.
8400     if self.instance.disk_template == constants.DT_RBD:
8401       disks = _ExpandCheckDisks(instance, instance.disks)
8402       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8403       for disk in disks:
8404         result = self.rpc.call_blockdev_shutdown(source_node, disk)
8405         msg = result.fail_msg
8406         if msg:
8407           logging.error("Migration was successful, but couldn't unmap the"
8408                         " block device %s on source node %s: %s",
8409                         disk.iv_name, source_node, msg)
8410           logging.error("You need to unmap the device %s manually on %s",
8411                         disk.iv_name, source_node)
8412
8413     self.feedback_fn("* done")
8414
8415   def _ExecFailover(self):
8416     """Failover an instance.
8417
8418     The failover is done by shutting it down on its present node and
8419     starting it on the secondary.
8420
8421     """
8422     instance = self.instance
8423     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8424
8425     source_node = instance.primary_node
8426     target_node = self.target_node
8427
8428     if instance.admin_state == constants.ADMINST_UP:
8429       self.feedback_fn("* checking disk consistency between source and target")
8430       for (idx, dev) in enumerate(instance.disks):
8431         # for drbd, these are drbd over lvm
8432         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8433           if primary_node.offline:
8434             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8435                              " target node %s" %
8436                              (primary_node.name, idx, target_node))
8437           elif not self.ignore_consistency:
8438             raise errors.OpExecError("Disk %s is degraded on target node,"
8439                                      " aborting failover" % idx)
8440     else:
8441       self.feedback_fn("* not checking disk consistency as instance is not"
8442                        " running")
8443
8444     self.feedback_fn("* shutting down instance on source node")
8445     logging.info("Shutting down instance %s on node %s",
8446                  instance.name, source_node)
8447
8448     result = self.rpc.call_instance_shutdown(source_node, instance,
8449                                              self.shutdown_timeout)
8450     msg = result.fail_msg
8451     if msg:
8452       if self.ignore_consistency or primary_node.offline:
8453         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8454                            " proceeding anyway; please make sure node"
8455                            " %s is down; error details: %s",
8456                            instance.name, source_node, source_node, msg)
8457       else:
8458         raise errors.OpExecError("Could not shutdown instance %s on"
8459                                  " node %s: %s" %
8460                                  (instance.name, source_node, msg))
8461
8462     self.feedback_fn("* deactivating the instance's disks on source node")
8463     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8464       raise errors.OpExecError("Can't shut down the instance's disks")
8465
8466     instance.primary_node = target_node
8467     # distribute new instance config to the other nodes
8468     self.cfg.Update(instance, self.feedback_fn)
8469
8470     # Only start the instance if it's marked as up
8471     if instance.admin_state == constants.ADMINST_UP:
8472       self.feedback_fn("* activating the instance's disks on target node %s" %
8473                        target_node)
8474       logging.info("Starting instance %s on node %s",
8475                    instance.name, target_node)
8476
8477       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8478                                            ignore_secondaries=True)
8479       if not disks_ok:
8480         _ShutdownInstanceDisks(self.lu, instance)
8481         raise errors.OpExecError("Can't activate the instance's disks")
8482
8483       self.feedback_fn("* starting the instance on the target node %s" %
8484                        target_node)
8485       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8486                                             False)
8487       msg = result.fail_msg
8488       if msg:
8489         _ShutdownInstanceDisks(self.lu, instance)
8490         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8491                                  (instance.name, target_node, msg))
8492
8493   def Exec(self, feedback_fn):
8494     """Perform the migration.
8495
8496     """
8497     self.feedback_fn = feedback_fn
8498     self.source_node = self.instance.primary_node
8499
8500     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8501     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8502       self.target_node = self.instance.secondary_nodes[0]
8503       # Otherwise self.target_node has been populated either
8504       # directly, or through an iallocator.
8505
8506     self.all_nodes = [self.source_node, self.target_node]
8507     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8508                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8509
8510     if self.failover:
8511       feedback_fn("Failover instance %s" % self.instance.name)
8512       self._ExecFailover()
8513     else:
8514       feedback_fn("Migrating instance %s" % self.instance.name)
8515
8516       if self.cleanup:
8517         return self._ExecCleanup()
8518       else:
8519         return self._ExecMigration()
8520
8521
8522 def _CreateBlockDev(lu, node, instance, device, force_create,
8523                     info, force_open):
8524   """Create a tree of block devices on a given node.
8525
8526   If this device type has to be created on secondaries, create it and
8527   all its children.
8528
8529   If not, just recurse to children keeping the same 'force' value.
8530
8531   @param lu: the lu on whose behalf we execute
8532   @param node: the node on which to create the device
8533   @type instance: L{objects.Instance}
8534   @param instance: the instance which owns the device
8535   @type device: L{objects.Disk}
8536   @param device: the device to create
8537   @type force_create: boolean
8538   @param force_create: whether to force creation of this device; this
8539       will be change to True whenever we find a device which has
8540       CreateOnSecondary() attribute
8541   @param info: the extra 'metadata' we should attach to the device
8542       (this will be represented as a LVM tag)
8543   @type force_open: boolean
8544   @param force_open: this parameter will be passes to the
8545       L{backend.BlockdevCreate} function where it specifies
8546       whether we run on primary or not, and it affects both
8547       the child assembly and the device own Open() execution
8548
8549   """
8550   if device.CreateOnSecondary():
8551     force_create = True
8552
8553   if device.children:
8554     for child in device.children:
8555       _CreateBlockDev(lu, node, instance, child, force_create,
8556                       info, force_open)
8557
8558   if not force_create:
8559     return
8560
8561   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8562
8563
8564 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8565   """Create a single block device on a given node.
8566
8567   This will not recurse over children of the device, so they must be
8568   created in advance.
8569
8570   @param lu: the lu on whose behalf we execute
8571   @param node: the node on which to create the device
8572   @type instance: L{objects.Instance}
8573   @param instance: the instance which owns the device
8574   @type device: L{objects.Disk}
8575   @param device: the device to create
8576   @param info: the extra 'metadata' we should attach to the device
8577       (this will be represented as a LVM tag)
8578   @type force_open: boolean
8579   @param force_open: this parameter will be passes to the
8580       L{backend.BlockdevCreate} function where it specifies
8581       whether we run on primary or not, and it affects both
8582       the child assembly and the device own Open() execution
8583
8584   """
8585   lu.cfg.SetDiskID(device, node)
8586   result = lu.rpc.call_blockdev_create(node, device, device.size,
8587                                        instance.name, force_open, info)
8588   result.Raise("Can't create block device %s on"
8589                " node %s for instance %s" % (device, node, instance.name))
8590   if device.physical_id is None:
8591     device.physical_id = result.payload
8592
8593
8594 def _GenerateUniqueNames(lu, exts):
8595   """Generate a suitable LV name.
8596
8597   This will generate a logical volume name for the given instance.
8598
8599   """
8600   results = []
8601   for val in exts:
8602     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8603     results.append("%s%s" % (new_id, val))
8604   return results
8605
8606
8607 def _ComputeLDParams(disk_template, disk_params):
8608   """Computes Logical Disk parameters from Disk Template parameters.
8609
8610   @type disk_template: string
8611   @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8612   @type disk_params: dict
8613   @param disk_params: disk template parameters; dict(template_name -> parameters
8614   @rtype: list(dict)
8615   @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8616     contains the LD parameters of the node. The tree is flattened in-order.
8617
8618   """
8619   if disk_template not in constants.DISK_TEMPLATES:
8620     raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8621
8622   result = list()
8623   dt_params = disk_params[disk_template]
8624   if disk_template == constants.DT_DRBD8:
8625     drbd_params = {
8626       constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8627       constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8628       constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8629       constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8630       constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8631       constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8632       constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8633       constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8634       constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8635       constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8636       constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8637       constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8638       }
8639
8640     drbd_params = \
8641       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8642                        drbd_params)
8643
8644     result.append(drbd_params)
8645
8646     # data LV
8647     data_params = {
8648       constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8649       }
8650     data_params = \
8651       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8652                        data_params)
8653     result.append(data_params)
8654
8655     # metadata LV
8656     meta_params = {
8657       constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8658       }
8659     meta_params = \
8660       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8661                        meta_params)
8662     result.append(meta_params)
8663
8664   elif (disk_template == constants.DT_FILE or
8665         disk_template == constants.DT_SHARED_FILE):
8666     result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8667
8668   elif disk_template == constants.DT_PLAIN:
8669     params = {
8670       constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8671       }
8672     params = \
8673       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8674                        params)
8675     result.append(params)
8676
8677   elif disk_template == constants.DT_BLOCK:
8678     result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8679
8680   elif disk_template == constants.DT_RBD:
8681     params = {
8682       constants.LDP_POOL: dt_params[constants.RBD_POOL]
8683       }
8684     params = \
8685       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8686                        params)
8687     result.append(params)
8688
8689   return result
8690
8691
8692 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8693                          iv_name, p_minor, s_minor, drbd_params, data_params,
8694                          meta_params):
8695   """Generate a drbd8 device complete with its children.
8696
8697   """
8698   assert len(vgnames) == len(names) == 2
8699   port = lu.cfg.AllocatePort()
8700   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8701
8702   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8703                           logical_id=(vgnames[0], names[0]),
8704                           params=data_params)
8705   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8706                           logical_id=(vgnames[1], names[1]),
8707                           params=meta_params)
8708   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8709                           logical_id=(primary, secondary, port,
8710                                       p_minor, s_minor,
8711                                       shared_secret),
8712                           children=[dev_data, dev_meta],
8713                           iv_name=iv_name, params=drbd_params)
8714   return drbd_dev
8715
8716
8717 _DISK_TEMPLATE_NAME_PREFIX = {
8718   constants.DT_PLAIN: "",
8719   constants.DT_RBD: ".rbd",
8720   }
8721
8722
8723 _DISK_TEMPLATE_DEVICE_TYPE = {
8724   constants.DT_PLAIN: constants.LD_LV,
8725   constants.DT_FILE: constants.LD_FILE,
8726   constants.DT_SHARED_FILE: constants.LD_FILE,
8727   constants.DT_BLOCK: constants.LD_BLOCKDEV,
8728   constants.DT_RBD: constants.LD_RBD,
8729   }
8730
8731
8732 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8733     secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8734     feedback_fn, disk_params,
8735     _req_file_storage=opcodes.RequireFileStorage,
8736     _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8737   """Generate the entire disk layout for a given template type.
8738
8739   """
8740   #TODO: compute space requirements
8741
8742   vgname = lu.cfg.GetVGName()
8743   disk_count = len(disk_info)
8744   disks = []
8745   ld_params = _ComputeLDParams(template_name, disk_params)
8746
8747   if template_name == constants.DT_DISKLESS:
8748     pass
8749   elif template_name == constants.DT_DRBD8:
8750     drbd_params, data_params, meta_params = ld_params
8751     if len(secondary_nodes) != 1:
8752       raise errors.ProgrammerError("Wrong template configuration")
8753     remote_node = secondary_nodes[0]
8754     minors = lu.cfg.AllocateDRBDMinor(
8755       [primary_node, remote_node] * len(disk_info), instance_name)
8756
8757     names = []
8758     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8759                                                for i in range(disk_count)]):
8760       names.append(lv_prefix + "_data")
8761       names.append(lv_prefix + "_meta")
8762     for idx, disk in enumerate(disk_info):
8763       disk_index = idx + base_index
8764       drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8765       data_vg = disk.get(constants.IDISK_VG, vgname)
8766       meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8767       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8768                                       disk[constants.IDISK_SIZE],
8769                                       [data_vg, meta_vg],
8770                                       names[idx * 2:idx * 2 + 2],
8771                                       "disk/%d" % disk_index,
8772                                       minors[idx * 2], minors[idx * 2 + 1],
8773                                       drbd_params, data_params, meta_params)
8774       disk_dev.mode = disk[constants.IDISK_MODE]
8775       disks.append(disk_dev)
8776   else:
8777     if secondary_nodes:
8778       raise errors.ProgrammerError("Wrong template configuration")
8779
8780     if template_name == constants.DT_FILE:
8781       _req_file_storage()
8782     elif template_name == constants.DT_SHARED_FILE:
8783       _req_shr_file_storage()
8784
8785     name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8786     if name_prefix is None:
8787       names = None
8788     else:
8789       names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8790                                         (name_prefix, base_index + i)
8791                                         for i in range(disk_count)])
8792
8793     dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8794
8795     if template_name == constants.DT_PLAIN:
8796       def logical_id_fn(idx, _, disk):
8797         vg = disk.get(constants.IDISK_VG, vgname)
8798         return (vg, names[idx])
8799     elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8800       logical_id_fn = \
8801         lambda _, disk_index, disk: (file_driver,
8802                                      "%s/disk%d" % (file_storage_dir,
8803                                                     disk_index))
8804     elif template_name == constants.DT_BLOCK:
8805       logical_id_fn = \
8806         lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8807                                        disk[constants.IDISK_ADOPT])
8808     elif template_name == constants.DT_RBD:
8809       logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8810     else:
8811       raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8812
8813     for idx, disk in enumerate(disk_info):
8814       disk_index = idx + base_index
8815       size = disk[constants.IDISK_SIZE]
8816       feedback_fn("* disk %s, size %s" %
8817                   (disk_index, utils.FormatUnit(size, "h")))
8818       disks.append(objects.Disk(dev_type=dev_type, size=size,
8819                                 logical_id=logical_id_fn(idx, disk_index, disk),
8820                                 iv_name="disk/%d" % disk_index,
8821                                 mode=disk[constants.IDISK_MODE],
8822                                 params=ld_params[0]))
8823
8824   return disks
8825
8826
8827 def _GetInstanceInfoText(instance):
8828   """Compute that text that should be added to the disk's metadata.
8829
8830   """
8831   return "originstname+%s" % instance.name
8832
8833
8834 def _CalcEta(time_taken, written, total_size):
8835   """Calculates the ETA based on size written and total size.
8836
8837   @param time_taken: The time taken so far
8838   @param written: amount written so far
8839   @param total_size: The total size of data to be written
8840   @return: The remaining time in seconds
8841
8842   """
8843   avg_time = time_taken / float(written)
8844   return (total_size - written) * avg_time
8845
8846
8847 def _WipeDisks(lu, instance):
8848   """Wipes instance disks.
8849
8850   @type lu: L{LogicalUnit}
8851   @param lu: the logical unit on whose behalf we execute
8852   @type instance: L{objects.Instance}
8853   @param instance: the instance whose disks we should create
8854   @return: the success of the wipe
8855
8856   """
8857   node = instance.primary_node
8858
8859   for device in instance.disks:
8860     lu.cfg.SetDiskID(device, node)
8861
8862   logging.info("Pause sync of instance %s disks", instance.name)
8863   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8864
8865   for idx, success in enumerate(result.payload):
8866     if not success:
8867       logging.warn("pause-sync of instance %s for disks %d failed",
8868                    instance.name, idx)
8869
8870   try:
8871     for idx, device in enumerate(instance.disks):
8872       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8873       # MAX_WIPE_CHUNK at max
8874       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8875                             constants.MIN_WIPE_CHUNK_PERCENT)
8876       # we _must_ make this an int, otherwise rounding errors will
8877       # occur
8878       wipe_chunk_size = int(wipe_chunk_size)
8879
8880       lu.LogInfo("* Wiping disk %d", idx)
8881       logging.info("Wiping disk %d for instance %s, node %s using"
8882                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8883
8884       offset = 0
8885       size = device.size
8886       last_output = 0
8887       start_time = time.time()
8888
8889       while offset < size:
8890         wipe_size = min(wipe_chunk_size, size - offset)
8891         logging.debug("Wiping disk %d, offset %s, chunk %s",
8892                       idx, offset, wipe_size)
8893         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8894         result.Raise("Could not wipe disk %d at offset %d for size %d" %
8895                      (idx, offset, wipe_size))
8896         now = time.time()
8897         offset += wipe_size
8898         if now - last_output >= 60:
8899           eta = _CalcEta(now - start_time, offset, size)
8900           lu.LogInfo(" - done: %.1f%% ETA: %s" %
8901                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
8902           last_output = now
8903   finally:
8904     logging.info("Resume sync of instance %s disks", instance.name)
8905
8906     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8907
8908     for idx, success in enumerate(result.payload):
8909       if not success:
8910         lu.LogWarning("Resume sync of disk %d failed, please have a"
8911                       " look at the status and troubleshoot the issue", idx)
8912         logging.warn("resume-sync of instance %s for disks %d failed",
8913                      instance.name, idx)
8914
8915
8916 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8917   """Create all disks for an instance.
8918
8919   This abstracts away some work from AddInstance.
8920
8921   @type lu: L{LogicalUnit}
8922   @param lu: the logical unit on whose behalf we execute
8923   @type instance: L{objects.Instance}
8924   @param instance: the instance whose disks we should create
8925   @type to_skip: list
8926   @param to_skip: list of indices to skip
8927   @type target_node: string
8928   @param target_node: if passed, overrides the target node for creation
8929   @rtype: boolean
8930   @return: the success of the creation
8931
8932   """
8933   info = _GetInstanceInfoText(instance)
8934   if target_node is None:
8935     pnode = instance.primary_node
8936     all_nodes = instance.all_nodes
8937   else:
8938     pnode = target_node
8939     all_nodes = [pnode]
8940
8941   if instance.disk_template in constants.DTS_FILEBASED:
8942     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8943     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8944
8945     result.Raise("Failed to create directory '%s' on"
8946                  " node %s" % (file_storage_dir, pnode))
8947
8948   # Note: this needs to be kept in sync with adding of disks in
8949   # LUInstanceSetParams
8950   for idx, device in enumerate(instance.disks):
8951     if to_skip and idx in to_skip:
8952       continue
8953     logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8954     #HARDCODE
8955     for node in all_nodes:
8956       f_create = node == pnode
8957       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8958
8959
8960 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
8961   """Remove all disks for an instance.
8962
8963   This abstracts away some work from `AddInstance()` and
8964   `RemoveInstance()`. Note that in case some of the devices couldn't
8965   be removed, the removal will continue with the other ones (compare
8966   with `_CreateDisks()`).
8967
8968   @type lu: L{LogicalUnit}
8969   @param lu: the logical unit on whose behalf we execute
8970   @type instance: L{objects.Instance}
8971   @param instance: the instance whose disks we should remove
8972   @type target_node: string
8973   @param target_node: used to override the node on which to remove the disks
8974   @rtype: boolean
8975   @return: the success of the removal
8976
8977   """
8978   logging.info("Removing block devices for instance %s", instance.name)
8979
8980   all_result = True
8981   ports_to_release = set()
8982   for (idx, device) in enumerate(instance.disks):
8983     if target_node:
8984       edata = [(target_node, device)]
8985     else:
8986       edata = device.ComputeNodeTree(instance.primary_node)
8987     for node, disk in edata:
8988       lu.cfg.SetDiskID(disk, node)
8989       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8990       if msg:
8991         lu.LogWarning("Could not remove disk %s on node %s,"
8992                       " continuing anyway: %s", idx, node, msg)
8993         all_result = False
8994
8995     # if this is a DRBD disk, return its port to the pool
8996     if device.dev_type in constants.LDS_DRBD:
8997       ports_to_release.add(device.logical_id[2])
8998
8999   if all_result or ignore_failures:
9000     for port in ports_to_release:
9001       lu.cfg.AddTcpUdpPort(port)
9002
9003   if instance.disk_template == constants.DT_FILE:
9004     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9005     if target_node:
9006       tgt = target_node
9007     else:
9008       tgt = instance.primary_node
9009     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9010     if result.fail_msg:
9011       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9012                     file_storage_dir, instance.primary_node, result.fail_msg)
9013       all_result = False
9014
9015   return all_result
9016
9017
9018 def _ComputeDiskSizePerVG(disk_template, disks):
9019   """Compute disk size requirements in the volume group
9020
9021   """
9022   def _compute(disks, payload):
9023     """Universal algorithm.
9024
9025     """
9026     vgs = {}
9027     for disk in disks:
9028       vgs[disk[constants.IDISK_VG]] = \
9029         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9030
9031     return vgs
9032
9033   # Required free disk space as a function of disk and swap space
9034   req_size_dict = {
9035     constants.DT_DISKLESS: {},
9036     constants.DT_PLAIN: _compute(disks, 0),
9037     # 128 MB are added for drbd metadata for each disk
9038     constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9039     constants.DT_FILE: {},
9040     constants.DT_SHARED_FILE: {},
9041   }
9042
9043   if disk_template not in req_size_dict:
9044     raise errors.ProgrammerError("Disk template '%s' size requirement"
9045                                  " is unknown" % disk_template)
9046
9047   return req_size_dict[disk_template]
9048
9049
9050 def _ComputeDiskSize(disk_template, disks):
9051   """Compute disk size requirements in the volume group
9052
9053   """
9054   # Required free disk space as a function of disk and swap space
9055   req_size_dict = {
9056     constants.DT_DISKLESS: None,
9057     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9058     # 128 MB are added for drbd metadata for each disk
9059     constants.DT_DRBD8:
9060       sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9061     constants.DT_FILE: None,
9062     constants.DT_SHARED_FILE: 0,
9063     constants.DT_BLOCK: 0,
9064     constants.DT_RBD: 0,
9065   }
9066
9067   if disk_template not in req_size_dict:
9068     raise errors.ProgrammerError("Disk template '%s' size requirement"
9069                                  " is unknown" % disk_template)
9070
9071   return req_size_dict[disk_template]
9072
9073
9074 def _FilterVmNodes(lu, nodenames):
9075   """Filters out non-vm_capable nodes from a list.
9076
9077   @type lu: L{LogicalUnit}
9078   @param lu: the logical unit for which we check
9079   @type nodenames: list
9080   @param nodenames: the list of nodes on which we should check
9081   @rtype: list
9082   @return: the list of vm-capable nodes
9083
9084   """
9085   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9086   return [name for name in nodenames if name not in vm_nodes]
9087
9088
9089 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9090   """Hypervisor parameter validation.
9091
9092   This function abstract the hypervisor parameter validation to be
9093   used in both instance create and instance modify.
9094
9095   @type lu: L{LogicalUnit}
9096   @param lu: the logical unit for which we check
9097   @type nodenames: list
9098   @param nodenames: the list of nodes on which we should check
9099   @type hvname: string
9100   @param hvname: the name of the hypervisor we should use
9101   @type hvparams: dict
9102   @param hvparams: the parameters which we need to check
9103   @raise errors.OpPrereqError: if the parameters are not valid
9104
9105   """
9106   nodenames = _FilterVmNodes(lu, nodenames)
9107
9108   cluster = lu.cfg.GetClusterInfo()
9109   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9110
9111   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9112   for node in nodenames:
9113     info = hvinfo[node]
9114     if info.offline:
9115       continue
9116     info.Raise("Hypervisor parameter validation failed on node %s" % node)
9117
9118
9119 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9120   """OS parameters validation.
9121
9122   @type lu: L{LogicalUnit}
9123   @param lu: the logical unit for which we check
9124   @type required: boolean
9125   @param required: whether the validation should fail if the OS is not
9126       found
9127   @type nodenames: list
9128   @param nodenames: the list of nodes on which we should check
9129   @type osname: string
9130   @param osname: the name of the hypervisor we should use
9131   @type osparams: dict
9132   @param osparams: the parameters which we need to check
9133   @raise errors.OpPrereqError: if the parameters are not valid
9134
9135   """
9136   nodenames = _FilterVmNodes(lu, nodenames)
9137   result = lu.rpc.call_os_validate(nodenames, required, osname,
9138                                    [constants.OS_VALIDATE_PARAMETERS],
9139                                    osparams)
9140   for node, nres in result.items():
9141     # we don't check for offline cases since this should be run only
9142     # against the master node and/or an instance's nodes
9143     nres.Raise("OS Parameters validation failed on node %s" % node)
9144     if not nres.payload:
9145       lu.LogInfo("OS %s not found on node %s, validation skipped",
9146                  osname, node)
9147
9148
9149 class LUInstanceCreate(LogicalUnit):
9150   """Create an instance.
9151
9152   """
9153   HPATH = "instance-add"
9154   HTYPE = constants.HTYPE_INSTANCE
9155   REQ_BGL = False
9156
9157   def CheckArguments(self):
9158     """Check arguments.
9159
9160     """
9161     # do not require name_check to ease forward/backward compatibility
9162     # for tools
9163     if self.op.no_install and self.op.start:
9164       self.LogInfo("No-installation mode selected, disabling startup")
9165       self.op.start = False
9166     # validate/normalize the instance name
9167     self.op.instance_name = \
9168       netutils.Hostname.GetNormalizedName(self.op.instance_name)
9169
9170     if self.op.ip_check and not self.op.name_check:
9171       # TODO: make the ip check more flexible and not depend on the name check
9172       raise errors.OpPrereqError("Cannot do IP address check without a name"
9173                                  " check", errors.ECODE_INVAL)
9174
9175     # check nics' parameter names
9176     for nic in self.op.nics:
9177       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9178
9179     # check disks. parameter names and consistent adopt/no-adopt strategy
9180     has_adopt = has_no_adopt = False
9181     for disk in self.op.disks:
9182       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9183       if constants.IDISK_ADOPT in disk:
9184         has_adopt = True
9185       else:
9186         has_no_adopt = True
9187     if has_adopt and has_no_adopt:
9188       raise errors.OpPrereqError("Either all disks are adopted or none is",
9189                                  errors.ECODE_INVAL)
9190     if has_adopt:
9191       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9192         raise errors.OpPrereqError("Disk adoption is not supported for the"
9193                                    " '%s' disk template" %
9194                                    self.op.disk_template,
9195                                    errors.ECODE_INVAL)
9196       if self.op.iallocator is not None:
9197         raise errors.OpPrereqError("Disk adoption not allowed with an"
9198                                    " iallocator script", errors.ECODE_INVAL)
9199       if self.op.mode == constants.INSTANCE_IMPORT:
9200         raise errors.OpPrereqError("Disk adoption not allowed for"
9201                                    " instance import", errors.ECODE_INVAL)
9202     else:
9203       if self.op.disk_template in constants.DTS_MUST_ADOPT:
9204         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9205                                    " but no 'adopt' parameter given" %
9206                                    self.op.disk_template,
9207                                    errors.ECODE_INVAL)
9208
9209     self.adopt_disks = has_adopt
9210
9211     # instance name verification
9212     if self.op.name_check:
9213       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9214       self.op.instance_name = self.hostname1.name
9215       # used in CheckPrereq for ip ping check
9216       self.check_ip = self.hostname1.ip
9217     else:
9218       self.check_ip = None
9219
9220     # file storage checks
9221     if (self.op.file_driver and
9222         not self.op.file_driver in constants.FILE_DRIVER):
9223       raise errors.OpPrereqError("Invalid file driver name '%s'" %
9224                                  self.op.file_driver, errors.ECODE_INVAL)
9225
9226     if self.op.disk_template == constants.DT_FILE:
9227       opcodes.RequireFileStorage()
9228     elif self.op.disk_template == constants.DT_SHARED_FILE:
9229       opcodes.RequireSharedFileStorage()
9230
9231     ### Node/iallocator related checks
9232     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9233
9234     if self.op.pnode is not None:
9235       if self.op.disk_template in constants.DTS_INT_MIRROR:
9236         if self.op.snode is None:
9237           raise errors.OpPrereqError("The networked disk templates need"
9238                                      " a mirror node", errors.ECODE_INVAL)
9239       elif self.op.snode:
9240         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9241                         " template")
9242         self.op.snode = None
9243
9244     self._cds = _GetClusterDomainSecret()
9245
9246     if self.op.mode == constants.INSTANCE_IMPORT:
9247       # On import force_variant must be True, because if we forced it at
9248       # initial install, our only chance when importing it back is that it
9249       # works again!
9250       self.op.force_variant = True
9251
9252       if self.op.no_install:
9253         self.LogInfo("No-installation mode has no effect during import")
9254
9255     elif self.op.mode == constants.INSTANCE_CREATE:
9256       if self.op.os_type is None:
9257         raise errors.OpPrereqError("No guest OS specified",
9258                                    errors.ECODE_INVAL)
9259       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9260         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9261                                    " installation" % self.op.os_type,
9262                                    errors.ECODE_STATE)
9263       if self.op.disk_template is None:
9264         raise errors.OpPrereqError("No disk template specified",
9265                                    errors.ECODE_INVAL)
9266
9267     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9268       # Check handshake to ensure both clusters have the same domain secret
9269       src_handshake = self.op.source_handshake
9270       if not src_handshake:
9271         raise errors.OpPrereqError("Missing source handshake",
9272                                    errors.ECODE_INVAL)
9273
9274       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9275                                                            src_handshake)
9276       if errmsg:
9277         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9278                                    errors.ECODE_INVAL)
9279
9280       # Load and check source CA
9281       self.source_x509_ca_pem = self.op.source_x509_ca
9282       if not self.source_x509_ca_pem:
9283         raise errors.OpPrereqError("Missing source X509 CA",
9284                                    errors.ECODE_INVAL)
9285
9286       try:
9287         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9288                                                     self._cds)
9289       except OpenSSL.crypto.Error, err:
9290         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9291                                    (err, ), errors.ECODE_INVAL)
9292
9293       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9294       if errcode is not None:
9295         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9296                                    errors.ECODE_INVAL)
9297
9298       self.source_x509_ca = cert
9299
9300       src_instance_name = self.op.source_instance_name
9301       if not src_instance_name:
9302         raise errors.OpPrereqError("Missing source instance name",
9303                                    errors.ECODE_INVAL)
9304
9305       self.source_instance_name = \
9306           netutils.GetHostname(name=src_instance_name).name
9307
9308     else:
9309       raise errors.OpPrereqError("Invalid instance creation mode %r" %
9310                                  self.op.mode, errors.ECODE_INVAL)
9311
9312   def ExpandNames(self):
9313     """ExpandNames for CreateInstance.
9314
9315     Figure out the right locks for instance creation.
9316
9317     """
9318     self.needed_locks = {}
9319
9320     instance_name = self.op.instance_name
9321     # this is just a preventive check, but someone might still add this
9322     # instance in the meantime, and creation will fail at lock-add time
9323     if instance_name in self.cfg.GetInstanceList():
9324       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9325                                  instance_name, errors.ECODE_EXISTS)
9326
9327     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9328
9329     if self.op.iallocator:
9330       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9331       # specifying a group on instance creation and then selecting nodes from
9332       # that group
9333       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9334       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9335     else:
9336       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9337       nodelist = [self.op.pnode]
9338       if self.op.snode is not None:
9339         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9340         nodelist.append(self.op.snode)
9341       self.needed_locks[locking.LEVEL_NODE] = nodelist
9342       # Lock resources of instance's primary and secondary nodes (copy to
9343       # prevent accidential modification)
9344       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9345
9346     # in case of import lock the source node too
9347     if self.op.mode == constants.INSTANCE_IMPORT:
9348       src_node = self.op.src_node
9349       src_path = self.op.src_path
9350
9351       if src_path is None:
9352         self.op.src_path = src_path = self.op.instance_name
9353
9354       if src_node is None:
9355         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9356         self.op.src_node = None
9357         if os.path.isabs(src_path):
9358           raise errors.OpPrereqError("Importing an instance from a path"
9359                                      " requires a source node option",
9360                                      errors.ECODE_INVAL)
9361       else:
9362         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9363         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9364           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9365         if not os.path.isabs(src_path):
9366           self.op.src_path = src_path = \
9367             utils.PathJoin(constants.EXPORT_DIR, src_path)
9368
9369   def _RunAllocator(self):
9370     """Run the allocator based on input opcode.
9371
9372     """
9373     nics = [n.ToDict() for n in self.nics]
9374     ial = IAllocator(self.cfg, self.rpc,
9375                      mode=constants.IALLOCATOR_MODE_ALLOC,
9376                      name=self.op.instance_name,
9377                      disk_template=self.op.disk_template,
9378                      tags=self.op.tags,
9379                      os=self.op.os_type,
9380                      vcpus=self.be_full[constants.BE_VCPUS],
9381                      memory=self.be_full[constants.BE_MAXMEM],
9382                      spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9383                      disks=self.disks,
9384                      nics=nics,
9385                      hypervisor=self.op.hypervisor,
9386                      )
9387
9388     ial.Run(self.op.iallocator)
9389
9390     if not ial.success:
9391       raise errors.OpPrereqError("Can't compute nodes using"
9392                                  " iallocator '%s': %s" %
9393                                  (self.op.iallocator, ial.info),
9394                                  errors.ECODE_NORES)
9395     if len(ial.result) != ial.required_nodes:
9396       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9397                                  " of nodes (%s), required %s" %
9398                                  (self.op.iallocator, len(ial.result),
9399                                   ial.required_nodes), errors.ECODE_FAULT)
9400     self.op.pnode = ial.result[0]
9401     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9402                  self.op.instance_name, self.op.iallocator,
9403                  utils.CommaJoin(ial.result))
9404     if ial.required_nodes == 2:
9405       self.op.snode = ial.result[1]
9406
9407   def BuildHooksEnv(self):
9408     """Build hooks env.
9409
9410     This runs on master, primary and secondary nodes of the instance.
9411
9412     """
9413     env = {
9414       "ADD_MODE": self.op.mode,
9415       }
9416     if self.op.mode == constants.INSTANCE_IMPORT:
9417       env["SRC_NODE"] = self.op.src_node
9418       env["SRC_PATH"] = self.op.src_path
9419       env["SRC_IMAGES"] = self.src_images
9420
9421     env.update(_BuildInstanceHookEnv(
9422       name=self.op.instance_name,
9423       primary_node=self.op.pnode,
9424       secondary_nodes=self.secondaries,
9425       status=self.op.start,
9426       os_type=self.op.os_type,
9427       minmem=self.be_full[constants.BE_MINMEM],
9428       maxmem=self.be_full[constants.BE_MAXMEM],
9429       vcpus=self.be_full[constants.BE_VCPUS],
9430       nics=_NICListToTuple(self, self.nics),
9431       disk_template=self.op.disk_template,
9432       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9433              for d in self.disks],
9434       bep=self.be_full,
9435       hvp=self.hv_full,
9436       hypervisor_name=self.op.hypervisor,
9437       tags=self.op.tags,
9438     ))
9439
9440     return env
9441
9442   def BuildHooksNodes(self):
9443     """Build hooks nodes.
9444
9445     """
9446     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9447     return nl, nl
9448
9449   def _ReadExportInfo(self):
9450     """Reads the export information from disk.
9451
9452     It will override the opcode source node and path with the actual
9453     information, if these two were not specified before.
9454
9455     @return: the export information
9456
9457     """
9458     assert self.op.mode == constants.INSTANCE_IMPORT
9459
9460     src_node = self.op.src_node
9461     src_path = self.op.src_path
9462
9463     if src_node is None:
9464       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9465       exp_list = self.rpc.call_export_list(locked_nodes)
9466       found = False
9467       for node in exp_list:
9468         if exp_list[node].fail_msg:
9469           continue
9470         if src_path in exp_list[node].payload:
9471           found = True
9472           self.op.src_node = src_node = node
9473           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9474                                                        src_path)
9475           break
9476       if not found:
9477         raise errors.OpPrereqError("No export found for relative path %s" %
9478                                     src_path, errors.ECODE_INVAL)
9479
9480     _CheckNodeOnline(self, src_node)
9481     result = self.rpc.call_export_info(src_node, src_path)
9482     result.Raise("No export or invalid export found in dir %s" % src_path)
9483
9484     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9485     if not export_info.has_section(constants.INISECT_EXP):
9486       raise errors.ProgrammerError("Corrupted export config",
9487                                    errors.ECODE_ENVIRON)
9488
9489     ei_version = export_info.get(constants.INISECT_EXP, "version")
9490     if (int(ei_version) != constants.EXPORT_VERSION):
9491       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9492                                  (ei_version, constants.EXPORT_VERSION),
9493                                  errors.ECODE_ENVIRON)
9494     return export_info
9495
9496   def _ReadExportParams(self, einfo):
9497     """Use export parameters as defaults.
9498
9499     In case the opcode doesn't specify (as in override) some instance
9500     parameters, then try to use them from the export information, if
9501     that declares them.
9502
9503     """
9504     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9505
9506     if self.op.disk_template is None:
9507       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9508         self.op.disk_template = einfo.get(constants.INISECT_INS,
9509                                           "disk_template")
9510         if self.op.disk_template not in constants.DISK_TEMPLATES:
9511           raise errors.OpPrereqError("Disk template specified in configuration"
9512                                      " file is not one of the allowed values:"
9513                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9514       else:
9515         raise errors.OpPrereqError("No disk template specified and the export"
9516                                    " is missing the disk_template information",
9517                                    errors.ECODE_INVAL)
9518
9519     if not self.op.disks:
9520       disks = []
9521       # TODO: import the disk iv_name too
9522       for idx in range(constants.MAX_DISKS):
9523         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9524           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9525           disks.append({constants.IDISK_SIZE: disk_sz})
9526       self.op.disks = disks
9527       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9528         raise errors.OpPrereqError("No disk info specified and the export"
9529                                    " is missing the disk information",
9530                                    errors.ECODE_INVAL)
9531
9532     if not self.op.nics:
9533       nics = []
9534       for idx in range(constants.MAX_NICS):
9535         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9536           ndict = {}
9537           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9538             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9539             ndict[name] = v
9540           nics.append(ndict)
9541         else:
9542           break
9543       self.op.nics = nics
9544
9545     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9546       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9547
9548     if (self.op.hypervisor is None and
9549         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9550       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9551
9552     if einfo.has_section(constants.INISECT_HYP):
9553       # use the export parameters but do not override the ones
9554       # specified by the user
9555       for name, value in einfo.items(constants.INISECT_HYP):
9556         if name not in self.op.hvparams:
9557           self.op.hvparams[name] = value
9558
9559     if einfo.has_section(constants.INISECT_BEP):
9560       # use the parameters, without overriding
9561       for name, value in einfo.items(constants.INISECT_BEP):
9562         if name not in self.op.beparams:
9563           self.op.beparams[name] = value
9564         # Compatibility for the old "memory" be param
9565         if name == constants.BE_MEMORY:
9566           if constants.BE_MAXMEM not in self.op.beparams:
9567             self.op.beparams[constants.BE_MAXMEM] = value
9568           if constants.BE_MINMEM not in self.op.beparams:
9569             self.op.beparams[constants.BE_MINMEM] = value
9570     else:
9571       # try to read the parameters old style, from the main section
9572       for name in constants.BES_PARAMETERS:
9573         if (name not in self.op.beparams and
9574             einfo.has_option(constants.INISECT_INS, name)):
9575           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9576
9577     if einfo.has_section(constants.INISECT_OSP):
9578       # use the parameters, without overriding
9579       for name, value in einfo.items(constants.INISECT_OSP):
9580         if name not in self.op.osparams:
9581           self.op.osparams[name] = value
9582
9583   def _RevertToDefaults(self, cluster):
9584     """Revert the instance parameters to the default values.
9585
9586     """
9587     # hvparams
9588     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9589     for name in self.op.hvparams.keys():
9590       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9591         del self.op.hvparams[name]
9592     # beparams
9593     be_defs = cluster.SimpleFillBE({})
9594     for name in self.op.beparams.keys():
9595       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9596         del self.op.beparams[name]
9597     # nic params
9598     nic_defs = cluster.SimpleFillNIC({})
9599     for nic in self.op.nics:
9600       for name in constants.NICS_PARAMETERS:
9601         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9602           del nic[name]
9603     # osparams
9604     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9605     for name in self.op.osparams.keys():
9606       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9607         del self.op.osparams[name]
9608
9609   def _CalculateFileStorageDir(self):
9610     """Calculate final instance file storage dir.
9611
9612     """
9613     # file storage dir calculation/check
9614     self.instance_file_storage_dir = None
9615     if self.op.disk_template in constants.DTS_FILEBASED:
9616       # build the full file storage dir path
9617       joinargs = []
9618
9619       if self.op.disk_template == constants.DT_SHARED_FILE:
9620         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9621       else:
9622         get_fsd_fn = self.cfg.GetFileStorageDir
9623
9624       cfg_storagedir = get_fsd_fn()
9625       if not cfg_storagedir:
9626         raise errors.OpPrereqError("Cluster file storage dir not defined")
9627       joinargs.append(cfg_storagedir)
9628
9629       if self.op.file_storage_dir is not None:
9630         joinargs.append(self.op.file_storage_dir)
9631
9632       joinargs.append(self.op.instance_name)
9633
9634       # pylint: disable=W0142
9635       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9636
9637   def CheckPrereq(self): # pylint: disable=R0914
9638     """Check prerequisites.
9639
9640     """
9641     self._CalculateFileStorageDir()
9642
9643     if self.op.mode == constants.INSTANCE_IMPORT:
9644       export_info = self._ReadExportInfo()
9645       self._ReadExportParams(export_info)
9646
9647     if (not self.cfg.GetVGName() and
9648         self.op.disk_template not in constants.DTS_NOT_LVM):
9649       raise errors.OpPrereqError("Cluster does not support lvm-based"
9650                                  " instances", errors.ECODE_STATE)
9651
9652     if (self.op.hypervisor is None or
9653         self.op.hypervisor == constants.VALUE_AUTO):
9654       self.op.hypervisor = self.cfg.GetHypervisorType()
9655
9656     cluster = self.cfg.GetClusterInfo()
9657     enabled_hvs = cluster.enabled_hypervisors
9658     if self.op.hypervisor not in enabled_hvs:
9659       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9660                                  " cluster (%s)" % (self.op.hypervisor,
9661                                   ",".join(enabled_hvs)),
9662                                  errors.ECODE_STATE)
9663
9664     # Check tag validity
9665     for tag in self.op.tags:
9666       objects.TaggableObject.ValidateTag(tag)
9667
9668     # check hypervisor parameter syntax (locally)
9669     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9670     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9671                                       self.op.hvparams)
9672     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9673     hv_type.CheckParameterSyntax(filled_hvp)
9674     self.hv_full = filled_hvp
9675     # check that we don't specify global parameters on an instance
9676     _CheckGlobalHvParams(self.op.hvparams)
9677
9678     # fill and remember the beparams dict
9679     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9680     for param, value in self.op.beparams.iteritems():
9681       if value == constants.VALUE_AUTO:
9682         self.op.beparams[param] = default_beparams[param]
9683     objects.UpgradeBeParams(self.op.beparams)
9684     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9685     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9686
9687     # build os parameters
9688     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9689
9690     # now that hvp/bep are in final format, let's reset to defaults,
9691     # if told to do so
9692     if self.op.identify_defaults:
9693       self._RevertToDefaults(cluster)
9694
9695     # NIC buildup
9696     self.nics = []
9697     for idx, nic in enumerate(self.op.nics):
9698       nic_mode_req = nic.get(constants.INIC_MODE, None)
9699       nic_mode = nic_mode_req
9700       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9701         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9702
9703       # in routed mode, for the first nic, the default ip is 'auto'
9704       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9705         default_ip_mode = constants.VALUE_AUTO
9706       else:
9707         default_ip_mode = constants.VALUE_NONE
9708
9709       # ip validity checks
9710       ip = nic.get(constants.INIC_IP, default_ip_mode)
9711       if ip is None or ip.lower() == constants.VALUE_NONE:
9712         nic_ip = None
9713       elif ip.lower() == constants.VALUE_AUTO:
9714         if not self.op.name_check:
9715           raise errors.OpPrereqError("IP address set to auto but name checks"
9716                                      " have been skipped",
9717                                      errors.ECODE_INVAL)
9718         nic_ip = self.hostname1.ip
9719       else:
9720         if not netutils.IPAddress.IsValid(ip):
9721           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9722                                      errors.ECODE_INVAL)
9723         nic_ip = ip
9724
9725       # TODO: check the ip address for uniqueness
9726       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9727         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9728                                    errors.ECODE_INVAL)
9729
9730       # MAC address verification
9731       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9732       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9733         mac = utils.NormalizeAndValidateMac(mac)
9734
9735         try:
9736           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9737         except errors.ReservationError:
9738           raise errors.OpPrereqError("MAC address %s already in use"
9739                                      " in cluster" % mac,
9740                                      errors.ECODE_NOTUNIQUE)
9741
9742       #  Build nic parameters
9743       link = nic.get(constants.INIC_LINK, None)
9744       if link == constants.VALUE_AUTO:
9745         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9746       nicparams = {}
9747       if nic_mode_req:
9748         nicparams[constants.NIC_MODE] = nic_mode
9749       if link:
9750         nicparams[constants.NIC_LINK] = link
9751
9752       check_params = cluster.SimpleFillNIC(nicparams)
9753       objects.NIC.CheckParameterSyntax(check_params)
9754       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9755
9756     # disk checks/pre-build
9757     default_vg = self.cfg.GetVGName()
9758     self.disks = []
9759     for disk in self.op.disks:
9760       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9761       if mode not in constants.DISK_ACCESS_SET:
9762         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9763                                    mode, errors.ECODE_INVAL)
9764       size = disk.get(constants.IDISK_SIZE, None)
9765       if size is None:
9766         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9767       try:
9768         size = int(size)
9769       except (TypeError, ValueError):
9770         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9771                                    errors.ECODE_INVAL)
9772
9773       data_vg = disk.get(constants.IDISK_VG, default_vg)
9774       new_disk = {
9775         constants.IDISK_SIZE: size,
9776         constants.IDISK_MODE: mode,
9777         constants.IDISK_VG: data_vg,
9778         }
9779       if constants.IDISK_METAVG in disk:
9780         new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9781       if constants.IDISK_ADOPT in disk:
9782         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9783       self.disks.append(new_disk)
9784
9785     if self.op.mode == constants.INSTANCE_IMPORT:
9786       disk_images = []
9787       for idx in range(len(self.disks)):
9788         option = "disk%d_dump" % idx
9789         if export_info.has_option(constants.INISECT_INS, option):
9790           # FIXME: are the old os-es, disk sizes, etc. useful?
9791           export_name = export_info.get(constants.INISECT_INS, option)
9792           image = utils.PathJoin(self.op.src_path, export_name)
9793           disk_images.append(image)
9794         else:
9795           disk_images.append(False)
9796
9797       self.src_images = disk_images
9798
9799       old_name = export_info.get(constants.INISECT_INS, "name")
9800       if self.op.instance_name == old_name:
9801         for idx, nic in enumerate(self.nics):
9802           if nic.mac == constants.VALUE_AUTO:
9803             nic_mac_ini = "nic%d_mac" % idx
9804             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9805
9806     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9807
9808     # ip ping checks (we use the same ip that was resolved in ExpandNames)
9809     if self.op.ip_check:
9810       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9811         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9812                                    (self.check_ip, self.op.instance_name),
9813                                    errors.ECODE_NOTUNIQUE)
9814
9815     #### mac address generation
9816     # By generating here the mac address both the allocator and the hooks get
9817     # the real final mac address rather than the 'auto' or 'generate' value.
9818     # There is a race condition between the generation and the instance object
9819     # creation, which means that we know the mac is valid now, but we're not
9820     # sure it will be when we actually add the instance. If things go bad
9821     # adding the instance will abort because of a duplicate mac, and the
9822     # creation job will fail.
9823     for nic in self.nics:
9824       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9825         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9826
9827     #### allocator run
9828
9829     if self.op.iallocator is not None:
9830       self._RunAllocator()
9831
9832     # Release all unneeded node locks
9833     _ReleaseLocks(self, locking.LEVEL_NODE,
9834                   keep=filter(None, [self.op.pnode, self.op.snode,
9835                                      self.op.src_node]))
9836     _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9837                   keep=filter(None, [self.op.pnode, self.op.snode,
9838                                      self.op.src_node]))
9839
9840     #### node related checks
9841
9842     # check primary node
9843     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9844     assert self.pnode is not None, \
9845       "Cannot retrieve locked node %s" % self.op.pnode
9846     if pnode.offline:
9847       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9848                                  pnode.name, errors.ECODE_STATE)
9849     if pnode.drained:
9850       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9851                                  pnode.name, errors.ECODE_STATE)
9852     if not pnode.vm_capable:
9853       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9854                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9855
9856     self.secondaries = []
9857
9858     # mirror node verification
9859     if self.op.disk_template in constants.DTS_INT_MIRROR:
9860       if self.op.snode == pnode.name:
9861         raise errors.OpPrereqError("The secondary node cannot be the"
9862                                    " primary node", errors.ECODE_INVAL)
9863       _CheckNodeOnline(self, self.op.snode)
9864       _CheckNodeNotDrained(self, self.op.snode)
9865       _CheckNodeVmCapable(self, self.op.snode)
9866       self.secondaries.append(self.op.snode)
9867
9868       snode = self.cfg.GetNodeInfo(self.op.snode)
9869       if pnode.group != snode.group:
9870         self.LogWarning("The primary and secondary nodes are in two"
9871                         " different node groups; the disk parameters"
9872                         " from the first disk's node group will be"
9873                         " used")
9874
9875     nodenames = [pnode.name] + self.secondaries
9876
9877     # Verify instance specs
9878     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9879     ispec = {
9880       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9881       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9882       constants.ISPEC_DISK_COUNT: len(self.disks),
9883       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9884       constants.ISPEC_NIC_COUNT: len(self.nics),
9885       constants.ISPEC_SPINDLE_USE: spindle_use,
9886       }
9887
9888     group_info = self.cfg.GetNodeGroup(pnode.group)
9889     ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9890     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9891     if not self.op.ignore_ipolicy and res:
9892       raise errors.OpPrereqError(("Instance allocation to group %s violates"
9893                                   " policy: %s") % (pnode.group,
9894                                                     utils.CommaJoin(res)),
9895                                   errors.ECODE_INVAL)
9896
9897     # disk parameters (not customizable at instance or node level)
9898     # just use the primary node parameters, ignoring the secondary.
9899     self.diskparams = group_info.diskparams
9900
9901     if not self.adopt_disks:
9902       if self.op.disk_template == constants.DT_RBD:
9903         # _CheckRADOSFreeSpace() is just a placeholder.
9904         # Any function that checks prerequisites can be placed here.
9905         # Check if there is enough space on the RADOS cluster.
9906         _CheckRADOSFreeSpace()
9907       else:
9908         # Check lv size requirements, if not adopting
9909         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9910         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9911
9912     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9913       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9914                                 disk[constants.IDISK_ADOPT])
9915                      for disk in self.disks])
9916       if len(all_lvs) != len(self.disks):
9917         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9918                                    errors.ECODE_INVAL)
9919       for lv_name in all_lvs:
9920         try:
9921           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9922           # to ReserveLV uses the same syntax
9923           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9924         except errors.ReservationError:
9925           raise errors.OpPrereqError("LV named %s used by another instance" %
9926                                      lv_name, errors.ECODE_NOTUNIQUE)
9927
9928       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9929       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9930
9931       node_lvs = self.rpc.call_lv_list([pnode.name],
9932                                        vg_names.payload.keys())[pnode.name]
9933       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9934       node_lvs = node_lvs.payload
9935
9936       delta = all_lvs.difference(node_lvs.keys())
9937       if delta:
9938         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9939                                    utils.CommaJoin(delta),
9940                                    errors.ECODE_INVAL)
9941       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9942       if online_lvs:
9943         raise errors.OpPrereqError("Online logical volumes found, cannot"
9944                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9945                                    errors.ECODE_STATE)
9946       # update the size of disk based on what is found
9947       for dsk in self.disks:
9948         dsk[constants.IDISK_SIZE] = \
9949           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9950                                         dsk[constants.IDISK_ADOPT])][0]))
9951
9952     elif self.op.disk_template == constants.DT_BLOCK:
9953       # Normalize and de-duplicate device paths
9954       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9955                        for disk in self.disks])
9956       if len(all_disks) != len(self.disks):
9957         raise errors.OpPrereqError("Duplicate disk names given for adoption",
9958                                    errors.ECODE_INVAL)
9959       baddisks = [d for d in all_disks
9960                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9961       if baddisks:
9962         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9963                                    " cannot be adopted" %
9964                                    (", ".join(baddisks),
9965                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
9966                                    errors.ECODE_INVAL)
9967
9968       node_disks = self.rpc.call_bdev_sizes([pnode.name],
9969                                             list(all_disks))[pnode.name]
9970       node_disks.Raise("Cannot get block device information from node %s" %
9971                        pnode.name)
9972       node_disks = node_disks.payload
9973       delta = all_disks.difference(node_disks.keys())
9974       if delta:
9975         raise errors.OpPrereqError("Missing block device(s): %s" %
9976                                    utils.CommaJoin(delta),
9977                                    errors.ECODE_INVAL)
9978       for dsk in self.disks:
9979         dsk[constants.IDISK_SIZE] = \
9980           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9981
9982     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9983
9984     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9985     # check OS parameters (remotely)
9986     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9987
9988     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9989
9990     # memory check on primary node
9991     #TODO(dynmem): use MINMEM for checking
9992     if self.op.start:
9993       _CheckNodeFreeMemory(self, self.pnode.name,
9994                            "creating instance %s" % self.op.instance_name,
9995                            self.be_full[constants.BE_MAXMEM],
9996                            self.op.hypervisor)
9997
9998     self.dry_run_result = list(nodenames)
9999
10000   def Exec(self, feedback_fn):
10001     """Create and add the instance to the cluster.
10002
10003     """
10004     instance = self.op.instance_name
10005     pnode_name = self.pnode.name
10006
10007     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10008                 self.owned_locks(locking.LEVEL_NODE)), \
10009       "Node locks differ from node resource locks"
10010
10011     ht_kind = self.op.hypervisor
10012     if ht_kind in constants.HTS_REQ_PORT:
10013       network_port = self.cfg.AllocatePort()
10014     else:
10015       network_port = None
10016
10017     disks = _GenerateDiskTemplate(self,
10018                                   self.op.disk_template,
10019                                   instance, pnode_name,
10020                                   self.secondaries,
10021                                   self.disks,
10022                                   self.instance_file_storage_dir,
10023                                   self.op.file_driver,
10024                                   0,
10025                                   feedback_fn,
10026                                   self.diskparams)
10027
10028     iobj = objects.Instance(name=instance, os=self.op.os_type,
10029                             primary_node=pnode_name,
10030                             nics=self.nics, disks=disks,
10031                             disk_template=self.op.disk_template,
10032                             admin_state=constants.ADMINST_DOWN,
10033                             network_port=network_port,
10034                             beparams=self.op.beparams,
10035                             hvparams=self.op.hvparams,
10036                             hypervisor=self.op.hypervisor,
10037                             osparams=self.op.osparams,
10038                             )
10039
10040     if self.op.tags:
10041       for tag in self.op.tags:
10042         iobj.AddTag(tag)
10043
10044     if self.adopt_disks:
10045       if self.op.disk_template == constants.DT_PLAIN:
10046         # rename LVs to the newly-generated names; we need to construct
10047         # 'fake' LV disks with the old data, plus the new unique_id
10048         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10049         rename_to = []
10050         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10051           rename_to.append(t_dsk.logical_id)
10052           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10053           self.cfg.SetDiskID(t_dsk, pnode_name)
10054         result = self.rpc.call_blockdev_rename(pnode_name,
10055                                                zip(tmp_disks, rename_to))
10056         result.Raise("Failed to rename adoped LVs")
10057     else:
10058       feedback_fn("* creating instance disks...")
10059       try:
10060         _CreateDisks(self, iobj)
10061       except errors.OpExecError:
10062         self.LogWarning("Device creation failed, reverting...")
10063         try:
10064           _RemoveDisks(self, iobj)
10065         finally:
10066           self.cfg.ReleaseDRBDMinors(instance)
10067           raise
10068
10069     feedback_fn("adding instance %s to cluster config" % instance)
10070
10071     self.cfg.AddInstance(iobj, self.proc.GetECId())
10072
10073     # Declare that we don't want to remove the instance lock anymore, as we've
10074     # added the instance to the config
10075     del self.remove_locks[locking.LEVEL_INSTANCE]
10076
10077     if self.op.mode == constants.INSTANCE_IMPORT:
10078       # Release unused nodes
10079       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10080     else:
10081       # Release all nodes
10082       _ReleaseLocks(self, locking.LEVEL_NODE)
10083
10084     disk_abort = False
10085     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10086       feedback_fn("* wiping instance disks...")
10087       try:
10088         _WipeDisks(self, iobj)
10089       except errors.OpExecError, err:
10090         logging.exception("Wiping disks failed")
10091         self.LogWarning("Wiping instance disks failed (%s)", err)
10092         disk_abort = True
10093
10094     if disk_abort:
10095       # Something is already wrong with the disks, don't do anything else
10096       pass
10097     elif self.op.wait_for_sync:
10098       disk_abort = not _WaitForSync(self, iobj)
10099     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10100       # make sure the disks are not degraded (still sync-ing is ok)
10101       feedback_fn("* checking mirrors status")
10102       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10103     else:
10104       disk_abort = False
10105
10106     if disk_abort:
10107       _RemoveDisks(self, iobj)
10108       self.cfg.RemoveInstance(iobj.name)
10109       # Make sure the instance lock gets removed
10110       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10111       raise errors.OpExecError("There are some degraded disks for"
10112                                " this instance")
10113
10114     # Release all node resource locks
10115     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10116
10117     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10118       if self.op.mode == constants.INSTANCE_CREATE:
10119         if not self.op.no_install:
10120           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10121                         not self.op.wait_for_sync)
10122           if pause_sync:
10123             feedback_fn("* pausing disk sync to install instance OS")
10124             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10125                                                               iobj.disks, True)
10126             for idx, success in enumerate(result.payload):
10127               if not success:
10128                 logging.warn("pause-sync of instance %s for disk %d failed",
10129                              instance, idx)
10130
10131           feedback_fn("* running the instance OS create scripts...")
10132           # FIXME: pass debug option from opcode to backend
10133           os_add_result = \
10134             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10135                                           self.op.debug_level)
10136           if pause_sync:
10137             feedback_fn("* resuming disk sync")
10138             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10139                                                               iobj.disks, False)
10140             for idx, success in enumerate(result.payload):
10141               if not success:
10142                 logging.warn("resume-sync of instance %s for disk %d failed",
10143                              instance, idx)
10144
10145           os_add_result.Raise("Could not add os for instance %s"
10146                               " on node %s" % (instance, pnode_name))
10147
10148       elif self.op.mode == constants.INSTANCE_IMPORT:
10149         feedback_fn("* running the instance OS import scripts...")
10150
10151         transfers = []
10152
10153         for idx, image in enumerate(self.src_images):
10154           if not image:
10155             continue
10156
10157           # FIXME: pass debug option from opcode to backend
10158           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10159                                              constants.IEIO_FILE, (image, ),
10160                                              constants.IEIO_SCRIPT,
10161                                              (iobj.disks[idx], idx),
10162                                              None)
10163           transfers.append(dt)
10164
10165         import_result = \
10166           masterd.instance.TransferInstanceData(self, feedback_fn,
10167                                                 self.op.src_node, pnode_name,
10168                                                 self.pnode.secondary_ip,
10169                                                 iobj, transfers)
10170         if not compat.all(import_result):
10171           self.LogWarning("Some disks for instance %s on node %s were not"
10172                           " imported successfully" % (instance, pnode_name))
10173
10174       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10175         feedback_fn("* preparing remote import...")
10176         # The source cluster will stop the instance before attempting to make a
10177         # connection. In some cases stopping an instance can take a long time,
10178         # hence the shutdown timeout is added to the connection timeout.
10179         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10180                            self.op.source_shutdown_timeout)
10181         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10182
10183         assert iobj.primary_node == self.pnode.name
10184         disk_results = \
10185           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10186                                         self.source_x509_ca,
10187                                         self._cds, timeouts)
10188         if not compat.all(disk_results):
10189           # TODO: Should the instance still be started, even if some disks
10190           # failed to import (valid for local imports, too)?
10191           self.LogWarning("Some disks for instance %s on node %s were not"
10192                           " imported successfully" % (instance, pnode_name))
10193
10194         # Run rename script on newly imported instance
10195         assert iobj.name == instance
10196         feedback_fn("Running rename script for %s" % instance)
10197         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10198                                                    self.source_instance_name,
10199                                                    self.op.debug_level)
10200         if result.fail_msg:
10201           self.LogWarning("Failed to run rename script for %s on node"
10202                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10203
10204       else:
10205         # also checked in the prereq part
10206         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10207                                      % self.op.mode)
10208
10209     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10210
10211     if self.op.start:
10212       iobj.admin_state = constants.ADMINST_UP
10213       self.cfg.Update(iobj, feedback_fn)
10214       logging.info("Starting instance %s on node %s", instance, pnode_name)
10215       feedback_fn("* starting instance...")
10216       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10217                                             False)
10218       result.Raise("Could not start instance")
10219
10220     return list(iobj.all_nodes)
10221
10222
10223 def _CheckRADOSFreeSpace():
10224   """Compute disk size requirements inside the RADOS cluster.
10225
10226   """
10227   # For the RADOS cluster we assume there is always enough space.
10228   pass
10229
10230
10231 class LUInstanceConsole(NoHooksLU):
10232   """Connect to an instance's console.
10233
10234   This is somewhat special in that it returns the command line that
10235   you need to run on the master node in order to connect to the
10236   console.
10237
10238   """
10239   REQ_BGL = False
10240
10241   def ExpandNames(self):
10242     self.share_locks = _ShareAll()
10243     self._ExpandAndLockInstance()
10244
10245   def CheckPrereq(self):
10246     """Check prerequisites.
10247
10248     This checks that the instance is in the cluster.
10249
10250     """
10251     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10252     assert self.instance is not None, \
10253       "Cannot retrieve locked instance %s" % self.op.instance_name
10254     _CheckNodeOnline(self, self.instance.primary_node)
10255
10256   def Exec(self, feedback_fn):
10257     """Connect to the console of an instance
10258
10259     """
10260     instance = self.instance
10261     node = instance.primary_node
10262
10263     node_insts = self.rpc.call_instance_list([node],
10264                                              [instance.hypervisor])[node]
10265     node_insts.Raise("Can't get node information from %s" % node)
10266
10267     if instance.name not in node_insts.payload:
10268       if instance.admin_state == constants.ADMINST_UP:
10269         state = constants.INSTST_ERRORDOWN
10270       elif instance.admin_state == constants.ADMINST_DOWN:
10271         state = constants.INSTST_ADMINDOWN
10272       else:
10273         state = constants.INSTST_ADMINOFFLINE
10274       raise errors.OpExecError("Instance %s is not running (state %s)" %
10275                                (instance.name, state))
10276
10277     logging.debug("Connecting to console of %s on %s", instance.name, node)
10278
10279     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10280
10281
10282 def _GetInstanceConsole(cluster, instance):
10283   """Returns console information for an instance.
10284
10285   @type cluster: L{objects.Cluster}
10286   @type instance: L{objects.Instance}
10287   @rtype: dict
10288
10289   """
10290   hyper = hypervisor.GetHypervisor(instance.hypervisor)
10291   # beparams and hvparams are passed separately, to avoid editing the
10292   # instance and then saving the defaults in the instance itself.
10293   hvparams = cluster.FillHV(instance)
10294   beparams = cluster.FillBE(instance)
10295   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10296
10297   assert console.instance == instance.name
10298   assert console.Validate()
10299
10300   return console.ToDict()
10301
10302
10303 class LUInstanceReplaceDisks(LogicalUnit):
10304   """Replace the disks of an instance.
10305
10306   """
10307   HPATH = "mirrors-replace"
10308   HTYPE = constants.HTYPE_INSTANCE
10309   REQ_BGL = False
10310
10311   def CheckArguments(self):
10312     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10313                                   self.op.iallocator)
10314
10315   def ExpandNames(self):
10316     self._ExpandAndLockInstance()
10317
10318     assert locking.LEVEL_NODE not in self.needed_locks
10319     assert locking.LEVEL_NODE_RES not in self.needed_locks
10320     assert locking.LEVEL_NODEGROUP not in self.needed_locks
10321
10322     assert self.op.iallocator is None or self.op.remote_node is None, \
10323       "Conflicting options"
10324
10325     if self.op.remote_node is not None:
10326       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10327
10328       # Warning: do not remove the locking of the new secondary here
10329       # unless DRBD8.AddChildren is changed to work in parallel;
10330       # currently it doesn't since parallel invocations of
10331       # FindUnusedMinor will conflict
10332       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10333       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10334     else:
10335       self.needed_locks[locking.LEVEL_NODE] = []
10336       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10337
10338       if self.op.iallocator is not None:
10339         # iallocator will select a new node in the same group
10340         self.needed_locks[locking.LEVEL_NODEGROUP] = []
10341
10342     self.needed_locks[locking.LEVEL_NODE_RES] = []
10343
10344     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10345                                    self.op.iallocator, self.op.remote_node,
10346                                    self.op.disks, False, self.op.early_release,
10347                                    self.op.ignore_ipolicy)
10348
10349     self.tasklets = [self.replacer]
10350
10351   def DeclareLocks(self, level):
10352     if level == locking.LEVEL_NODEGROUP:
10353       assert self.op.remote_node is None
10354       assert self.op.iallocator is not None
10355       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10356
10357       self.share_locks[locking.LEVEL_NODEGROUP] = 1
10358       # Lock all groups used by instance optimistically; this requires going
10359       # via the node before it's locked, requiring verification later on
10360       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10361         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10362
10363     elif level == locking.LEVEL_NODE:
10364       if self.op.iallocator is not None:
10365         assert self.op.remote_node is None
10366         assert not self.needed_locks[locking.LEVEL_NODE]
10367
10368         # Lock member nodes of all locked groups
10369         self.needed_locks[locking.LEVEL_NODE] = [node_name
10370           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10371           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10372       else:
10373         self._LockInstancesNodes()
10374     elif level == locking.LEVEL_NODE_RES:
10375       # Reuse node locks
10376       self.needed_locks[locking.LEVEL_NODE_RES] = \
10377         self.needed_locks[locking.LEVEL_NODE]
10378
10379   def BuildHooksEnv(self):
10380     """Build hooks env.
10381
10382     This runs on the master, the primary and all the secondaries.
10383
10384     """
10385     instance = self.replacer.instance
10386     env = {
10387       "MODE": self.op.mode,
10388       "NEW_SECONDARY": self.op.remote_node,
10389       "OLD_SECONDARY": instance.secondary_nodes[0],
10390       }
10391     env.update(_BuildInstanceHookEnvByObject(self, instance))
10392     return env
10393
10394   def BuildHooksNodes(self):
10395     """Build hooks nodes.
10396
10397     """
10398     instance = self.replacer.instance
10399     nl = [
10400       self.cfg.GetMasterNode(),
10401       instance.primary_node,
10402       ]
10403     if self.op.remote_node is not None:
10404       nl.append(self.op.remote_node)
10405     return nl, nl
10406
10407   def CheckPrereq(self):
10408     """Check prerequisites.
10409
10410     """
10411     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10412             self.op.iallocator is None)
10413
10414     # Verify if node group locks are still correct
10415     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10416     if owned_groups:
10417       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10418
10419     return LogicalUnit.CheckPrereq(self)
10420
10421
10422 class TLReplaceDisks(Tasklet):
10423   """Replaces disks for an instance.
10424
10425   Note: Locking is not within the scope of this class.
10426
10427   """
10428   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10429                disks, delay_iallocator, early_release, ignore_ipolicy):
10430     """Initializes this class.
10431
10432     """
10433     Tasklet.__init__(self, lu)
10434
10435     # Parameters
10436     self.instance_name = instance_name
10437     self.mode = mode
10438     self.iallocator_name = iallocator_name
10439     self.remote_node = remote_node
10440     self.disks = disks
10441     self.delay_iallocator = delay_iallocator
10442     self.early_release = early_release
10443     self.ignore_ipolicy = ignore_ipolicy
10444
10445     # Runtime data
10446     self.instance = None
10447     self.new_node = None
10448     self.target_node = None
10449     self.other_node = None
10450     self.remote_node_info = None
10451     self.node_secondary_ip = None
10452
10453   @staticmethod
10454   def CheckArguments(mode, remote_node, iallocator):
10455     """Helper function for users of this class.
10456
10457     """
10458     # check for valid parameter combination
10459     if mode == constants.REPLACE_DISK_CHG:
10460       if remote_node is None and iallocator is None:
10461         raise errors.OpPrereqError("When changing the secondary either an"
10462                                    " iallocator script must be used or the"
10463                                    " new node given", errors.ECODE_INVAL)
10464
10465       if remote_node is not None and iallocator is not None:
10466         raise errors.OpPrereqError("Give either the iallocator or the new"
10467                                    " secondary, not both", errors.ECODE_INVAL)
10468
10469     elif remote_node is not None or iallocator is not None:
10470       # Not replacing the secondary
10471       raise errors.OpPrereqError("The iallocator and new node options can"
10472                                  " only be used when changing the"
10473                                  " secondary node", errors.ECODE_INVAL)
10474
10475   @staticmethod
10476   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10477     """Compute a new secondary node using an IAllocator.
10478
10479     """
10480     ial = IAllocator(lu.cfg, lu.rpc,
10481                      mode=constants.IALLOCATOR_MODE_RELOC,
10482                      name=instance_name,
10483                      relocate_from=list(relocate_from))
10484
10485     ial.Run(iallocator_name)
10486
10487     if not ial.success:
10488       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10489                                  " %s" % (iallocator_name, ial.info),
10490                                  errors.ECODE_NORES)
10491
10492     if len(ial.result) != ial.required_nodes:
10493       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10494                                  " of nodes (%s), required %s" %
10495                                  (iallocator_name,
10496                                   len(ial.result), ial.required_nodes),
10497                                  errors.ECODE_FAULT)
10498
10499     remote_node_name = ial.result[0]
10500
10501     lu.LogInfo("Selected new secondary for instance '%s': %s",
10502                instance_name, remote_node_name)
10503
10504     return remote_node_name
10505
10506   def _FindFaultyDisks(self, node_name):
10507     """Wrapper for L{_FindFaultyInstanceDisks}.
10508
10509     """
10510     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10511                                     node_name, True)
10512
10513   def _CheckDisksActivated(self, instance):
10514     """Checks if the instance disks are activated.
10515
10516     @param instance: The instance to check disks
10517     @return: True if they are activated, False otherwise
10518
10519     """
10520     nodes = instance.all_nodes
10521
10522     for idx, dev in enumerate(instance.disks):
10523       for node in nodes:
10524         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10525         self.cfg.SetDiskID(dev, node)
10526
10527         result = self.rpc.call_blockdev_find(node, dev)
10528
10529         if result.offline:
10530           continue
10531         elif result.fail_msg or not result.payload:
10532           return False
10533
10534     return True
10535
10536   def CheckPrereq(self):
10537     """Check prerequisites.
10538
10539     This checks that the instance is in the cluster.
10540
10541     """
10542     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10543     assert instance is not None, \
10544       "Cannot retrieve locked instance %s" % self.instance_name
10545
10546     if instance.disk_template != constants.DT_DRBD8:
10547       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10548                                  " instances", errors.ECODE_INVAL)
10549
10550     if len(instance.secondary_nodes) != 1:
10551       raise errors.OpPrereqError("The instance has a strange layout,"
10552                                  " expected one secondary but found %d" %
10553                                  len(instance.secondary_nodes),
10554                                  errors.ECODE_FAULT)
10555
10556     if not self.delay_iallocator:
10557       self._CheckPrereq2()
10558
10559   def _CheckPrereq2(self):
10560     """Check prerequisites, second part.
10561
10562     This function should always be part of CheckPrereq. It was separated and is
10563     now called from Exec because during node evacuation iallocator was only
10564     called with an unmodified cluster model, not taking planned changes into
10565     account.
10566
10567     """
10568     instance = self.instance
10569     secondary_node = instance.secondary_nodes[0]
10570
10571     if self.iallocator_name is None:
10572       remote_node = self.remote_node
10573     else:
10574       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10575                                        instance.name, instance.secondary_nodes)
10576
10577     if remote_node is None:
10578       self.remote_node_info = None
10579     else:
10580       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10581              "Remote node '%s' is not locked" % remote_node
10582
10583       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10584       assert self.remote_node_info is not None, \
10585         "Cannot retrieve locked node %s" % remote_node
10586
10587     if remote_node == self.instance.primary_node:
10588       raise errors.OpPrereqError("The specified node is the primary node of"
10589                                  " the instance", errors.ECODE_INVAL)
10590
10591     if remote_node == secondary_node:
10592       raise errors.OpPrereqError("The specified node is already the"
10593                                  " secondary node of the instance",
10594                                  errors.ECODE_INVAL)
10595
10596     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10597                                     constants.REPLACE_DISK_CHG):
10598       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10599                                  errors.ECODE_INVAL)
10600
10601     if self.mode == constants.REPLACE_DISK_AUTO:
10602       if not self._CheckDisksActivated(instance):
10603         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10604                                    " first" % self.instance_name,
10605                                    errors.ECODE_STATE)
10606       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10607       faulty_secondary = self._FindFaultyDisks(secondary_node)
10608
10609       if faulty_primary and faulty_secondary:
10610         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10611                                    " one node and can not be repaired"
10612                                    " automatically" % self.instance_name,
10613                                    errors.ECODE_STATE)
10614
10615       if faulty_primary:
10616         self.disks = faulty_primary
10617         self.target_node = instance.primary_node
10618         self.other_node = secondary_node
10619         check_nodes = [self.target_node, self.other_node]
10620       elif faulty_secondary:
10621         self.disks = faulty_secondary
10622         self.target_node = secondary_node
10623         self.other_node = instance.primary_node
10624         check_nodes = [self.target_node, self.other_node]
10625       else:
10626         self.disks = []
10627         check_nodes = []
10628
10629     else:
10630       # Non-automatic modes
10631       if self.mode == constants.REPLACE_DISK_PRI:
10632         self.target_node = instance.primary_node
10633         self.other_node = secondary_node
10634         check_nodes = [self.target_node, self.other_node]
10635
10636       elif self.mode == constants.REPLACE_DISK_SEC:
10637         self.target_node = secondary_node
10638         self.other_node = instance.primary_node
10639         check_nodes = [self.target_node, self.other_node]
10640
10641       elif self.mode == constants.REPLACE_DISK_CHG:
10642         self.new_node = remote_node
10643         self.other_node = instance.primary_node
10644         self.target_node = secondary_node
10645         check_nodes = [self.new_node, self.other_node]
10646
10647         _CheckNodeNotDrained(self.lu, remote_node)
10648         _CheckNodeVmCapable(self.lu, remote_node)
10649
10650         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10651         assert old_node_info is not None
10652         if old_node_info.offline and not self.early_release:
10653           # doesn't make sense to delay the release
10654           self.early_release = True
10655           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10656                           " early-release mode", secondary_node)
10657
10658       else:
10659         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10660                                      self.mode)
10661
10662       # If not specified all disks should be replaced
10663       if not self.disks:
10664         self.disks = range(len(self.instance.disks))
10665
10666     # TODO: This is ugly, but right now we can't distinguish between internal
10667     # submitted opcode and external one. We should fix that.
10668     if self.remote_node_info:
10669       # We change the node, lets verify it still meets instance policy
10670       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10671       ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10672                                        new_group_info)
10673       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10674                               ignore=self.ignore_ipolicy)
10675
10676     # TODO: compute disk parameters
10677     primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10678     secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10679     if primary_node_info.group != secondary_node_info.group:
10680       self.lu.LogInfo("The instance primary and secondary nodes are in two"
10681                       " different node groups; the disk parameters of the"
10682                       " primary node's group will be applied.")
10683
10684     self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10685
10686     for node in check_nodes:
10687       _CheckNodeOnline(self.lu, node)
10688
10689     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10690                                                           self.other_node,
10691                                                           self.target_node]
10692                               if node_name is not None)
10693
10694     # Release unneeded node and node resource locks
10695     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10696     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10697
10698     # Release any owned node group
10699     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10700       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10701
10702     # Check whether disks are valid
10703     for disk_idx in self.disks:
10704       instance.FindDisk(disk_idx)
10705
10706     # Get secondary node IP addresses
10707     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10708                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10709
10710   def Exec(self, feedback_fn):
10711     """Execute disk replacement.
10712
10713     This dispatches the disk replacement to the appropriate handler.
10714
10715     """
10716     if self.delay_iallocator:
10717       self._CheckPrereq2()
10718
10719     if __debug__:
10720       # Verify owned locks before starting operation
10721       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10722       assert set(owned_nodes) == set(self.node_secondary_ip), \
10723           ("Incorrect node locks, owning %s, expected %s" %
10724            (owned_nodes, self.node_secondary_ip.keys()))
10725       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10726               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10727
10728       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10729       assert list(owned_instances) == [self.instance_name], \
10730           "Instance '%s' not locked" % self.instance_name
10731
10732       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10733           "Should not own any node group lock at this point"
10734
10735     if not self.disks:
10736       feedback_fn("No disks need replacement")
10737       return
10738
10739     feedback_fn("Replacing disk(s) %s for %s" %
10740                 (utils.CommaJoin(self.disks), self.instance.name))
10741
10742     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10743
10744     # Activate the instance disks if we're replacing them on a down instance
10745     if activate_disks:
10746       _StartInstanceDisks(self.lu, self.instance, True)
10747
10748     try:
10749       # Should we replace the secondary node?
10750       if self.new_node is not None:
10751         fn = self._ExecDrbd8Secondary
10752       else:
10753         fn = self._ExecDrbd8DiskOnly
10754
10755       result = fn(feedback_fn)
10756     finally:
10757       # Deactivate the instance disks if we're replacing them on a
10758       # down instance
10759       if activate_disks:
10760         _SafeShutdownInstanceDisks(self.lu, self.instance)
10761
10762     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10763
10764     if __debug__:
10765       # Verify owned locks
10766       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10767       nodes = frozenset(self.node_secondary_ip)
10768       assert ((self.early_release and not owned_nodes) or
10769               (not self.early_release and not (set(owned_nodes) - nodes))), \
10770         ("Not owning the correct locks, early_release=%s, owned=%r,"
10771          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10772
10773     return result
10774
10775   def _CheckVolumeGroup(self, nodes):
10776     self.lu.LogInfo("Checking volume groups")
10777
10778     vgname = self.cfg.GetVGName()
10779
10780     # Make sure volume group exists on all involved nodes
10781     results = self.rpc.call_vg_list(nodes)
10782     if not results:
10783       raise errors.OpExecError("Can't list volume groups on the nodes")
10784
10785     for node in nodes:
10786       res = results[node]
10787       res.Raise("Error checking node %s" % node)
10788       if vgname not in res.payload:
10789         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10790                                  (vgname, node))
10791
10792   def _CheckDisksExistence(self, nodes):
10793     # Check disk existence
10794     for idx, dev in enumerate(self.instance.disks):
10795       if idx not in self.disks:
10796         continue
10797
10798       for node in nodes:
10799         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10800         self.cfg.SetDiskID(dev, node)
10801
10802         result = self.rpc.call_blockdev_find(node, dev)
10803
10804         msg = result.fail_msg
10805         if msg or not result.payload:
10806           if not msg:
10807             msg = "disk not found"
10808           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10809                                    (idx, node, msg))
10810
10811   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10812     for idx, dev in enumerate(self.instance.disks):
10813       if idx not in self.disks:
10814         continue
10815
10816       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10817                       (idx, node_name))
10818
10819       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10820                                    ldisk=ldisk):
10821         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10822                                  " replace disks for instance %s" %
10823                                  (node_name, self.instance.name))
10824
10825   def _CreateNewStorage(self, node_name):
10826     """Create new storage on the primary or secondary node.
10827
10828     This is only used for same-node replaces, not for changing the
10829     secondary node, hence we don't want to modify the existing disk.
10830
10831     """
10832     iv_names = {}
10833
10834     for idx, dev in enumerate(self.instance.disks):
10835       if idx not in self.disks:
10836         continue
10837
10838       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10839
10840       self.cfg.SetDiskID(dev, node_name)
10841
10842       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10843       names = _GenerateUniqueNames(self.lu, lv_names)
10844
10845       _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10846
10847       vg_data = dev.children[0].logical_id[0]
10848       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10849                              logical_id=(vg_data, names[0]), params=data_p)
10850       vg_meta = dev.children[1].logical_id[0]
10851       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10852                              logical_id=(vg_meta, names[1]), params=meta_p)
10853
10854       new_lvs = [lv_data, lv_meta]
10855       old_lvs = [child.Copy() for child in dev.children]
10856       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10857
10858       # we pass force_create=True to force the LVM creation
10859       for new_lv in new_lvs:
10860         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10861                         _GetInstanceInfoText(self.instance), False)
10862
10863     return iv_names
10864
10865   def _CheckDevices(self, node_name, iv_names):
10866     for name, (dev, _, _) in iv_names.iteritems():
10867       self.cfg.SetDiskID(dev, node_name)
10868
10869       result = self.rpc.call_blockdev_find(node_name, dev)
10870
10871       msg = result.fail_msg
10872       if msg or not result.payload:
10873         if not msg:
10874           msg = "disk not found"
10875         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10876                                  (name, msg))
10877
10878       if result.payload.is_degraded:
10879         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10880
10881   def _RemoveOldStorage(self, node_name, iv_names):
10882     for name, (_, old_lvs, _) in iv_names.iteritems():
10883       self.lu.LogInfo("Remove logical volumes for %s" % name)
10884
10885       for lv in old_lvs:
10886         self.cfg.SetDiskID(lv, node_name)
10887
10888         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10889         if msg:
10890           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10891                              hint="remove unused LVs manually")
10892
10893   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10894     """Replace a disk on the primary or secondary for DRBD 8.
10895
10896     The algorithm for replace is quite complicated:
10897
10898       1. for each disk to be replaced:
10899
10900         1. create new LVs on the target node with unique names
10901         1. detach old LVs from the drbd device
10902         1. rename old LVs to name_replaced.<time_t>
10903         1. rename new LVs to old LVs
10904         1. attach the new LVs (with the old names now) to the drbd device
10905
10906       1. wait for sync across all devices
10907
10908       1. for each modified disk:
10909
10910         1. remove old LVs (which have the name name_replaces.<time_t>)
10911
10912     Failures are not very well handled.
10913
10914     """
10915     steps_total = 6
10916
10917     # Step: check device activation
10918     self.lu.LogStep(1, steps_total, "Check device existence")
10919     self._CheckDisksExistence([self.other_node, self.target_node])
10920     self._CheckVolumeGroup([self.target_node, self.other_node])
10921
10922     # Step: check other node consistency
10923     self.lu.LogStep(2, steps_total, "Check peer consistency")
10924     self._CheckDisksConsistency(self.other_node,
10925                                 self.other_node == self.instance.primary_node,
10926                                 False)
10927
10928     # Step: create new storage
10929     self.lu.LogStep(3, steps_total, "Allocate new storage")
10930     iv_names = self._CreateNewStorage(self.target_node)
10931
10932     # Step: for each lv, detach+rename*2+attach
10933     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10934     for dev, old_lvs, new_lvs in iv_names.itervalues():
10935       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10936
10937       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10938                                                      old_lvs)
10939       result.Raise("Can't detach drbd from local storage on node"
10940                    " %s for device %s" % (self.target_node, dev.iv_name))
10941       #dev.children = []
10942       #cfg.Update(instance)
10943
10944       # ok, we created the new LVs, so now we know we have the needed
10945       # storage; as such, we proceed on the target node to rename
10946       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10947       # using the assumption that logical_id == physical_id (which in
10948       # turn is the unique_id on that node)
10949
10950       # FIXME(iustin): use a better name for the replaced LVs
10951       temp_suffix = int(time.time())
10952       ren_fn = lambda d, suff: (d.physical_id[0],
10953                                 d.physical_id[1] + "_replaced-%s" % suff)
10954
10955       # Build the rename list based on what LVs exist on the node
10956       rename_old_to_new = []
10957       for to_ren in old_lvs:
10958         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10959         if not result.fail_msg and result.payload:
10960           # device exists
10961           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10962
10963       self.lu.LogInfo("Renaming the old LVs on the target node")
10964       result = self.rpc.call_blockdev_rename(self.target_node,
10965                                              rename_old_to_new)
10966       result.Raise("Can't rename old LVs on node %s" % self.target_node)
10967
10968       # Now we rename the new LVs to the old LVs
10969       self.lu.LogInfo("Renaming the new LVs on the target node")
10970       rename_new_to_old = [(new, old.physical_id)
10971                            for old, new in zip(old_lvs, new_lvs)]
10972       result = self.rpc.call_blockdev_rename(self.target_node,
10973                                              rename_new_to_old)
10974       result.Raise("Can't rename new LVs on node %s" % self.target_node)
10975
10976       # Intermediate steps of in memory modifications
10977       for old, new in zip(old_lvs, new_lvs):
10978         new.logical_id = old.logical_id
10979         self.cfg.SetDiskID(new, self.target_node)
10980
10981       # We need to modify old_lvs so that removal later removes the
10982       # right LVs, not the newly added ones; note that old_lvs is a
10983       # copy here
10984       for disk in old_lvs:
10985         disk.logical_id = ren_fn(disk, temp_suffix)
10986         self.cfg.SetDiskID(disk, self.target_node)
10987
10988       # Now that the new lvs have the old name, we can add them to the device
10989       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10990       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10991                                                   new_lvs)
10992       msg = result.fail_msg
10993       if msg:
10994         for new_lv in new_lvs:
10995           msg2 = self.rpc.call_blockdev_remove(self.target_node,
10996                                                new_lv).fail_msg
10997           if msg2:
10998             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10999                                hint=("cleanup manually the unused logical"
11000                                      "volumes"))
11001         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11002
11003     cstep = itertools.count(5)
11004
11005     if self.early_release:
11006       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11007       self._RemoveOldStorage(self.target_node, iv_names)
11008       # TODO: Check if releasing locks early still makes sense
11009       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11010     else:
11011       # Release all resource locks except those used by the instance
11012       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11013                     keep=self.node_secondary_ip.keys())
11014
11015     # Release all node locks while waiting for sync
11016     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11017
11018     # TODO: Can the instance lock be downgraded here? Take the optional disk
11019     # shutdown in the caller into consideration.
11020
11021     # Wait for sync
11022     # This can fail as the old devices are degraded and _WaitForSync
11023     # does a combined result over all disks, so we don't check its return value
11024     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11025     _WaitForSync(self.lu, self.instance)
11026
11027     # Check all devices manually
11028     self._CheckDevices(self.instance.primary_node, iv_names)
11029
11030     # Step: remove old storage
11031     if not self.early_release:
11032       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11033       self._RemoveOldStorage(self.target_node, iv_names)
11034
11035   def _ExecDrbd8Secondary(self, feedback_fn):
11036     """Replace the secondary node for DRBD 8.
11037
11038     The algorithm for replace is quite complicated:
11039       - for all disks of the instance:
11040         - create new LVs on the new node with same names
11041         - shutdown the drbd device on the old secondary
11042         - disconnect the drbd network on the primary
11043         - create the drbd device on the new secondary
11044         - network attach the drbd on the primary, using an artifice:
11045           the drbd code for Attach() will connect to the network if it
11046           finds a device which is connected to the good local disks but
11047           not network enabled
11048       - wait for sync across all devices
11049       - remove all disks from the old secondary
11050
11051     Failures are not very well handled.
11052
11053     """
11054     steps_total = 6
11055
11056     pnode = self.instance.primary_node
11057
11058     # Step: check device activation
11059     self.lu.LogStep(1, steps_total, "Check device existence")
11060     self._CheckDisksExistence([self.instance.primary_node])
11061     self._CheckVolumeGroup([self.instance.primary_node])
11062
11063     # Step: check other node consistency
11064     self.lu.LogStep(2, steps_total, "Check peer consistency")
11065     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11066
11067     # Step: create new storage
11068     self.lu.LogStep(3, steps_total, "Allocate new storage")
11069     for idx, dev in enumerate(self.instance.disks):
11070       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11071                       (self.new_node, idx))
11072       # we pass force_create=True to force LVM creation
11073       for new_lv in dev.children:
11074         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11075                         _GetInstanceInfoText(self.instance), False)
11076
11077     # Step 4: dbrd minors and drbd setups changes
11078     # after this, we must manually remove the drbd minors on both the
11079     # error and the success paths
11080     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11081     minors = self.cfg.AllocateDRBDMinor([self.new_node
11082                                          for dev in self.instance.disks],
11083                                         self.instance.name)
11084     logging.debug("Allocated minors %r", minors)
11085
11086     iv_names = {}
11087     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11088       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11089                       (self.new_node, idx))
11090       # create new devices on new_node; note that we create two IDs:
11091       # one without port, so the drbd will be activated without
11092       # networking information on the new node at this stage, and one
11093       # with network, for the latter activation in step 4
11094       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11095       if self.instance.primary_node == o_node1:
11096         p_minor = o_minor1
11097       else:
11098         assert self.instance.primary_node == o_node2, "Three-node instance?"
11099         p_minor = o_minor2
11100
11101       new_alone_id = (self.instance.primary_node, self.new_node, None,
11102                       p_minor, new_minor, o_secret)
11103       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11104                     p_minor, new_minor, o_secret)
11105
11106       iv_names[idx] = (dev, dev.children, new_net_id)
11107       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11108                     new_net_id)
11109       drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11110       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11111                               logical_id=new_alone_id,
11112                               children=dev.children,
11113                               size=dev.size,
11114                               params=drbd_params)
11115       try:
11116         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11117                               _GetInstanceInfoText(self.instance), False)
11118       except errors.GenericError:
11119         self.cfg.ReleaseDRBDMinors(self.instance.name)
11120         raise
11121
11122     # We have new devices, shutdown the drbd on the old secondary
11123     for idx, dev in enumerate(self.instance.disks):
11124       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11125       self.cfg.SetDiskID(dev, self.target_node)
11126       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11127       if msg:
11128         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11129                            "node: %s" % (idx, msg),
11130                            hint=("Please cleanup this device manually as"
11131                                  " soon as possible"))
11132
11133     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11134     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11135                                                self.instance.disks)[pnode]
11136
11137     msg = result.fail_msg
11138     if msg:
11139       # detaches didn't succeed (unlikely)
11140       self.cfg.ReleaseDRBDMinors(self.instance.name)
11141       raise errors.OpExecError("Can't detach the disks from the network on"
11142                                " old node: %s" % (msg,))
11143
11144     # if we managed to detach at least one, we update all the disks of
11145     # the instance to point to the new secondary
11146     self.lu.LogInfo("Updating instance configuration")
11147     for dev, _, new_logical_id in iv_names.itervalues():
11148       dev.logical_id = new_logical_id
11149       self.cfg.SetDiskID(dev, self.instance.primary_node)
11150
11151     self.cfg.Update(self.instance, feedback_fn)
11152
11153     # Release all node locks (the configuration has been updated)
11154     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11155
11156     # and now perform the drbd attach
11157     self.lu.LogInfo("Attaching primary drbds to new secondary"
11158                     " (standalone => connected)")
11159     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11160                                             self.new_node],
11161                                            self.node_secondary_ip,
11162                                            self.instance.disks,
11163                                            self.instance.name,
11164                                            False)
11165     for to_node, to_result in result.items():
11166       msg = to_result.fail_msg
11167       if msg:
11168         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11169                            to_node, msg,
11170                            hint=("please do a gnt-instance info to see the"
11171                                  " status of disks"))
11172
11173     cstep = itertools.count(5)
11174
11175     if self.early_release:
11176       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11177       self._RemoveOldStorage(self.target_node, iv_names)
11178       # TODO: Check if releasing locks early still makes sense
11179       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11180     else:
11181       # Release all resource locks except those used by the instance
11182       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11183                     keep=self.node_secondary_ip.keys())
11184
11185     # TODO: Can the instance lock be downgraded here? Take the optional disk
11186     # shutdown in the caller into consideration.
11187
11188     # Wait for sync
11189     # This can fail as the old devices are degraded and _WaitForSync
11190     # does a combined result over all disks, so we don't check its return value
11191     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11192     _WaitForSync(self.lu, self.instance)
11193
11194     # Check all devices manually
11195     self._CheckDevices(self.instance.primary_node, iv_names)
11196
11197     # Step: remove old storage
11198     if not self.early_release:
11199       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11200       self._RemoveOldStorage(self.target_node, iv_names)
11201
11202
11203 class LURepairNodeStorage(NoHooksLU):
11204   """Repairs the volume group on a node.
11205
11206   """
11207   REQ_BGL = False
11208
11209   def CheckArguments(self):
11210     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11211
11212     storage_type = self.op.storage_type
11213
11214     if (constants.SO_FIX_CONSISTENCY not in
11215         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11216       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11217                                  " repaired" % storage_type,
11218                                  errors.ECODE_INVAL)
11219
11220   def ExpandNames(self):
11221     self.needed_locks = {
11222       locking.LEVEL_NODE: [self.op.node_name],
11223       }
11224
11225   def _CheckFaultyDisks(self, instance, node_name):
11226     """Ensure faulty disks abort the opcode or at least warn."""
11227     try:
11228       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11229                                   node_name, True):
11230         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11231                                    " node '%s'" % (instance.name, node_name),
11232                                    errors.ECODE_STATE)
11233     except errors.OpPrereqError, err:
11234       if self.op.ignore_consistency:
11235         self.proc.LogWarning(str(err.args[0]))
11236       else:
11237         raise
11238
11239   def CheckPrereq(self):
11240     """Check prerequisites.
11241
11242     """
11243     # Check whether any instance on this node has faulty disks
11244     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11245       if inst.admin_state != constants.ADMINST_UP:
11246         continue
11247       check_nodes = set(inst.all_nodes)
11248       check_nodes.discard(self.op.node_name)
11249       for inst_node_name in check_nodes:
11250         self._CheckFaultyDisks(inst, inst_node_name)
11251
11252   def Exec(self, feedback_fn):
11253     feedback_fn("Repairing storage unit '%s' on %s ..." %
11254                 (self.op.name, self.op.node_name))
11255
11256     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11257     result = self.rpc.call_storage_execute(self.op.node_name,
11258                                            self.op.storage_type, st_args,
11259                                            self.op.name,
11260                                            constants.SO_FIX_CONSISTENCY)
11261     result.Raise("Failed to repair storage unit '%s' on %s" %
11262                  (self.op.name, self.op.node_name))
11263
11264
11265 class LUNodeEvacuate(NoHooksLU):
11266   """Evacuates instances off a list of nodes.
11267
11268   """
11269   REQ_BGL = False
11270
11271   _MODE2IALLOCATOR = {
11272     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11273     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11274     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11275     }
11276   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11277   assert (frozenset(_MODE2IALLOCATOR.values()) ==
11278           constants.IALLOCATOR_NEVAC_MODES)
11279
11280   def CheckArguments(self):
11281     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11282
11283   def ExpandNames(self):
11284     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11285
11286     if self.op.remote_node is not None:
11287       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11288       assert self.op.remote_node
11289
11290       if self.op.remote_node == self.op.node_name:
11291         raise errors.OpPrereqError("Can not use evacuated node as a new"
11292                                    " secondary node", errors.ECODE_INVAL)
11293
11294       if self.op.mode != constants.NODE_EVAC_SEC:
11295         raise errors.OpPrereqError("Without the use of an iallocator only"
11296                                    " secondary instances can be evacuated",
11297                                    errors.ECODE_INVAL)
11298
11299     # Declare locks
11300     self.share_locks = _ShareAll()
11301     self.needed_locks = {
11302       locking.LEVEL_INSTANCE: [],
11303       locking.LEVEL_NODEGROUP: [],
11304       locking.LEVEL_NODE: [],
11305       }
11306
11307     # Determine nodes (via group) optimistically, needs verification once locks
11308     # have been acquired
11309     self.lock_nodes = self._DetermineNodes()
11310
11311   def _DetermineNodes(self):
11312     """Gets the list of nodes to operate on.
11313
11314     """
11315     if self.op.remote_node is None:
11316       # Iallocator will choose any node(s) in the same group
11317       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11318     else:
11319       group_nodes = frozenset([self.op.remote_node])
11320
11321     # Determine nodes to be locked
11322     return set([self.op.node_name]) | group_nodes
11323
11324   def _DetermineInstances(self):
11325     """Builds list of instances to operate on.
11326
11327     """
11328     assert self.op.mode in constants.NODE_EVAC_MODES
11329
11330     if self.op.mode == constants.NODE_EVAC_PRI:
11331       # Primary instances only
11332       inst_fn = _GetNodePrimaryInstances
11333       assert self.op.remote_node is None, \
11334         "Evacuating primary instances requires iallocator"
11335     elif self.op.mode == constants.NODE_EVAC_SEC:
11336       # Secondary instances only
11337       inst_fn = _GetNodeSecondaryInstances
11338     else:
11339       # All instances
11340       assert self.op.mode == constants.NODE_EVAC_ALL
11341       inst_fn = _GetNodeInstances
11342       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11343       # per instance
11344       raise errors.OpPrereqError("Due to an issue with the iallocator"
11345                                  " interface it is not possible to evacuate"
11346                                  " all instances at once; specify explicitly"
11347                                  " whether to evacuate primary or secondary"
11348                                  " instances",
11349                                  errors.ECODE_INVAL)
11350
11351     return inst_fn(self.cfg, self.op.node_name)
11352
11353   def DeclareLocks(self, level):
11354     if level == locking.LEVEL_INSTANCE:
11355       # Lock instances optimistically, needs verification once node and group
11356       # locks have been acquired
11357       self.needed_locks[locking.LEVEL_INSTANCE] = \
11358         set(i.name for i in self._DetermineInstances())
11359
11360     elif level == locking.LEVEL_NODEGROUP:
11361       # Lock node groups for all potential target nodes optimistically, needs
11362       # verification once nodes have been acquired
11363       self.needed_locks[locking.LEVEL_NODEGROUP] = \
11364         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11365
11366     elif level == locking.LEVEL_NODE:
11367       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11368
11369   def CheckPrereq(self):
11370     # Verify locks
11371     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11372     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11373     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11374
11375     need_nodes = self._DetermineNodes()
11376
11377     if not owned_nodes.issuperset(need_nodes):
11378       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11379                                  " locks were acquired, current nodes are"
11380                                  " are '%s', used to be '%s'; retry the"
11381                                  " operation" %
11382                                  (self.op.node_name,
11383                                   utils.CommaJoin(need_nodes),
11384                                   utils.CommaJoin(owned_nodes)),
11385                                  errors.ECODE_STATE)
11386
11387     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11388     if owned_groups != wanted_groups:
11389       raise errors.OpExecError("Node groups changed since locks were acquired,"
11390                                " current groups are '%s', used to be '%s';"
11391                                " retry the operation" %
11392                                (utils.CommaJoin(wanted_groups),
11393                                 utils.CommaJoin(owned_groups)))
11394
11395     # Determine affected instances
11396     self.instances = self._DetermineInstances()
11397     self.instance_names = [i.name for i in self.instances]
11398
11399     if set(self.instance_names) != owned_instances:
11400       raise errors.OpExecError("Instances on node '%s' changed since locks"
11401                                " were acquired, current instances are '%s',"
11402                                " used to be '%s'; retry the operation" %
11403                                (self.op.node_name,
11404                                 utils.CommaJoin(self.instance_names),
11405                                 utils.CommaJoin(owned_instances)))
11406
11407     if self.instance_names:
11408       self.LogInfo("Evacuating instances from node '%s': %s",
11409                    self.op.node_name,
11410                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
11411     else:
11412       self.LogInfo("No instances to evacuate from node '%s'",
11413                    self.op.node_name)
11414
11415     if self.op.remote_node is not None:
11416       for i in self.instances:
11417         if i.primary_node == self.op.remote_node:
11418           raise errors.OpPrereqError("Node %s is the primary node of"
11419                                      " instance %s, cannot use it as"
11420                                      " secondary" %
11421                                      (self.op.remote_node, i.name),
11422                                      errors.ECODE_INVAL)
11423
11424   def Exec(self, feedback_fn):
11425     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11426
11427     if not self.instance_names:
11428       # No instances to evacuate
11429       jobs = []
11430
11431     elif self.op.iallocator is not None:
11432       # TODO: Implement relocation to other group
11433       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11434                        evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11435                        instances=list(self.instance_names))
11436
11437       ial.Run(self.op.iallocator)
11438
11439       if not ial.success:
11440         raise errors.OpPrereqError("Can't compute node evacuation using"
11441                                    " iallocator '%s': %s" %
11442                                    (self.op.iallocator, ial.info),
11443                                    errors.ECODE_NORES)
11444
11445       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11446
11447     elif self.op.remote_node is not None:
11448       assert self.op.mode == constants.NODE_EVAC_SEC
11449       jobs = [
11450         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11451                                         remote_node=self.op.remote_node,
11452                                         disks=[],
11453                                         mode=constants.REPLACE_DISK_CHG,
11454                                         early_release=self.op.early_release)]
11455         for instance_name in self.instance_names
11456         ]
11457
11458     else:
11459       raise errors.ProgrammerError("No iallocator or remote node")
11460
11461     return ResultWithJobs(jobs)
11462
11463
11464 def _SetOpEarlyRelease(early_release, op):
11465   """Sets C{early_release} flag on opcodes if available.
11466
11467   """
11468   try:
11469     op.early_release = early_release
11470   except AttributeError:
11471     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11472
11473   return op
11474
11475
11476 def _NodeEvacDest(use_nodes, group, nodes):
11477   """Returns group or nodes depending on caller's choice.
11478
11479   """
11480   if use_nodes:
11481     return utils.CommaJoin(nodes)
11482   else:
11483     return group
11484
11485
11486 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11487   """Unpacks the result of change-group and node-evacuate iallocator requests.
11488
11489   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11490   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11491
11492   @type lu: L{LogicalUnit}
11493   @param lu: Logical unit instance
11494   @type alloc_result: tuple/list
11495   @param alloc_result: Result from iallocator
11496   @type early_release: bool
11497   @param early_release: Whether to release locks early if possible
11498   @type use_nodes: bool
11499   @param use_nodes: Whether to display node names instead of groups
11500
11501   """
11502   (moved, failed, jobs) = alloc_result
11503
11504   if failed:
11505     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11506                                  for (name, reason) in failed)
11507     lu.LogWarning("Unable to evacuate instances %s", failreason)
11508     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11509
11510   if moved:
11511     lu.LogInfo("Instances to be moved: %s",
11512                utils.CommaJoin("%s (to %s)" %
11513                                (name, _NodeEvacDest(use_nodes, group, nodes))
11514                                for (name, group, nodes) in moved))
11515
11516   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11517               map(opcodes.OpCode.LoadOpCode, ops))
11518           for ops in jobs]
11519
11520
11521 class LUInstanceGrowDisk(LogicalUnit):
11522   """Grow a disk of an instance.
11523
11524   """
11525   HPATH = "disk-grow"
11526   HTYPE = constants.HTYPE_INSTANCE
11527   REQ_BGL = False
11528
11529   def ExpandNames(self):
11530     self._ExpandAndLockInstance()
11531     self.needed_locks[locking.LEVEL_NODE] = []
11532     self.needed_locks[locking.LEVEL_NODE_RES] = []
11533     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11534     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11535
11536   def DeclareLocks(self, level):
11537     if level == locking.LEVEL_NODE:
11538       self._LockInstancesNodes()
11539     elif level == locking.LEVEL_NODE_RES:
11540       # Copy node locks
11541       self.needed_locks[locking.LEVEL_NODE_RES] = \
11542         self.needed_locks[locking.LEVEL_NODE][:]
11543
11544   def BuildHooksEnv(self):
11545     """Build hooks env.
11546
11547     This runs on the master, the primary and all the secondaries.
11548
11549     """
11550     env = {
11551       "DISK": self.op.disk,
11552       "AMOUNT": self.op.amount,
11553       }
11554     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11555     return env
11556
11557   def BuildHooksNodes(self):
11558     """Build hooks nodes.
11559
11560     """
11561     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11562     return (nl, nl)
11563
11564   def CheckPrereq(self):
11565     """Check prerequisites.
11566
11567     This checks that the instance is in the cluster.
11568
11569     """
11570     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11571     assert instance is not None, \
11572       "Cannot retrieve locked instance %s" % self.op.instance_name
11573     nodenames = list(instance.all_nodes)
11574     for node in nodenames:
11575       _CheckNodeOnline(self, node)
11576
11577     self.instance = instance
11578
11579     if instance.disk_template not in constants.DTS_GROWABLE:
11580       raise errors.OpPrereqError("Instance's disk layout does not support"
11581                                  " growing", errors.ECODE_INVAL)
11582
11583     self.disk = instance.FindDisk(self.op.disk)
11584
11585     if instance.disk_template not in (constants.DT_FILE,
11586                                       constants.DT_SHARED_FILE,
11587                                       constants.DT_RBD):
11588       # TODO: check the free disk space for file, when that feature will be
11589       # supported
11590       _CheckNodesFreeDiskPerVG(self, nodenames,
11591                                self.disk.ComputeGrowth(self.op.amount))
11592
11593   def Exec(self, feedback_fn):
11594     """Execute disk grow.
11595
11596     """
11597     instance = self.instance
11598     disk = self.disk
11599
11600     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11601     assert (self.owned_locks(locking.LEVEL_NODE) ==
11602             self.owned_locks(locking.LEVEL_NODE_RES))
11603
11604     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11605     if not disks_ok:
11606       raise errors.OpExecError("Cannot activate block device to grow")
11607
11608     feedback_fn("Growing disk %s of instance '%s' by %s" %
11609                 (self.op.disk, instance.name,
11610                  utils.FormatUnit(self.op.amount, "h")))
11611
11612     # First run all grow ops in dry-run mode
11613     for node in instance.all_nodes:
11614       self.cfg.SetDiskID(disk, node)
11615       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11616       result.Raise("Grow request failed to node %s" % node)
11617
11618     # We know that (as far as we can test) operations across different
11619     # nodes will succeed, time to run it for real
11620     for node in instance.all_nodes:
11621       self.cfg.SetDiskID(disk, node)
11622       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11623       result.Raise("Grow request failed to node %s" % node)
11624
11625       # TODO: Rewrite code to work properly
11626       # DRBD goes into sync mode for a short amount of time after executing the
11627       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11628       # calling "resize" in sync mode fails. Sleeping for a short amount of
11629       # time is a work-around.
11630       time.sleep(5)
11631
11632     disk.RecordGrow(self.op.amount)
11633     self.cfg.Update(instance, feedback_fn)
11634
11635     # Changes have been recorded, release node lock
11636     _ReleaseLocks(self, locking.LEVEL_NODE)
11637
11638     # Downgrade lock while waiting for sync
11639     self.glm.downgrade(locking.LEVEL_INSTANCE)
11640
11641     if self.op.wait_for_sync:
11642       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11643       if disk_abort:
11644         self.proc.LogWarning("Disk sync-ing has not returned a good"
11645                              " status; please check the instance")
11646       if instance.admin_state != constants.ADMINST_UP:
11647         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11648     elif instance.admin_state != constants.ADMINST_UP:
11649       self.proc.LogWarning("Not shutting down the disk even if the instance is"
11650                            " not supposed to be running because no wait for"
11651                            " sync mode was requested")
11652
11653     assert self.owned_locks(locking.LEVEL_NODE_RES)
11654     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11655
11656
11657 class LUInstanceQueryData(NoHooksLU):
11658   """Query runtime instance data.
11659
11660   """
11661   REQ_BGL = False
11662
11663   def ExpandNames(self):
11664     self.needed_locks = {}
11665
11666     # Use locking if requested or when non-static information is wanted
11667     if not (self.op.static or self.op.use_locking):
11668       self.LogWarning("Non-static data requested, locks need to be acquired")
11669       self.op.use_locking = True
11670
11671     if self.op.instances or not self.op.use_locking:
11672       # Expand instance names right here
11673       self.wanted_names = _GetWantedInstances(self, self.op.instances)
11674     else:
11675       # Will use acquired locks
11676       self.wanted_names = None
11677
11678     if self.op.use_locking:
11679       self.share_locks = _ShareAll()
11680
11681       if self.wanted_names is None:
11682         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11683       else:
11684         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11685
11686       self.needed_locks[locking.LEVEL_NODEGROUP] = []
11687       self.needed_locks[locking.LEVEL_NODE] = []
11688       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11689
11690   def DeclareLocks(self, level):
11691     if self.op.use_locking:
11692       if level == locking.LEVEL_NODEGROUP:
11693         owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11694
11695         # Lock all groups used by instances optimistically; this requires going
11696         # via the node before it's locked, requiring verification later on
11697         self.needed_locks[locking.LEVEL_NODEGROUP] = \
11698           frozenset(group_uuid
11699                     for instance_name in owned_instances
11700                     for group_uuid in
11701                       self.cfg.GetInstanceNodeGroups(instance_name))
11702
11703       elif level == locking.LEVEL_NODE:
11704         self._LockInstancesNodes()
11705
11706   def CheckPrereq(self):
11707     """Check prerequisites.
11708
11709     This only checks the optional instance list against the existing names.
11710
11711     """
11712     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11713     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11714     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11715
11716     if self.wanted_names is None:
11717       assert self.op.use_locking, "Locking was not used"
11718       self.wanted_names = owned_instances
11719
11720     instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11721
11722     if self.op.use_locking:
11723       _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11724                                 None)
11725     else:
11726       assert not (owned_instances or owned_groups or owned_nodes)
11727
11728     self.wanted_instances = instances.values()
11729
11730   def _ComputeBlockdevStatus(self, node, instance_name, dev):
11731     """Returns the status of a block device
11732
11733     """
11734     if self.op.static or not node:
11735       return None
11736
11737     self.cfg.SetDiskID(dev, node)
11738
11739     result = self.rpc.call_blockdev_find(node, dev)
11740     if result.offline:
11741       return None
11742
11743     result.Raise("Can't compute disk status for %s" % instance_name)
11744
11745     status = result.payload
11746     if status is None:
11747       return None
11748
11749     return (status.dev_path, status.major, status.minor,
11750             status.sync_percent, status.estimated_time,
11751             status.is_degraded, status.ldisk_status)
11752
11753   def _ComputeDiskStatus(self, instance, snode, dev):
11754     """Compute block device status.
11755
11756     """
11757     if dev.dev_type in constants.LDS_DRBD:
11758       # we change the snode then (otherwise we use the one passed in)
11759       if dev.logical_id[0] == instance.primary_node:
11760         snode = dev.logical_id[1]
11761       else:
11762         snode = dev.logical_id[0]
11763
11764     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11765                                               instance.name, dev)
11766     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11767
11768     if dev.children:
11769       dev_children = map(compat.partial(self._ComputeDiskStatus,
11770                                         instance, snode),
11771                          dev.children)
11772     else:
11773       dev_children = []
11774
11775     return {
11776       "iv_name": dev.iv_name,
11777       "dev_type": dev.dev_type,
11778       "logical_id": dev.logical_id,
11779       "physical_id": dev.physical_id,
11780       "pstatus": dev_pstatus,
11781       "sstatus": dev_sstatus,
11782       "children": dev_children,
11783       "mode": dev.mode,
11784       "size": dev.size,
11785       }
11786
11787   def Exec(self, feedback_fn):
11788     """Gather and return data"""
11789     result = {}
11790
11791     cluster = self.cfg.GetClusterInfo()
11792
11793     node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11794     nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11795
11796     groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11797                                                  for node in nodes.values()))
11798
11799     group2name_fn = lambda uuid: groups[uuid].name
11800
11801     for instance in self.wanted_instances:
11802       pnode = nodes[instance.primary_node]
11803
11804       if self.op.static or pnode.offline:
11805         remote_state = None
11806         if pnode.offline:
11807           self.LogWarning("Primary node %s is marked offline, returning static"
11808                           " information only for instance %s" %
11809                           (pnode.name, instance.name))
11810       else:
11811         remote_info = self.rpc.call_instance_info(instance.primary_node,
11812                                                   instance.name,
11813                                                   instance.hypervisor)
11814         remote_info.Raise("Error checking node %s" % instance.primary_node)
11815         remote_info = remote_info.payload
11816         if remote_info and "state" in remote_info:
11817           remote_state = "up"
11818         else:
11819           if instance.admin_state == constants.ADMINST_UP:
11820             remote_state = "down"
11821           else:
11822             remote_state = instance.admin_state
11823
11824       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11825                   instance.disks)
11826
11827       snodes_group_uuids = [nodes[snode_name].group
11828                             for snode_name in instance.secondary_nodes]
11829
11830       result[instance.name] = {
11831         "name": instance.name,
11832         "config_state": instance.admin_state,
11833         "run_state": remote_state,
11834         "pnode": instance.primary_node,
11835         "pnode_group_uuid": pnode.group,
11836         "pnode_group_name": group2name_fn(pnode.group),
11837         "snodes": instance.secondary_nodes,
11838         "snodes_group_uuids": snodes_group_uuids,
11839         "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11840         "os": instance.os,
11841         # this happens to be the same format used for hooks
11842         "nics": _NICListToTuple(self, instance.nics),
11843         "disk_template": instance.disk_template,
11844         "disks": disks,
11845         "hypervisor": instance.hypervisor,
11846         "network_port": instance.network_port,
11847         "hv_instance": instance.hvparams,
11848         "hv_actual": cluster.FillHV(instance, skip_globals=True),
11849         "be_instance": instance.beparams,
11850         "be_actual": cluster.FillBE(instance),
11851         "os_instance": instance.osparams,
11852         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11853         "serial_no": instance.serial_no,
11854         "mtime": instance.mtime,
11855         "ctime": instance.ctime,
11856         "uuid": instance.uuid,
11857         }
11858
11859     return result
11860
11861
11862 def PrepareContainerMods(mods, private_fn):
11863   """Prepares a list of container modifications by adding a private data field.
11864
11865   @type mods: list of tuples; (operation, index, parameters)
11866   @param mods: List of modifications
11867   @type private_fn: callable or None
11868   @param private_fn: Callable for constructing a private data field for a
11869     modification
11870   @rtype: list
11871
11872   """
11873   if private_fn is None:
11874     fn = lambda: None
11875   else:
11876     fn = private_fn
11877
11878   return [(op, idx, params, fn()) for (op, idx, params) in mods]
11879
11880
11881 #: Type description for changes as returned by L{ApplyContainerMods}'s
11882 #: callbacks
11883 _TApplyContModsCbChanges = \
11884   ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11885     ht.TNonEmptyString,
11886     ht.TAny,
11887     ])))
11888
11889
11890 def ApplyContainerMods(kind, container, chgdesc, mods,
11891                        create_fn, modify_fn, remove_fn):
11892   """Applies descriptions in C{mods} to C{container}.
11893
11894   @type kind: string
11895   @param kind: One-word item description
11896   @type container: list
11897   @param container: Container to modify
11898   @type chgdesc: None or list
11899   @param chgdesc: List of applied changes
11900   @type mods: list
11901   @param mods: Modifications as returned by L{PrepareContainerMods}
11902   @type create_fn: callable
11903   @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11904     receives absolute item index, parameters and private data object as added
11905     by L{PrepareContainerMods}, returns tuple containing new item and changes
11906     as list
11907   @type modify_fn: callable
11908   @param modify_fn: Callback for modifying an existing item
11909     (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11910     and private data object as added by L{PrepareContainerMods}, returns
11911     changes as list
11912   @type remove_fn: callable
11913   @param remove_fn: Callback on removing item; receives absolute item index,
11914     item and private data object as added by L{PrepareContainerMods}
11915
11916   """
11917   for (op, idx, params, private) in mods:
11918     if idx == -1:
11919       # Append
11920       absidx = len(container) - 1
11921     elif idx < 0:
11922       raise IndexError("Not accepting negative indices other than -1")
11923     elif idx > len(container):
11924       raise IndexError("Got %s index %s, but there are only %s" %
11925                        (kind, idx, len(container)))
11926     else:
11927       absidx = idx
11928
11929     changes = None
11930
11931     if op == constants.DDM_ADD:
11932       # Calculate where item will be added
11933       if idx == -1:
11934         addidx = len(container)
11935       else:
11936         addidx = idx
11937
11938       if create_fn is None:
11939         item = params
11940       else:
11941         (item, changes) = create_fn(addidx, params, private)
11942
11943       if idx == -1:
11944         container.append(item)
11945       else:
11946         assert idx >= 0
11947         assert idx <= len(container)
11948         # list.insert does so before the specified index
11949         container.insert(idx, item)
11950     else:
11951       # Retrieve existing item
11952       try:
11953         item = container[absidx]
11954       except IndexError:
11955         raise IndexError("Invalid %s index %s" % (kind, idx))
11956
11957       if op == constants.DDM_REMOVE:
11958         assert not params
11959
11960         if remove_fn is not None:
11961           remove_fn(absidx, item, private)
11962
11963         changes = [("%s/%s" % (kind, absidx), "remove")]
11964
11965         assert container[absidx] == item
11966         del container[absidx]
11967       elif op == constants.DDM_MODIFY:
11968         if modify_fn is not None:
11969           changes = modify_fn(absidx, item, params, private)
11970       else:
11971         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11972
11973     assert _TApplyContModsCbChanges(changes)
11974
11975     if not (chgdesc is None or changes is None):
11976       chgdesc.extend(changes)
11977
11978
11979 def _UpdateIvNames(base_index, disks):
11980   """Updates the C{iv_name} attribute of disks.
11981
11982   @type disks: list of L{objects.Disk}
11983
11984   """
11985   for (idx, disk) in enumerate(disks):
11986     disk.iv_name = "disk/%s" % (base_index + idx, )
11987
11988
11989 class _InstNicModPrivate:
11990   """Data structure for network interface modifications.
11991
11992   Used by L{LUInstanceSetParams}.
11993
11994   """
11995   def __init__(self):
11996     self.params = None
11997     self.filled = None
11998
11999
12000 class LUInstanceSetParams(LogicalUnit):
12001   """Modifies an instances's parameters.
12002
12003   """
12004   HPATH = "instance-modify"
12005   HTYPE = constants.HTYPE_INSTANCE
12006   REQ_BGL = False
12007
12008   @staticmethod
12009   def _UpgradeDiskNicMods(kind, mods, verify_fn):
12010     assert ht.TList(mods)
12011     assert not mods or len(mods[0]) in (2, 3)
12012
12013     if mods and len(mods[0]) == 2:
12014       result = []
12015
12016       addremove = 0
12017       for op, params in mods:
12018         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12019           result.append((op, -1, params))
12020           addremove += 1
12021
12022           if addremove > 1:
12023             raise errors.OpPrereqError("Only one %s add or remove operation is"
12024                                        " supported at a time" % kind,
12025                                        errors.ECODE_INVAL)
12026         else:
12027           result.append((constants.DDM_MODIFY, op, params))
12028
12029       assert verify_fn(result)
12030     else:
12031       result = mods
12032
12033     return result
12034
12035   @staticmethod
12036   def _CheckMods(kind, mods, key_types, item_fn):
12037     """Ensures requested disk/NIC modifications are valid.
12038
12039     """
12040     for (op, _, params) in mods:
12041       assert ht.TDict(params)
12042
12043       utils.ForceDictType(params, key_types)
12044
12045       if op == constants.DDM_REMOVE:
12046         if params:
12047           raise errors.OpPrereqError("No settings should be passed when"
12048                                      " removing a %s" % kind,
12049                                      errors.ECODE_INVAL)
12050       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12051         item_fn(op, params)
12052       else:
12053         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12054
12055   @staticmethod
12056   def _VerifyDiskModification(op, params):
12057     """Verifies a disk modification.
12058
12059     """
12060     if op == constants.DDM_ADD:
12061       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12062       if mode not in constants.DISK_ACCESS_SET:
12063         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12064                                    errors.ECODE_INVAL)
12065
12066       size = params.get(constants.IDISK_SIZE, None)
12067       if size is None:
12068         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12069                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12070
12071       try:
12072         size = int(size)
12073       except (TypeError, ValueError), err:
12074         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12075                                    errors.ECODE_INVAL)
12076
12077       params[constants.IDISK_SIZE] = size
12078
12079     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12080       raise errors.OpPrereqError("Disk size change not possible, use"
12081                                  " grow-disk", errors.ECODE_INVAL)
12082
12083   @staticmethod
12084   def _VerifyNicModification(op, params):
12085     """Verifies a network interface modification.
12086
12087     """
12088     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12089       ip = params.get(constants.INIC_IP, None)
12090       if ip is None:
12091         pass
12092       elif ip.lower() == constants.VALUE_NONE:
12093         params[constants.INIC_IP] = None
12094       elif not netutils.IPAddress.IsValid(ip):
12095         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12096                                    errors.ECODE_INVAL)
12097
12098       bridge = params.get("bridge", None)
12099       link = params.get(constants.INIC_LINK, None)
12100       if bridge and link:
12101         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12102                                    " at the same time", errors.ECODE_INVAL)
12103       elif bridge and bridge.lower() == constants.VALUE_NONE:
12104         params["bridge"] = None
12105       elif link and link.lower() == constants.VALUE_NONE:
12106         params[constants.INIC_LINK] = None
12107
12108       if op == constants.DDM_ADD:
12109         macaddr = params.get(constants.INIC_MAC, None)
12110         if macaddr is None:
12111           params[constants.INIC_MAC] = constants.VALUE_AUTO
12112
12113       if constants.INIC_MAC in params:
12114         macaddr = params[constants.INIC_MAC]
12115         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12116           macaddr = utils.NormalizeAndValidateMac(macaddr)
12117
12118         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12119           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12120                                      " modifying an existing NIC",
12121                                      errors.ECODE_INVAL)
12122
12123   def CheckArguments(self):
12124     if not (self.op.nics or self.op.disks or self.op.disk_template or
12125             self.op.hvparams or self.op.beparams or self.op.os_name or
12126             self.op.offline is not None or self.op.runtime_mem):
12127       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12128
12129     if self.op.hvparams:
12130       _CheckGlobalHvParams(self.op.hvparams)
12131
12132     self.op.disks = \
12133       self._UpgradeDiskNicMods("disk", self.op.disks,
12134         opcodes.OpInstanceSetParams.TestDiskModifications)
12135     self.op.nics = \
12136       self._UpgradeDiskNicMods("NIC", self.op.nics,
12137         opcodes.OpInstanceSetParams.TestNicModifications)
12138
12139     # Check disk modifications
12140     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12141                     self._VerifyDiskModification)
12142
12143     if self.op.disks and self.op.disk_template is not None:
12144       raise errors.OpPrereqError("Disk template conversion and other disk"
12145                                  " changes not supported at the same time",
12146                                  errors.ECODE_INVAL)
12147
12148     if (self.op.disk_template and
12149         self.op.disk_template in constants.DTS_INT_MIRROR and
12150         self.op.remote_node is None):
12151       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12152                                  " one requires specifying a secondary node",
12153                                  errors.ECODE_INVAL)
12154
12155     # Check NIC modifications
12156     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12157                     self._VerifyNicModification)
12158
12159   def ExpandNames(self):
12160     self._ExpandAndLockInstance()
12161     # Can't even acquire node locks in shared mode as upcoming changes in
12162     # Ganeti 2.6 will start to modify the node object on disk conversion
12163     self.needed_locks[locking.LEVEL_NODE] = []
12164     self.needed_locks[locking.LEVEL_NODE_RES] = []
12165     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12166
12167   def DeclareLocks(self, level):
12168     # TODO: Acquire group lock in shared mode (disk parameters)
12169     if level == locking.LEVEL_NODE:
12170       self._LockInstancesNodes()
12171       if self.op.disk_template and self.op.remote_node:
12172         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12173         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12174     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12175       # Copy node locks
12176       self.needed_locks[locking.LEVEL_NODE_RES] = \
12177         self.needed_locks[locking.LEVEL_NODE][:]
12178
12179   def BuildHooksEnv(self):
12180     """Build hooks env.
12181
12182     This runs on the master, primary and secondaries.
12183
12184     """
12185     args = dict()
12186     if constants.BE_MINMEM in self.be_new:
12187       args["minmem"] = self.be_new[constants.BE_MINMEM]
12188     if constants.BE_MAXMEM in self.be_new:
12189       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12190     if constants.BE_VCPUS in self.be_new:
12191       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12192     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12193     # information at all.
12194
12195     if self._new_nics is not None:
12196       nics = []
12197
12198       for nic in self._new_nics:
12199         nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12200         mode = nicparams[constants.NIC_MODE]
12201         link = nicparams[constants.NIC_LINK]
12202         nics.append((nic.ip, nic.mac, mode, link))
12203
12204       args["nics"] = nics
12205
12206     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12207     if self.op.disk_template:
12208       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12209     if self.op.runtime_mem:
12210       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12211
12212     return env
12213
12214   def BuildHooksNodes(self):
12215     """Build hooks nodes.
12216
12217     """
12218     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12219     return (nl, nl)
12220
12221   def _PrepareNicModification(self, params, private, old_ip, old_params,
12222                               cluster, pnode):
12223     update_params_dict = dict([(key, params[key])
12224                                for key in constants.NICS_PARAMETERS
12225                                if key in params])
12226
12227     if "bridge" in params:
12228       update_params_dict[constants.NIC_LINK] = params["bridge"]
12229
12230     new_params = _GetUpdatedParams(old_params, update_params_dict)
12231     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12232
12233     new_filled_params = cluster.SimpleFillNIC(new_params)
12234     objects.NIC.CheckParameterSyntax(new_filled_params)
12235
12236     new_mode = new_filled_params[constants.NIC_MODE]
12237     if new_mode == constants.NIC_MODE_BRIDGED:
12238       bridge = new_filled_params[constants.NIC_LINK]
12239       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12240       if msg:
12241         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12242         if self.op.force:
12243           self.warn.append(msg)
12244         else:
12245           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12246
12247     elif new_mode == constants.NIC_MODE_ROUTED:
12248       ip = params.get(constants.INIC_IP, old_ip)
12249       if ip is None:
12250         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12251                                    " on a routed NIC", errors.ECODE_INVAL)
12252
12253     if constants.INIC_MAC in params:
12254       mac = params[constants.INIC_MAC]
12255       if mac is None:
12256         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12257                                    errors.ECODE_INVAL)
12258       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12259         # otherwise generate the MAC address
12260         params[constants.INIC_MAC] = \
12261           self.cfg.GenerateMAC(self.proc.GetECId())
12262       else:
12263         # or validate/reserve the current one
12264         try:
12265           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12266         except errors.ReservationError:
12267           raise errors.OpPrereqError("MAC address '%s' already in use"
12268                                      " in cluster" % mac,
12269                                      errors.ECODE_NOTUNIQUE)
12270
12271     private.params = new_params
12272     private.filled = new_filled_params
12273
12274     return (None, None)
12275
12276   def CheckPrereq(self):
12277     """Check prerequisites.
12278
12279     This only checks the instance list against the existing names.
12280
12281     """
12282     # checking the new params on the primary/secondary nodes
12283
12284     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12285     cluster = self.cluster = self.cfg.GetClusterInfo()
12286     assert self.instance is not None, \
12287       "Cannot retrieve locked instance %s" % self.op.instance_name
12288     pnode = instance.primary_node
12289     nodelist = list(instance.all_nodes)
12290     pnode_info = self.cfg.GetNodeInfo(pnode)
12291     self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12292
12293     # Prepare disk/NIC modifications
12294     self.diskmod = PrepareContainerMods(self.op.disks, None)
12295     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12296
12297     # OS change
12298     if self.op.os_name and not self.op.force:
12299       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12300                       self.op.force_variant)
12301       instance_os = self.op.os_name
12302     else:
12303       instance_os = instance.os
12304
12305     assert not (self.op.disk_template and self.op.disks), \
12306       "Can't modify disk template and apply disk changes at the same time"
12307
12308     if self.op.disk_template:
12309       if instance.disk_template == self.op.disk_template:
12310         raise errors.OpPrereqError("Instance already has disk template %s" %
12311                                    instance.disk_template, errors.ECODE_INVAL)
12312
12313       if (instance.disk_template,
12314           self.op.disk_template) not in self._DISK_CONVERSIONS:
12315         raise errors.OpPrereqError("Unsupported disk template conversion from"
12316                                    " %s to %s" % (instance.disk_template,
12317                                                   self.op.disk_template),
12318                                    errors.ECODE_INVAL)
12319       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12320                           msg="cannot change disk template")
12321       if self.op.disk_template in constants.DTS_INT_MIRROR:
12322         if self.op.remote_node == pnode:
12323           raise errors.OpPrereqError("Given new secondary node %s is the same"
12324                                      " as the primary node of the instance" %
12325                                      self.op.remote_node, errors.ECODE_STATE)
12326         _CheckNodeOnline(self, self.op.remote_node)
12327         _CheckNodeNotDrained(self, self.op.remote_node)
12328         # FIXME: here we assume that the old instance type is DT_PLAIN
12329         assert instance.disk_template == constants.DT_PLAIN
12330         disks = [{constants.IDISK_SIZE: d.size,
12331                   constants.IDISK_VG: d.logical_id[0]}
12332                  for d in instance.disks]
12333         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12334         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12335
12336         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12337         snode_group = self.cfg.GetNodeGroup(snode_info.group)
12338         ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12339         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12340                                 ignore=self.op.ignore_ipolicy)
12341         if pnode_info.group != snode_info.group:
12342           self.LogWarning("The primary and secondary nodes are in two"
12343                           " different node groups; the disk parameters"
12344                           " from the first disk's node group will be"
12345                           " used")
12346
12347     # hvparams processing
12348     if self.op.hvparams:
12349       hv_type = instance.hypervisor
12350       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12351       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12352       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12353
12354       # local check
12355       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12356       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12357       self.hv_proposed = self.hv_new = hv_new # the new actual values
12358       self.hv_inst = i_hvdict # the new dict (without defaults)
12359     else:
12360       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12361                                               instance.hvparams)
12362       self.hv_new = self.hv_inst = {}
12363
12364     # beparams processing
12365     if self.op.beparams:
12366       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12367                                    use_none=True)
12368       objects.UpgradeBeParams(i_bedict)
12369       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12370       be_new = cluster.SimpleFillBE(i_bedict)
12371       self.be_proposed = self.be_new = be_new # the new actual values
12372       self.be_inst = i_bedict # the new dict (without defaults)
12373     else:
12374       self.be_new = self.be_inst = {}
12375       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12376     be_old = cluster.FillBE(instance)
12377
12378     # CPU param validation -- checking every time a paramtere is
12379     # changed to cover all cases where either CPU mask or vcpus have
12380     # changed
12381     if (constants.BE_VCPUS in self.be_proposed and
12382         constants.HV_CPU_MASK in self.hv_proposed):
12383       cpu_list = \
12384         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12385       # Verify mask is consistent with number of vCPUs. Can skip this
12386       # test if only 1 entry in the CPU mask, which means same mask
12387       # is applied to all vCPUs.
12388       if (len(cpu_list) > 1 and
12389           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12390         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12391                                    " CPU mask [%s]" %
12392                                    (self.be_proposed[constants.BE_VCPUS],
12393                                     self.hv_proposed[constants.HV_CPU_MASK]),
12394                                    errors.ECODE_INVAL)
12395
12396       # Only perform this test if a new CPU mask is given
12397       if constants.HV_CPU_MASK in self.hv_new:
12398         # Calculate the largest CPU number requested
12399         max_requested_cpu = max(map(max, cpu_list))
12400         # Check that all of the instance's nodes have enough physical CPUs to
12401         # satisfy the requested CPU mask
12402         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12403                                 max_requested_cpu + 1, instance.hypervisor)
12404
12405     # osparams processing
12406     if self.op.osparams:
12407       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12408       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12409       self.os_inst = i_osdict # the new dict (without defaults)
12410     else:
12411       self.os_inst = {}
12412
12413     self.warn = []
12414
12415     #TODO(dynmem): do the appropriate check involving MINMEM
12416     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12417         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12418       mem_check_list = [pnode]
12419       if be_new[constants.BE_AUTO_BALANCE]:
12420         # either we changed auto_balance to yes or it was from before
12421         mem_check_list.extend(instance.secondary_nodes)
12422       instance_info = self.rpc.call_instance_info(pnode, instance.name,
12423                                                   instance.hypervisor)
12424       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12425                                          [instance.hypervisor])
12426       pninfo = nodeinfo[pnode]
12427       msg = pninfo.fail_msg
12428       if msg:
12429         # Assume the primary node is unreachable and go ahead
12430         self.warn.append("Can't get info from primary node %s: %s" %
12431                          (pnode, msg))
12432       else:
12433         (_, _, (pnhvinfo, )) = pninfo.payload
12434         if not isinstance(pnhvinfo.get("memory_free", None), int):
12435           self.warn.append("Node data from primary node %s doesn't contain"
12436                            " free memory information" % pnode)
12437         elif instance_info.fail_msg:
12438           self.warn.append("Can't get instance runtime information: %s" %
12439                           instance_info.fail_msg)
12440         else:
12441           if instance_info.payload:
12442             current_mem = int(instance_info.payload["memory"])
12443           else:
12444             # Assume instance not running
12445             # (there is a slight race condition here, but it's not very
12446             # probable, and we have no other way to check)
12447             # TODO: Describe race condition
12448             current_mem = 0
12449           #TODO(dynmem): do the appropriate check involving MINMEM
12450           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12451                       pnhvinfo["memory_free"])
12452           if miss_mem > 0:
12453             raise errors.OpPrereqError("This change will prevent the instance"
12454                                        " from starting, due to %d MB of memory"
12455                                        " missing on its primary node" %
12456                                        miss_mem,
12457                                        errors.ECODE_NORES)
12458
12459       if be_new[constants.BE_AUTO_BALANCE]:
12460         for node, nres in nodeinfo.items():
12461           if node not in instance.secondary_nodes:
12462             continue
12463           nres.Raise("Can't get info from secondary node %s" % node,
12464                      prereq=True, ecode=errors.ECODE_STATE)
12465           (_, _, (nhvinfo, )) = nres.payload
12466           if not isinstance(nhvinfo.get("memory_free", None), int):
12467             raise errors.OpPrereqError("Secondary node %s didn't return free"
12468                                        " memory information" % node,
12469                                        errors.ECODE_STATE)
12470           #TODO(dynmem): do the appropriate check involving MINMEM
12471           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12472             raise errors.OpPrereqError("This change will prevent the instance"
12473                                        " from failover to its secondary node"
12474                                        " %s, due to not enough memory" % node,
12475                                        errors.ECODE_STATE)
12476
12477     if self.op.runtime_mem:
12478       remote_info = self.rpc.call_instance_info(instance.primary_node,
12479                                                 instance.name,
12480                                                 instance.hypervisor)
12481       remote_info.Raise("Error checking node %s" % instance.primary_node)
12482       if not remote_info.payload: # not running already
12483         raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12484                                    errors.ECODE_STATE)
12485
12486       current_memory = remote_info.payload["memory"]
12487       if (not self.op.force and
12488            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12489             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12490         raise errors.OpPrereqError("Instance %s must have memory between %d"
12491                                    " and %d MB of memory unless --force is"
12492                                    " given" % (instance.name,
12493                                     self.be_proposed[constants.BE_MINMEM],
12494                                     self.be_proposed[constants.BE_MAXMEM]),
12495                                    errors.ECODE_INVAL)
12496
12497       if self.op.runtime_mem > current_memory:
12498         _CheckNodeFreeMemory(self, instance.primary_node,
12499                              "ballooning memory for instance %s" %
12500                              instance.name,
12501                              self.op.memory - current_memory,
12502                              instance.hypervisor)
12503
12504     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12505       raise errors.OpPrereqError("Disk operations not supported for"
12506                                  " diskless instances",
12507                                  errors.ECODE_INVAL)
12508
12509     def _PrepareNicCreate(_, params, private):
12510       return self._PrepareNicModification(params, private, None, {},
12511                                           cluster, pnode)
12512
12513     def _PrepareNicMod(_, nic, params, private):
12514       return self._PrepareNicModification(params, private, nic.ip,
12515                                           nic.nicparams, cluster, pnode)
12516
12517     # Verify NIC changes (operating on copy)
12518     nics = instance.nics[:]
12519     ApplyContainerMods("NIC", nics, None, self.nicmod,
12520                        _PrepareNicCreate, _PrepareNicMod, None)
12521     if len(nics) > constants.MAX_NICS:
12522       raise errors.OpPrereqError("Instance has too many network interfaces"
12523                                  " (%d), cannot add more" % constants.MAX_NICS,
12524                                  errors.ECODE_STATE)
12525
12526     # Verify disk changes (operating on a copy)
12527     disks = instance.disks[:]
12528     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12529     if len(disks) > constants.MAX_DISKS:
12530       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12531                                  " more" % constants.MAX_DISKS,
12532                                  errors.ECODE_STATE)
12533
12534     if self.op.offline is not None:
12535       if self.op.offline:
12536         msg = "can't change to offline"
12537       else:
12538         msg = "can't change to online"
12539       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12540
12541     # Pre-compute NIC changes (necessary to use result in hooks)
12542     self._nic_chgdesc = []
12543     if self.nicmod:
12544       # Operate on copies as this is still in prereq
12545       nics = [nic.Copy() for nic in instance.nics]
12546       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12547                          self._CreateNewNic, self._ApplyNicMods, None)
12548       self._new_nics = nics
12549     else:
12550       self._new_nics = None
12551
12552   def _ConvertPlainToDrbd(self, feedback_fn):
12553     """Converts an instance from plain to drbd.
12554
12555     """
12556     feedback_fn("Converting template to drbd")
12557     instance = self.instance
12558     pnode = instance.primary_node
12559     snode = self.op.remote_node
12560
12561     assert instance.disk_template == constants.DT_PLAIN
12562
12563     # create a fake disk info for _GenerateDiskTemplate
12564     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12565                   constants.IDISK_VG: d.logical_id[0]}
12566                  for d in instance.disks]
12567     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12568                                       instance.name, pnode, [snode],
12569                                       disk_info, None, None, 0, feedback_fn,
12570                                       self.diskparams)
12571     info = _GetInstanceInfoText(instance)
12572     feedback_fn("Creating additional volumes...")
12573     # first, create the missing data and meta devices
12574     for disk in new_disks:
12575       # unfortunately this is... not too nice
12576       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12577                             info, True)
12578       for child in disk.children:
12579         _CreateSingleBlockDev(self, snode, instance, child, info, True)
12580     # at this stage, all new LVs have been created, we can rename the
12581     # old ones
12582     feedback_fn("Renaming original volumes...")
12583     rename_list = [(o, n.children[0].logical_id)
12584                    for (o, n) in zip(instance.disks, new_disks)]
12585     result = self.rpc.call_blockdev_rename(pnode, rename_list)
12586     result.Raise("Failed to rename original LVs")
12587
12588     feedback_fn("Initializing DRBD devices...")
12589     # all child devices are in place, we can now create the DRBD devices
12590     for disk in new_disks:
12591       for node in [pnode, snode]:
12592         f_create = node == pnode
12593         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12594
12595     # at this point, the instance has been modified
12596     instance.disk_template = constants.DT_DRBD8
12597     instance.disks = new_disks
12598     self.cfg.Update(instance, feedback_fn)
12599
12600     # Release node locks while waiting for sync
12601     _ReleaseLocks(self, locking.LEVEL_NODE)
12602
12603     # disks are created, waiting for sync
12604     disk_abort = not _WaitForSync(self, instance,
12605                                   oneshot=not self.op.wait_for_sync)
12606     if disk_abort:
12607       raise errors.OpExecError("There are some degraded disks for"
12608                                " this instance, please cleanup manually")
12609
12610     # Node resource locks will be released by caller
12611
12612   def _ConvertDrbdToPlain(self, feedback_fn):
12613     """Converts an instance from drbd to plain.
12614
12615     """
12616     instance = self.instance
12617
12618     assert len(instance.secondary_nodes) == 1
12619     assert instance.disk_template == constants.DT_DRBD8
12620
12621     pnode = instance.primary_node
12622     snode = instance.secondary_nodes[0]
12623     feedback_fn("Converting template to plain")
12624
12625     old_disks = instance.disks
12626     new_disks = [d.children[0] for d in old_disks]
12627
12628     # copy over size and mode
12629     for parent, child in zip(old_disks, new_disks):
12630       child.size = parent.size
12631       child.mode = parent.mode
12632
12633     # this is a DRBD disk, return its port to the pool
12634     # NOTE: this must be done right before the call to cfg.Update!
12635     for disk in old_disks:
12636       tcp_port = disk.logical_id[2]
12637       self.cfg.AddTcpUdpPort(tcp_port)
12638
12639     # update instance structure
12640     instance.disks = new_disks
12641     instance.disk_template = constants.DT_PLAIN
12642     self.cfg.Update(instance, feedback_fn)
12643
12644     # Release locks in case removing disks takes a while
12645     _ReleaseLocks(self, locking.LEVEL_NODE)
12646
12647     feedback_fn("Removing volumes on the secondary node...")
12648     for disk in old_disks:
12649       self.cfg.SetDiskID(disk, snode)
12650       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12651       if msg:
12652         self.LogWarning("Could not remove block device %s on node %s,"
12653                         " continuing anyway: %s", disk.iv_name, snode, msg)
12654
12655     feedback_fn("Removing unneeded volumes on the primary node...")
12656     for idx, disk in enumerate(old_disks):
12657       meta = disk.children[1]
12658       self.cfg.SetDiskID(meta, pnode)
12659       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12660       if msg:
12661         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12662                         " continuing anyway: %s", idx, pnode, msg)
12663
12664   def _CreateNewDisk(self, idx, params, _):
12665     """Creates a new disk.
12666
12667     """
12668     instance = self.instance
12669
12670     # add a new disk
12671     if instance.disk_template in constants.DTS_FILEBASED:
12672       (file_driver, file_path) = instance.disks[0].logical_id
12673       file_path = os.path.dirname(file_path)
12674     else:
12675       file_driver = file_path = None
12676
12677     disk = \
12678       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12679                             instance.primary_node, instance.secondary_nodes,
12680                             [params], file_path, file_driver, idx,
12681                             self.Log, self.diskparams)[0]
12682
12683     info = _GetInstanceInfoText(instance)
12684
12685     logging.info("Creating volume %s for instance %s",
12686                  disk.iv_name, instance.name)
12687     # Note: this needs to be kept in sync with _CreateDisks
12688     #HARDCODE
12689     for node in instance.all_nodes:
12690       f_create = (node == instance.primary_node)
12691       try:
12692         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12693       except errors.OpExecError, err:
12694         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12695                         disk.iv_name, disk, node, err)
12696
12697     return (disk, [
12698       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12699       ])
12700
12701   @staticmethod
12702   def _ModifyDisk(idx, disk, params, _):
12703     """Modifies a disk.
12704
12705     """
12706     disk.mode = params[constants.IDISK_MODE]
12707
12708     return [
12709       ("disk.mode/%d" % idx, disk.mode),
12710       ]
12711
12712   def _RemoveDisk(self, idx, root, _):
12713     """Removes a disk.
12714
12715     """
12716     for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12717       self.cfg.SetDiskID(disk, node)
12718       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12719       if msg:
12720         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12721                         " continuing anyway", idx, node, msg)
12722
12723     # if this is a DRBD disk, return its port to the pool
12724     if root.dev_type in constants.LDS_DRBD:
12725       self.cfg.AddTcpUdpPort(root.logical_id[2])
12726
12727   @staticmethod
12728   def _CreateNewNic(idx, params, private):
12729     """Creates data structure for a new network interface.
12730
12731     """
12732     mac = params[constants.INIC_MAC]
12733     ip = params.get(constants.INIC_IP, None)
12734     nicparams = private.params
12735
12736     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12737       ("nic.%d" % idx,
12738        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12739        (mac, ip, private.filled[constants.NIC_MODE],
12740        private.filled[constants.NIC_LINK])),
12741       ])
12742
12743   @staticmethod
12744   def _ApplyNicMods(idx, nic, params, private):
12745     """Modifies a network interface.
12746
12747     """
12748     changes = []
12749
12750     for key in [constants.INIC_MAC, constants.INIC_IP]:
12751       if key in params:
12752         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12753         setattr(nic, key, params[key])
12754
12755     if private.params:
12756       nic.nicparams = private.params
12757
12758       for (key, val) in params.items():
12759         changes.append(("nic.%s/%d" % (key, idx), val))
12760
12761     return changes
12762
12763   def Exec(self, feedback_fn):
12764     """Modifies an instance.
12765
12766     All parameters take effect only at the next restart of the instance.
12767
12768     """
12769     # Process here the warnings from CheckPrereq, as we don't have a
12770     # feedback_fn there.
12771     # TODO: Replace with self.LogWarning
12772     for warn in self.warn:
12773       feedback_fn("WARNING: %s" % warn)
12774
12775     assert ((self.op.disk_template is None) ^
12776             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12777       "Not owning any node resource locks"
12778
12779     result = []
12780     instance = self.instance
12781
12782     # runtime memory
12783     if self.op.runtime_mem:
12784       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12785                                                      instance,
12786                                                      self.op.runtime_mem)
12787       rpcres.Raise("Cannot modify instance runtime memory")
12788       result.append(("runtime_memory", self.op.runtime_mem))
12789
12790     # Apply disk changes
12791     ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12792                        self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12793     _UpdateIvNames(0, instance.disks)
12794
12795     if self.op.disk_template:
12796       if __debug__:
12797         check_nodes = set(instance.all_nodes)
12798         if self.op.remote_node:
12799           check_nodes.add(self.op.remote_node)
12800         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12801           owned = self.owned_locks(level)
12802           assert not (check_nodes - owned), \
12803             ("Not owning the correct locks, owning %r, expected at least %r" %
12804              (owned, check_nodes))
12805
12806       r_shut = _ShutdownInstanceDisks(self, instance)
12807       if not r_shut:
12808         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12809                                  " proceed with disk template conversion")
12810       mode = (instance.disk_template, self.op.disk_template)
12811       try:
12812         self._DISK_CONVERSIONS[mode](self, feedback_fn)
12813       except:
12814         self.cfg.ReleaseDRBDMinors(instance.name)
12815         raise
12816       result.append(("disk_template", self.op.disk_template))
12817
12818       assert instance.disk_template == self.op.disk_template, \
12819         ("Expected disk template '%s', found '%s'" %
12820          (self.op.disk_template, instance.disk_template))
12821
12822     # Release node and resource locks if there are any (they might already have
12823     # been released during disk conversion)
12824     _ReleaseLocks(self, locking.LEVEL_NODE)
12825     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12826
12827     # Apply NIC changes
12828     if self._new_nics is not None:
12829       instance.nics = self._new_nics
12830       result.extend(self._nic_chgdesc)
12831
12832     # hvparams changes
12833     if self.op.hvparams:
12834       instance.hvparams = self.hv_inst
12835       for key, val in self.op.hvparams.iteritems():
12836         result.append(("hv/%s" % key, val))
12837
12838     # beparams changes
12839     if self.op.beparams:
12840       instance.beparams = self.be_inst
12841       for key, val in self.op.beparams.iteritems():
12842         result.append(("be/%s" % key, val))
12843
12844     # OS change
12845     if self.op.os_name:
12846       instance.os = self.op.os_name
12847
12848     # osparams changes
12849     if self.op.osparams:
12850       instance.osparams = self.os_inst
12851       for key, val in self.op.osparams.iteritems():
12852         result.append(("os/%s" % key, val))
12853
12854     if self.op.offline is None:
12855       # Ignore
12856       pass
12857     elif self.op.offline:
12858       # Mark instance as offline
12859       self.cfg.MarkInstanceOffline(instance.name)
12860       result.append(("admin_state", constants.ADMINST_OFFLINE))
12861     else:
12862       # Mark instance as online, but stopped
12863       self.cfg.MarkInstanceDown(instance.name)
12864       result.append(("admin_state", constants.ADMINST_DOWN))
12865
12866     self.cfg.Update(instance, feedback_fn)
12867
12868     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12869                 self.owned_locks(locking.LEVEL_NODE)), \
12870       "All node locks should have been released by now"
12871
12872     return result
12873
12874   _DISK_CONVERSIONS = {
12875     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12876     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12877     }
12878
12879
12880 class LUInstanceChangeGroup(LogicalUnit):
12881   HPATH = "instance-change-group"
12882   HTYPE = constants.HTYPE_INSTANCE
12883   REQ_BGL = False
12884
12885   def ExpandNames(self):
12886     self.share_locks = _ShareAll()
12887     self.needed_locks = {
12888       locking.LEVEL_NODEGROUP: [],
12889       locking.LEVEL_NODE: [],
12890       }
12891
12892     self._ExpandAndLockInstance()
12893
12894     if self.op.target_groups:
12895       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12896                                   self.op.target_groups)
12897     else:
12898       self.req_target_uuids = None
12899
12900     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12901
12902   def DeclareLocks(self, level):
12903     if level == locking.LEVEL_NODEGROUP:
12904       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12905
12906       if self.req_target_uuids:
12907         lock_groups = set(self.req_target_uuids)
12908
12909         # Lock all groups used by instance optimistically; this requires going
12910         # via the node before it's locked, requiring verification later on
12911         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12912         lock_groups.update(instance_groups)
12913       else:
12914         # No target groups, need to lock all of them
12915         lock_groups = locking.ALL_SET
12916
12917       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12918
12919     elif level == locking.LEVEL_NODE:
12920       if self.req_target_uuids:
12921         # Lock all nodes used by instances
12922         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12923         self._LockInstancesNodes()
12924
12925         # Lock all nodes in all potential target groups
12926         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12927                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12928         member_nodes = [node_name
12929                         for group in lock_groups
12930                         for node_name in self.cfg.GetNodeGroup(group).members]
12931         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12932       else:
12933         # Lock all nodes as all groups are potential targets
12934         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12935
12936   def CheckPrereq(self):
12937     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12938     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12939     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12940
12941     assert (self.req_target_uuids is None or
12942             owned_groups.issuperset(self.req_target_uuids))
12943     assert owned_instances == set([self.op.instance_name])
12944
12945     # Get instance information
12946     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12947
12948     # Check if node groups for locked instance are still correct
12949     assert owned_nodes.issuperset(self.instance.all_nodes), \
12950       ("Instance %s's nodes changed while we kept the lock" %
12951        self.op.instance_name)
12952
12953     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12954                                            owned_groups)
12955
12956     if self.req_target_uuids:
12957       # User requested specific target groups
12958       self.target_uuids = frozenset(self.req_target_uuids)
12959     else:
12960       # All groups except those used by the instance are potential targets
12961       self.target_uuids = owned_groups - inst_groups
12962
12963     conflicting_groups = self.target_uuids & inst_groups
12964     if conflicting_groups:
12965       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12966                                  " used by the instance '%s'" %
12967                                  (utils.CommaJoin(conflicting_groups),
12968                                   self.op.instance_name),
12969                                  errors.ECODE_INVAL)
12970
12971     if not self.target_uuids:
12972       raise errors.OpPrereqError("There are no possible target groups",
12973                                  errors.ECODE_INVAL)
12974
12975   def BuildHooksEnv(self):
12976     """Build hooks env.
12977
12978     """
12979     assert self.target_uuids
12980
12981     env = {
12982       "TARGET_GROUPS": " ".join(self.target_uuids),
12983       }
12984
12985     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12986
12987     return env
12988
12989   def BuildHooksNodes(self):
12990     """Build hooks nodes.
12991
12992     """
12993     mn = self.cfg.GetMasterNode()
12994     return ([mn], [mn])
12995
12996   def Exec(self, feedback_fn):
12997     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12998
12999     assert instances == [self.op.instance_name], "Instance not locked"
13000
13001     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13002                      instances=instances, target_groups=list(self.target_uuids))
13003
13004     ial.Run(self.op.iallocator)
13005
13006     if not ial.success:
13007       raise errors.OpPrereqError("Can't compute solution for changing group of"
13008                                  " instance '%s' using iallocator '%s': %s" %
13009                                  (self.op.instance_name, self.op.iallocator,
13010                                   ial.info),
13011                                  errors.ECODE_NORES)
13012
13013     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13014
13015     self.LogInfo("Iallocator returned %s job(s) for changing group of"
13016                  " instance '%s'", len(jobs), self.op.instance_name)
13017
13018     return ResultWithJobs(jobs)
13019
13020
13021 class LUBackupQuery(NoHooksLU):
13022   """Query the exports list
13023
13024   """
13025   REQ_BGL = False
13026
13027   def ExpandNames(self):
13028     self.needed_locks = {}
13029     self.share_locks[locking.LEVEL_NODE] = 1
13030     if not self.op.nodes:
13031       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13032     else:
13033       self.needed_locks[locking.LEVEL_NODE] = \
13034         _GetWantedNodes(self, self.op.nodes)
13035
13036   def Exec(self, feedback_fn):
13037     """Compute the list of all the exported system images.
13038
13039     @rtype: dict
13040     @return: a dictionary with the structure node->(export-list)
13041         where export-list is a list of the instances exported on
13042         that node.
13043
13044     """
13045     self.nodes = self.owned_locks(locking.LEVEL_NODE)
13046     rpcresult = self.rpc.call_export_list(self.nodes)
13047     result = {}
13048     for node in rpcresult:
13049       if rpcresult[node].fail_msg:
13050         result[node] = False
13051       else:
13052         result[node] = rpcresult[node].payload
13053
13054     return result
13055
13056
13057 class LUBackupPrepare(NoHooksLU):
13058   """Prepares an instance for an export and returns useful information.
13059
13060   """
13061   REQ_BGL = False
13062
13063   def ExpandNames(self):
13064     self._ExpandAndLockInstance()
13065
13066   def CheckPrereq(self):
13067     """Check prerequisites.
13068
13069     """
13070     instance_name = self.op.instance_name
13071
13072     self.instance = self.cfg.GetInstanceInfo(instance_name)
13073     assert self.instance is not None, \
13074           "Cannot retrieve locked instance %s" % self.op.instance_name
13075     _CheckNodeOnline(self, self.instance.primary_node)
13076
13077     self._cds = _GetClusterDomainSecret()
13078
13079   def Exec(self, feedback_fn):
13080     """Prepares an instance for an export.
13081
13082     """
13083     instance = self.instance
13084
13085     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13086       salt = utils.GenerateSecret(8)
13087
13088       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13089       result = self.rpc.call_x509_cert_create(instance.primary_node,
13090                                               constants.RIE_CERT_VALIDITY)
13091       result.Raise("Can't create X509 key and certificate on %s" % result.node)
13092
13093       (name, cert_pem) = result.payload
13094
13095       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13096                                              cert_pem)
13097
13098       return {
13099         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13100         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13101                           salt),
13102         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13103         }
13104
13105     return None
13106
13107
13108 class LUBackupExport(LogicalUnit):
13109   """Export an instance to an image in the cluster.
13110
13111   """
13112   HPATH = "instance-export"
13113   HTYPE = constants.HTYPE_INSTANCE
13114   REQ_BGL = False
13115
13116   def CheckArguments(self):
13117     """Check the arguments.
13118
13119     """
13120     self.x509_key_name = self.op.x509_key_name
13121     self.dest_x509_ca_pem = self.op.destination_x509_ca
13122
13123     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13124       if not self.x509_key_name:
13125         raise errors.OpPrereqError("Missing X509 key name for encryption",
13126                                    errors.ECODE_INVAL)
13127
13128       if not self.dest_x509_ca_pem:
13129         raise errors.OpPrereqError("Missing destination X509 CA",
13130                                    errors.ECODE_INVAL)
13131
13132   def ExpandNames(self):
13133     self._ExpandAndLockInstance()
13134
13135     # Lock all nodes for local exports
13136     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13137       # FIXME: lock only instance primary and destination node
13138       #
13139       # Sad but true, for now we have do lock all nodes, as we don't know where
13140       # the previous export might be, and in this LU we search for it and
13141       # remove it from its current node. In the future we could fix this by:
13142       #  - making a tasklet to search (share-lock all), then create the
13143       #    new one, then one to remove, after
13144       #  - removing the removal operation altogether
13145       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13146
13147   def DeclareLocks(self, level):
13148     """Last minute lock declaration."""
13149     # All nodes are locked anyway, so nothing to do here.
13150
13151   def BuildHooksEnv(self):
13152     """Build hooks env.
13153
13154     This will run on the master, primary node and target node.
13155
13156     """
13157     env = {
13158       "EXPORT_MODE": self.op.mode,
13159       "EXPORT_NODE": self.op.target_node,
13160       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13161       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13162       # TODO: Generic function for boolean env variables
13163       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13164       }
13165
13166     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13167
13168     return env
13169
13170   def BuildHooksNodes(self):
13171     """Build hooks nodes.
13172
13173     """
13174     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13175
13176     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13177       nl.append(self.op.target_node)
13178
13179     return (nl, nl)
13180
13181   def CheckPrereq(self):
13182     """Check prerequisites.
13183
13184     This checks that the instance and node names are valid.
13185
13186     """
13187     instance_name = self.op.instance_name
13188
13189     self.instance = self.cfg.GetInstanceInfo(instance_name)
13190     assert self.instance is not None, \
13191           "Cannot retrieve locked instance %s" % self.op.instance_name
13192     _CheckNodeOnline(self, self.instance.primary_node)
13193
13194     if (self.op.remove_instance and
13195         self.instance.admin_state == constants.ADMINST_UP and
13196         not self.op.shutdown):
13197       raise errors.OpPrereqError("Can not remove instance without shutting it"
13198                                  " down before")
13199
13200     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13201       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13202       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13203       assert self.dst_node is not None
13204
13205       _CheckNodeOnline(self, self.dst_node.name)
13206       _CheckNodeNotDrained(self, self.dst_node.name)
13207
13208       self._cds = None
13209       self.dest_disk_info = None
13210       self.dest_x509_ca = None
13211
13212     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13213       self.dst_node = None
13214
13215       if len(self.op.target_node) != len(self.instance.disks):
13216         raise errors.OpPrereqError(("Received destination information for %s"
13217                                     " disks, but instance %s has %s disks") %
13218                                    (len(self.op.target_node), instance_name,
13219                                     len(self.instance.disks)),
13220                                    errors.ECODE_INVAL)
13221
13222       cds = _GetClusterDomainSecret()
13223
13224       # Check X509 key name
13225       try:
13226         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13227       except (TypeError, ValueError), err:
13228         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13229
13230       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13231         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13232                                    errors.ECODE_INVAL)
13233
13234       # Load and verify CA
13235       try:
13236         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13237       except OpenSSL.crypto.Error, err:
13238         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13239                                    (err, ), errors.ECODE_INVAL)
13240
13241       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13242       if errcode is not None:
13243         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13244                                    (msg, ), errors.ECODE_INVAL)
13245
13246       self.dest_x509_ca = cert
13247
13248       # Verify target information
13249       disk_info = []
13250       for idx, disk_data in enumerate(self.op.target_node):
13251         try:
13252           (host, port, magic) = \
13253             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13254         except errors.GenericError, err:
13255           raise errors.OpPrereqError("Target info for disk %s: %s" %
13256                                      (idx, err), errors.ECODE_INVAL)
13257
13258         disk_info.append((host, port, magic))
13259
13260       assert len(disk_info) == len(self.op.target_node)
13261       self.dest_disk_info = disk_info
13262
13263     else:
13264       raise errors.ProgrammerError("Unhandled export mode %r" %
13265                                    self.op.mode)
13266
13267     # instance disk type verification
13268     # TODO: Implement export support for file-based disks
13269     for disk in self.instance.disks:
13270       if disk.dev_type == constants.LD_FILE:
13271         raise errors.OpPrereqError("Export not supported for instances with"
13272                                    " file-based disks", errors.ECODE_INVAL)
13273
13274   def _CleanupExports(self, feedback_fn):
13275     """Removes exports of current instance from all other nodes.
13276
13277     If an instance in a cluster with nodes A..D was exported to node C, its
13278     exports will be removed from the nodes A, B and D.
13279
13280     """
13281     assert self.op.mode != constants.EXPORT_MODE_REMOTE
13282
13283     nodelist = self.cfg.GetNodeList()
13284     nodelist.remove(self.dst_node.name)
13285
13286     # on one-node clusters nodelist will be empty after the removal
13287     # if we proceed the backup would be removed because OpBackupQuery
13288     # substitutes an empty list with the full cluster node list.
13289     iname = self.instance.name
13290     if nodelist:
13291       feedback_fn("Removing old exports for instance %s" % iname)
13292       exportlist = self.rpc.call_export_list(nodelist)
13293       for node in exportlist:
13294         if exportlist[node].fail_msg:
13295           continue
13296         if iname in exportlist[node].payload:
13297           msg = self.rpc.call_export_remove(node, iname).fail_msg
13298           if msg:
13299             self.LogWarning("Could not remove older export for instance %s"
13300                             " on node %s: %s", iname, node, msg)
13301
13302   def Exec(self, feedback_fn):
13303     """Export an instance to an image in the cluster.
13304
13305     """
13306     assert self.op.mode in constants.EXPORT_MODES
13307
13308     instance = self.instance
13309     src_node = instance.primary_node
13310
13311     if self.op.shutdown:
13312       # shutdown the instance, but not the disks
13313       feedback_fn("Shutting down instance %s" % instance.name)
13314       result = self.rpc.call_instance_shutdown(src_node, instance,
13315                                                self.op.shutdown_timeout)
13316       # TODO: Maybe ignore failures if ignore_remove_failures is set
13317       result.Raise("Could not shutdown instance %s on"
13318                    " node %s" % (instance.name, src_node))
13319
13320     # set the disks ID correctly since call_instance_start needs the
13321     # correct drbd minor to create the symlinks
13322     for disk in instance.disks:
13323       self.cfg.SetDiskID(disk, src_node)
13324
13325     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13326
13327     if activate_disks:
13328       # Activate the instance disks if we'exporting a stopped instance
13329       feedback_fn("Activating disks for %s" % instance.name)
13330       _StartInstanceDisks(self, instance, None)
13331
13332     try:
13333       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13334                                                      instance)
13335
13336       helper.CreateSnapshots()
13337       try:
13338         if (self.op.shutdown and
13339             instance.admin_state == constants.ADMINST_UP and
13340             not self.op.remove_instance):
13341           assert not activate_disks
13342           feedback_fn("Starting instance %s" % instance.name)
13343           result = self.rpc.call_instance_start(src_node,
13344                                                 (instance, None, None), False)
13345           msg = result.fail_msg
13346           if msg:
13347             feedback_fn("Failed to start instance: %s" % msg)
13348             _ShutdownInstanceDisks(self, instance)
13349             raise errors.OpExecError("Could not start instance: %s" % msg)
13350
13351         if self.op.mode == constants.EXPORT_MODE_LOCAL:
13352           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13353         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13354           connect_timeout = constants.RIE_CONNECT_TIMEOUT
13355           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13356
13357           (key_name, _, _) = self.x509_key_name
13358
13359           dest_ca_pem = \
13360             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13361                                             self.dest_x509_ca)
13362
13363           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13364                                                      key_name, dest_ca_pem,
13365                                                      timeouts)
13366       finally:
13367         helper.Cleanup()
13368
13369       # Check for backwards compatibility
13370       assert len(dresults) == len(instance.disks)
13371       assert compat.all(isinstance(i, bool) for i in dresults), \
13372              "Not all results are boolean: %r" % dresults
13373
13374     finally:
13375       if activate_disks:
13376         feedback_fn("Deactivating disks for %s" % instance.name)
13377         _ShutdownInstanceDisks(self, instance)
13378
13379     if not (compat.all(dresults) and fin_resu):
13380       failures = []
13381       if not fin_resu:
13382         failures.append("export finalization")
13383       if not compat.all(dresults):
13384         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13385                                if not dsk)
13386         failures.append("disk export: disk(s) %s" % fdsk)
13387
13388       raise errors.OpExecError("Export failed, errors in %s" %
13389                                utils.CommaJoin(failures))
13390
13391     # At this point, the export was successful, we can cleanup/finish
13392
13393     # Remove instance if requested
13394     if self.op.remove_instance:
13395       feedback_fn("Removing instance %s" % instance.name)
13396       _RemoveInstance(self, feedback_fn, instance,
13397                       self.op.ignore_remove_failures)
13398
13399     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13400       self._CleanupExports(feedback_fn)
13401
13402     return fin_resu, dresults
13403
13404
13405 class LUBackupRemove(NoHooksLU):
13406   """Remove exports related to the named instance.
13407
13408   """
13409   REQ_BGL = False
13410
13411   def ExpandNames(self):
13412     self.needed_locks = {}
13413     # We need all nodes to be locked in order for RemoveExport to work, but we
13414     # don't need to lock the instance itself, as nothing will happen to it (and
13415     # we can remove exports also for a removed instance)
13416     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13417
13418   def Exec(self, feedback_fn):
13419     """Remove any export.
13420
13421     """
13422     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13423     # If the instance was not found we'll try with the name that was passed in.
13424     # This will only work if it was an FQDN, though.
13425     fqdn_warn = False
13426     if not instance_name:
13427       fqdn_warn = True
13428       instance_name = self.op.instance_name
13429
13430     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13431     exportlist = self.rpc.call_export_list(locked_nodes)
13432     found = False
13433     for node in exportlist:
13434       msg = exportlist[node].fail_msg
13435       if msg:
13436         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13437         continue
13438       if instance_name in exportlist[node].payload:
13439         found = True
13440         result = self.rpc.call_export_remove(node, instance_name)
13441         msg = result.fail_msg
13442         if msg:
13443           logging.error("Could not remove export for instance %s"
13444                         " on node %s: %s", instance_name, node, msg)
13445
13446     if fqdn_warn and not found:
13447       feedback_fn("Export not found. If trying to remove an export belonging"
13448                   " to a deleted instance please use its Fully Qualified"
13449                   " Domain Name.")
13450
13451
13452 class LUGroupAdd(LogicalUnit):
13453   """Logical unit for creating node groups.
13454
13455   """
13456   HPATH = "group-add"
13457   HTYPE = constants.HTYPE_GROUP
13458   REQ_BGL = False
13459
13460   def ExpandNames(self):
13461     # We need the new group's UUID here so that we can create and acquire the
13462     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13463     # that it should not check whether the UUID exists in the configuration.
13464     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13465     self.needed_locks = {}
13466     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13467
13468   def CheckPrereq(self):
13469     """Check prerequisites.
13470
13471     This checks that the given group name is not an existing node group
13472     already.
13473
13474     """
13475     try:
13476       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13477     except errors.OpPrereqError:
13478       pass
13479     else:
13480       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13481                                  " node group (UUID: %s)" %
13482                                  (self.op.group_name, existing_uuid),
13483                                  errors.ECODE_EXISTS)
13484
13485     if self.op.ndparams:
13486       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13487
13488     if self.op.hv_state:
13489       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13490     else:
13491       self.new_hv_state = None
13492
13493     if self.op.disk_state:
13494       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13495     else:
13496       self.new_disk_state = None
13497
13498     if self.op.diskparams:
13499       for templ in constants.DISK_TEMPLATES:
13500         if templ not in self.op.diskparams:
13501           self.op.diskparams[templ] = {}
13502         utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13503     else:
13504       self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13505
13506     if self.op.ipolicy:
13507       cluster = self.cfg.GetClusterInfo()
13508       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13509       try:
13510         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13511       except errors.ConfigurationError, err:
13512         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13513                                    errors.ECODE_INVAL)
13514
13515   def BuildHooksEnv(self):
13516     """Build hooks env.
13517
13518     """
13519     return {
13520       "GROUP_NAME": self.op.group_name,
13521       }
13522
13523   def BuildHooksNodes(self):
13524     """Build hooks nodes.
13525
13526     """
13527     mn = self.cfg.GetMasterNode()
13528     return ([mn], [mn])
13529
13530   def Exec(self, feedback_fn):
13531     """Add the node group to the cluster.
13532
13533     """
13534     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13535                                   uuid=self.group_uuid,
13536                                   alloc_policy=self.op.alloc_policy,
13537                                   ndparams=self.op.ndparams,
13538                                   diskparams=self.op.diskparams,
13539                                   ipolicy=self.op.ipolicy,
13540                                   hv_state_static=self.new_hv_state,
13541                                   disk_state_static=self.new_disk_state)
13542
13543     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13544     del self.remove_locks[locking.LEVEL_NODEGROUP]
13545
13546
13547 class LUGroupAssignNodes(NoHooksLU):
13548   """Logical unit for assigning nodes to groups.
13549
13550   """
13551   REQ_BGL = False
13552
13553   def ExpandNames(self):
13554     # These raise errors.OpPrereqError on their own:
13555     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13556     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13557
13558     # We want to lock all the affected nodes and groups. We have readily
13559     # available the list of nodes, and the *destination* group. To gather the
13560     # list of "source" groups, we need to fetch node information later on.
13561     self.needed_locks = {
13562       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13563       locking.LEVEL_NODE: self.op.nodes,
13564       }
13565
13566   def DeclareLocks(self, level):
13567     if level == locking.LEVEL_NODEGROUP:
13568       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13569
13570       # Try to get all affected nodes' groups without having the group or node
13571       # lock yet. Needs verification later in the code flow.
13572       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13573
13574       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13575
13576   def CheckPrereq(self):
13577     """Check prerequisites.
13578
13579     """
13580     assert self.needed_locks[locking.LEVEL_NODEGROUP]
13581     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13582             frozenset(self.op.nodes))
13583
13584     expected_locks = (set([self.group_uuid]) |
13585                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13586     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13587     if actual_locks != expected_locks:
13588       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13589                                " current groups are '%s', used to be '%s'" %
13590                                (utils.CommaJoin(expected_locks),
13591                                 utils.CommaJoin(actual_locks)))
13592
13593     self.node_data = self.cfg.GetAllNodesInfo()
13594     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13595     instance_data = self.cfg.GetAllInstancesInfo()
13596
13597     if self.group is None:
13598       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13599                                (self.op.group_name, self.group_uuid))
13600
13601     (new_splits, previous_splits) = \
13602       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13603                                              for node in self.op.nodes],
13604                                             self.node_data, instance_data)
13605
13606     if new_splits:
13607       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13608
13609       if not self.op.force:
13610         raise errors.OpExecError("The following instances get split by this"
13611                                  " change and --force was not given: %s" %
13612                                  fmt_new_splits)
13613       else:
13614         self.LogWarning("This operation will split the following instances: %s",
13615                         fmt_new_splits)
13616
13617         if previous_splits:
13618           self.LogWarning("In addition, these already-split instances continue"
13619                           " to be split across groups: %s",
13620                           utils.CommaJoin(utils.NiceSort(previous_splits)))
13621
13622   def Exec(self, feedback_fn):
13623     """Assign nodes to a new group.
13624
13625     """
13626     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13627
13628     self.cfg.AssignGroupNodes(mods)
13629
13630   @staticmethod
13631   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13632     """Check for split instances after a node assignment.
13633
13634     This method considers a series of node assignments as an atomic operation,
13635     and returns information about split instances after applying the set of
13636     changes.
13637
13638     In particular, it returns information about newly split instances, and
13639     instances that were already split, and remain so after the change.
13640
13641     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13642     considered.
13643
13644     @type changes: list of (node_name, new_group_uuid) pairs.
13645     @param changes: list of node assignments to consider.
13646     @param node_data: a dict with data for all nodes
13647     @param instance_data: a dict with all instances to consider
13648     @rtype: a two-tuple
13649     @return: a list of instances that were previously okay and result split as a
13650       consequence of this change, and a list of instances that were previously
13651       split and this change does not fix.
13652
13653     """
13654     changed_nodes = dict((node, group) for node, group in changes
13655                          if node_data[node].group != group)
13656
13657     all_split_instances = set()
13658     previously_split_instances = set()
13659
13660     def InstanceNodes(instance):
13661       return [instance.primary_node] + list(instance.secondary_nodes)
13662
13663     for inst in instance_data.values():
13664       if inst.disk_template not in constants.DTS_INT_MIRROR:
13665         continue
13666
13667       instance_nodes = InstanceNodes(inst)
13668
13669       if len(set(node_data[node].group for node in instance_nodes)) > 1:
13670         previously_split_instances.add(inst.name)
13671
13672       if len(set(changed_nodes.get(node, node_data[node].group)
13673                  for node in instance_nodes)) > 1:
13674         all_split_instances.add(inst.name)
13675
13676     return (list(all_split_instances - previously_split_instances),
13677             list(previously_split_instances & all_split_instances))
13678
13679
13680 class _GroupQuery(_QueryBase):
13681   FIELDS = query.GROUP_FIELDS
13682
13683   def ExpandNames(self, lu):
13684     lu.needed_locks = {}
13685
13686     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13687     self._cluster = lu.cfg.GetClusterInfo()
13688     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13689
13690     if not self.names:
13691       self.wanted = [name_to_uuid[name]
13692                      for name in utils.NiceSort(name_to_uuid.keys())]
13693     else:
13694       # Accept names to be either names or UUIDs.
13695       missing = []
13696       self.wanted = []
13697       all_uuid = frozenset(self._all_groups.keys())
13698
13699       for name in self.names:
13700         if name in all_uuid:
13701           self.wanted.append(name)
13702         elif name in name_to_uuid:
13703           self.wanted.append(name_to_uuid[name])
13704         else:
13705           missing.append(name)
13706
13707       if missing:
13708         raise errors.OpPrereqError("Some groups do not exist: %s" %
13709                                    utils.CommaJoin(missing),
13710                                    errors.ECODE_NOENT)
13711
13712   def DeclareLocks(self, lu, level):
13713     pass
13714
13715   def _GetQueryData(self, lu):
13716     """Computes the list of node groups and their attributes.
13717
13718     """
13719     do_nodes = query.GQ_NODE in self.requested_data
13720     do_instances = query.GQ_INST in self.requested_data
13721
13722     group_to_nodes = None
13723     group_to_instances = None
13724
13725     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13726     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13727     # latter GetAllInstancesInfo() is not enough, for we have to go through
13728     # instance->node. Hence, we will need to process nodes even if we only need
13729     # instance information.
13730     if do_nodes or do_instances:
13731       all_nodes = lu.cfg.GetAllNodesInfo()
13732       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13733       node_to_group = {}
13734
13735       for node in all_nodes.values():
13736         if node.group in group_to_nodes:
13737           group_to_nodes[node.group].append(node.name)
13738           node_to_group[node.name] = node.group
13739
13740       if do_instances:
13741         all_instances = lu.cfg.GetAllInstancesInfo()
13742         group_to_instances = dict((uuid, []) for uuid in self.wanted)
13743
13744         for instance in all_instances.values():
13745           node = instance.primary_node
13746           if node in node_to_group:
13747             group_to_instances[node_to_group[node]].append(instance.name)
13748
13749         if not do_nodes:
13750           # Do not pass on node information if it was not requested.
13751           group_to_nodes = None
13752
13753     return query.GroupQueryData(self._cluster,
13754                                 [self._all_groups[uuid]
13755                                  for uuid in self.wanted],
13756                                 group_to_nodes, group_to_instances)
13757
13758
13759 class LUGroupQuery(NoHooksLU):
13760   """Logical unit for querying node groups.
13761
13762   """
13763   REQ_BGL = False
13764
13765   def CheckArguments(self):
13766     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13767                           self.op.output_fields, False)
13768
13769   def ExpandNames(self):
13770     self.gq.ExpandNames(self)
13771
13772   def DeclareLocks(self, level):
13773     self.gq.DeclareLocks(self, level)
13774
13775   def Exec(self, feedback_fn):
13776     return self.gq.OldStyleQuery(self)
13777
13778
13779 class LUGroupSetParams(LogicalUnit):
13780   """Modifies the parameters of a node group.
13781
13782   """
13783   HPATH = "group-modify"
13784   HTYPE = constants.HTYPE_GROUP
13785   REQ_BGL = False
13786
13787   def CheckArguments(self):
13788     all_changes = [
13789       self.op.ndparams,
13790       self.op.diskparams,
13791       self.op.alloc_policy,
13792       self.op.hv_state,
13793       self.op.disk_state,
13794       self.op.ipolicy,
13795       ]
13796
13797     if all_changes.count(None) == len(all_changes):
13798       raise errors.OpPrereqError("Please pass at least one modification",
13799                                  errors.ECODE_INVAL)
13800
13801   def ExpandNames(self):
13802     # This raises errors.OpPrereqError on its own:
13803     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13804
13805     self.needed_locks = {
13806       locking.LEVEL_INSTANCE: [],
13807       locking.LEVEL_NODEGROUP: [self.group_uuid],
13808       }
13809
13810     self.share_locks[locking.LEVEL_INSTANCE] = 1
13811
13812   def DeclareLocks(self, level):
13813     if level == locking.LEVEL_INSTANCE:
13814       assert not self.needed_locks[locking.LEVEL_INSTANCE]
13815
13816       # Lock instances optimistically, needs verification once group lock has
13817       # been acquired
13818       self.needed_locks[locking.LEVEL_INSTANCE] = \
13819           self.cfg.GetNodeGroupInstances(self.group_uuid)
13820
13821   def CheckPrereq(self):
13822     """Check prerequisites.
13823
13824     """
13825     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13826
13827     # Check if locked instances are still correct
13828     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13829
13830     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13831     cluster = self.cfg.GetClusterInfo()
13832
13833     if self.group is None:
13834       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13835                                (self.op.group_name, self.group_uuid))
13836
13837     if self.op.ndparams:
13838       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13839       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13840       self.new_ndparams = new_ndparams
13841
13842     if self.op.diskparams:
13843       self.new_diskparams = dict()
13844       for templ in constants.DISK_TEMPLATES:
13845         if templ not in self.op.diskparams:
13846           self.op.diskparams[templ] = {}
13847         new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13848                                              self.op.diskparams[templ])
13849         utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13850         self.new_diskparams[templ] = new_templ_params
13851
13852     if self.op.hv_state:
13853       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13854                                                  self.group.hv_state_static)
13855
13856     if self.op.disk_state:
13857       self.new_disk_state = \
13858         _MergeAndVerifyDiskState(self.op.disk_state,
13859                                  self.group.disk_state_static)
13860
13861     if self.op.ipolicy:
13862       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13863                                             self.op.ipolicy,
13864                                             group_policy=True)
13865
13866       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13867       inst_filter = lambda inst: inst.name in owned_instances
13868       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13869       violations = \
13870           _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13871                                                                self.group),
13872                                         new_ipolicy, instances)
13873
13874       if violations:
13875         self.LogWarning("After the ipolicy change the following instances"
13876                         " violate them: %s",
13877                         utils.CommaJoin(violations))
13878
13879   def BuildHooksEnv(self):
13880     """Build hooks env.
13881
13882     """
13883     return {
13884       "GROUP_NAME": self.op.group_name,
13885       "NEW_ALLOC_POLICY": self.op.alloc_policy,
13886       }
13887
13888   def BuildHooksNodes(self):
13889     """Build hooks nodes.
13890
13891     """
13892     mn = self.cfg.GetMasterNode()
13893     return ([mn], [mn])
13894
13895   def Exec(self, feedback_fn):
13896     """Modifies the node group.
13897
13898     """
13899     result = []
13900
13901     if self.op.ndparams:
13902       self.group.ndparams = self.new_ndparams
13903       result.append(("ndparams", str(self.group.ndparams)))
13904
13905     if self.op.diskparams:
13906       self.group.diskparams = self.new_diskparams
13907       result.append(("diskparams", str(self.group.diskparams)))
13908
13909     if self.op.alloc_policy:
13910       self.group.alloc_policy = self.op.alloc_policy
13911
13912     if self.op.hv_state:
13913       self.group.hv_state_static = self.new_hv_state
13914
13915     if self.op.disk_state:
13916       self.group.disk_state_static = self.new_disk_state
13917
13918     if self.op.ipolicy:
13919       self.group.ipolicy = self.new_ipolicy
13920
13921     self.cfg.Update(self.group, feedback_fn)
13922     return result
13923
13924
13925 class LUGroupRemove(LogicalUnit):
13926   HPATH = "group-remove"
13927   HTYPE = constants.HTYPE_GROUP
13928   REQ_BGL = False
13929
13930   def ExpandNames(self):
13931     # This will raises errors.OpPrereqError on its own:
13932     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13933     self.needed_locks = {
13934       locking.LEVEL_NODEGROUP: [self.group_uuid],
13935       }
13936
13937   def CheckPrereq(self):
13938     """Check prerequisites.
13939
13940     This checks that the given group name exists as a node group, that is
13941     empty (i.e., contains no nodes), and that is not the last group of the
13942     cluster.
13943
13944     """
13945     # Verify that the group is empty.
13946     group_nodes = [node.name
13947                    for node in self.cfg.GetAllNodesInfo().values()
13948                    if node.group == self.group_uuid]
13949
13950     if group_nodes:
13951       raise errors.OpPrereqError("Group '%s' not empty, has the following"
13952                                  " nodes: %s" %
13953                                  (self.op.group_name,
13954                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
13955                                  errors.ECODE_STATE)
13956
13957     # Verify the cluster would not be left group-less.
13958     if len(self.cfg.GetNodeGroupList()) == 1:
13959       raise errors.OpPrereqError("Group '%s' is the only group,"
13960                                  " cannot be removed" %
13961                                  self.op.group_name,
13962                                  errors.ECODE_STATE)
13963
13964   def BuildHooksEnv(self):
13965     """Build hooks env.
13966
13967     """
13968     return {
13969       "GROUP_NAME": self.op.group_name,
13970       }
13971
13972   def BuildHooksNodes(self):
13973     """Build hooks nodes.
13974
13975     """
13976     mn = self.cfg.GetMasterNode()
13977     return ([mn], [mn])
13978
13979   def Exec(self, feedback_fn):
13980     """Remove the node group.
13981
13982     """
13983     try:
13984       self.cfg.RemoveNodeGroup(self.group_uuid)
13985     except errors.ConfigurationError:
13986       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13987                                (self.op.group_name, self.group_uuid))
13988
13989     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13990
13991
13992 class LUGroupRename(LogicalUnit):
13993   HPATH = "group-rename"
13994   HTYPE = constants.HTYPE_GROUP
13995   REQ_BGL = False
13996
13997   def ExpandNames(self):
13998     # This raises errors.OpPrereqError on its own:
13999     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14000
14001     self.needed_locks = {
14002       locking.LEVEL_NODEGROUP: [self.group_uuid],
14003       }
14004
14005   def CheckPrereq(self):
14006     """Check prerequisites.
14007
14008     Ensures requested new name is not yet used.
14009
14010     """
14011     try:
14012       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14013     except errors.OpPrereqError:
14014       pass
14015     else:
14016       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14017                                  " node group (UUID: %s)" %
14018                                  (self.op.new_name, new_name_uuid),
14019                                  errors.ECODE_EXISTS)
14020
14021   def BuildHooksEnv(self):
14022     """Build hooks env.
14023
14024     """
14025     return {
14026       "OLD_NAME": self.op.group_name,
14027       "NEW_NAME": self.op.new_name,
14028       }
14029
14030   def BuildHooksNodes(self):
14031     """Build hooks nodes.
14032
14033     """
14034     mn = self.cfg.GetMasterNode()
14035
14036     all_nodes = self.cfg.GetAllNodesInfo()
14037     all_nodes.pop(mn, None)
14038
14039     run_nodes = [mn]
14040     run_nodes.extend(node.name for node in all_nodes.values()
14041                      if node.group == self.group_uuid)
14042
14043     return (run_nodes, run_nodes)
14044
14045   def Exec(self, feedback_fn):
14046     """Rename the node group.
14047
14048     """
14049     group = self.cfg.GetNodeGroup(self.group_uuid)
14050
14051     if group is None:
14052       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14053                                (self.op.group_name, self.group_uuid))
14054
14055     group.name = self.op.new_name
14056     self.cfg.Update(group, feedback_fn)
14057
14058     return self.op.new_name
14059
14060
14061 class LUGroupEvacuate(LogicalUnit):
14062   HPATH = "group-evacuate"
14063   HTYPE = constants.HTYPE_GROUP
14064   REQ_BGL = False
14065
14066   def ExpandNames(self):
14067     # This raises errors.OpPrereqError on its own:
14068     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14069
14070     if self.op.target_groups:
14071       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14072                                   self.op.target_groups)
14073     else:
14074       self.req_target_uuids = []
14075
14076     if self.group_uuid in self.req_target_uuids:
14077       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14078                                  " as a target group (targets are %s)" %
14079                                  (self.group_uuid,
14080                                   utils.CommaJoin(self.req_target_uuids)),
14081                                  errors.ECODE_INVAL)
14082
14083     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14084
14085     self.share_locks = _ShareAll()
14086     self.needed_locks = {
14087       locking.LEVEL_INSTANCE: [],
14088       locking.LEVEL_NODEGROUP: [],
14089       locking.LEVEL_NODE: [],
14090       }
14091
14092   def DeclareLocks(self, level):
14093     if level == locking.LEVEL_INSTANCE:
14094       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14095
14096       # Lock instances optimistically, needs verification once node and group
14097       # locks have been acquired
14098       self.needed_locks[locking.LEVEL_INSTANCE] = \
14099         self.cfg.GetNodeGroupInstances(self.group_uuid)
14100
14101     elif level == locking.LEVEL_NODEGROUP:
14102       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14103
14104       if self.req_target_uuids:
14105         lock_groups = set([self.group_uuid] + self.req_target_uuids)
14106
14107         # Lock all groups used by instances optimistically; this requires going
14108         # via the node before it's locked, requiring verification later on
14109         lock_groups.update(group_uuid
14110                            for instance_name in
14111                              self.owned_locks(locking.LEVEL_INSTANCE)
14112                            for group_uuid in
14113                              self.cfg.GetInstanceNodeGroups(instance_name))
14114       else:
14115         # No target groups, need to lock all of them
14116         lock_groups = locking.ALL_SET
14117
14118       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14119
14120     elif level == locking.LEVEL_NODE:
14121       # This will only lock the nodes in the group to be evacuated which
14122       # contain actual instances
14123       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14124       self._LockInstancesNodes()
14125
14126       # Lock all nodes in group to be evacuated and target groups
14127       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14128       assert self.group_uuid in owned_groups
14129       member_nodes = [node_name
14130                       for group in owned_groups
14131                       for node_name in self.cfg.GetNodeGroup(group).members]
14132       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14133
14134   def CheckPrereq(self):
14135     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14136     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14137     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14138
14139     assert owned_groups.issuperset(self.req_target_uuids)
14140     assert self.group_uuid in owned_groups
14141
14142     # Check if locked instances are still correct
14143     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14144
14145     # Get instance information
14146     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14147
14148     # Check if node groups for locked instances are still correct
14149     _CheckInstancesNodeGroups(self.cfg, self.instances,
14150                               owned_groups, owned_nodes, self.group_uuid)
14151
14152     if self.req_target_uuids:
14153       # User requested specific target groups
14154       self.target_uuids = self.req_target_uuids
14155     else:
14156       # All groups except the one to be evacuated are potential targets
14157       self.target_uuids = [group_uuid for group_uuid in owned_groups
14158                            if group_uuid != self.group_uuid]
14159
14160       if not self.target_uuids:
14161         raise errors.OpPrereqError("There are no possible target groups",
14162                                    errors.ECODE_INVAL)
14163
14164   def BuildHooksEnv(self):
14165     """Build hooks env.
14166
14167     """
14168     return {
14169       "GROUP_NAME": self.op.group_name,
14170       "TARGET_GROUPS": " ".join(self.target_uuids),
14171       }
14172
14173   def BuildHooksNodes(self):
14174     """Build hooks nodes.
14175
14176     """
14177     mn = self.cfg.GetMasterNode()
14178
14179     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14180
14181     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14182
14183     return (run_nodes, run_nodes)
14184
14185   def Exec(self, feedback_fn):
14186     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14187
14188     assert self.group_uuid not in self.target_uuids
14189
14190     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14191                      instances=instances, target_groups=self.target_uuids)
14192
14193     ial.Run(self.op.iallocator)
14194
14195     if not ial.success:
14196       raise errors.OpPrereqError("Can't compute group evacuation using"
14197                                  " iallocator '%s': %s" %
14198                                  (self.op.iallocator, ial.info),
14199                                  errors.ECODE_NORES)
14200
14201     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14202
14203     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14204                  len(jobs), self.op.group_name)
14205
14206     return ResultWithJobs(jobs)
14207
14208
14209 class TagsLU(NoHooksLU): # pylint: disable=W0223
14210   """Generic tags LU.
14211
14212   This is an abstract class which is the parent of all the other tags LUs.
14213
14214   """
14215   def ExpandNames(self):
14216     self.group_uuid = None
14217     self.needed_locks = {}
14218     if self.op.kind == constants.TAG_NODE:
14219       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14220       self.needed_locks[locking.LEVEL_NODE] = self.op.name
14221     elif self.op.kind == constants.TAG_INSTANCE:
14222       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14223       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
14224     elif self.op.kind == constants.TAG_NODEGROUP:
14225       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14226
14227     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14228     # not possible to acquire the BGL based on opcode parameters)
14229
14230   def CheckPrereq(self):
14231     """Check prerequisites.
14232
14233     """
14234     if self.op.kind == constants.TAG_CLUSTER:
14235       self.target = self.cfg.GetClusterInfo()
14236     elif self.op.kind == constants.TAG_NODE:
14237       self.target = self.cfg.GetNodeInfo(self.op.name)
14238     elif self.op.kind == constants.TAG_INSTANCE:
14239       self.target = self.cfg.GetInstanceInfo(self.op.name)
14240     elif self.op.kind == constants.TAG_NODEGROUP:
14241       self.target = self.cfg.GetNodeGroup(self.group_uuid)
14242     else:
14243       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14244                                  str(self.op.kind), errors.ECODE_INVAL)
14245
14246
14247 class LUTagsGet(TagsLU):
14248   """Returns the tags of a given object.
14249
14250   """
14251   REQ_BGL = False
14252
14253   def ExpandNames(self):
14254     TagsLU.ExpandNames(self)
14255
14256     # Share locks as this is only a read operation
14257     self.share_locks = _ShareAll()
14258
14259   def Exec(self, feedback_fn):
14260     """Returns the tag list.
14261
14262     """
14263     return list(self.target.GetTags())
14264
14265
14266 class LUTagsSearch(NoHooksLU):
14267   """Searches the tags for a given pattern.
14268
14269   """
14270   REQ_BGL = False
14271
14272   def ExpandNames(self):
14273     self.needed_locks = {}
14274
14275   def CheckPrereq(self):
14276     """Check prerequisites.
14277
14278     This checks the pattern passed for validity by compiling it.
14279
14280     """
14281     try:
14282       self.re = re.compile(self.op.pattern)
14283     except re.error, err:
14284       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14285                                  (self.op.pattern, err), errors.ECODE_INVAL)
14286
14287   def Exec(self, feedback_fn):
14288     """Returns the tag list.
14289
14290     """
14291     cfg = self.cfg
14292     tgts = [("/cluster", cfg.GetClusterInfo())]
14293     ilist = cfg.GetAllInstancesInfo().values()
14294     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14295     nlist = cfg.GetAllNodesInfo().values()
14296     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14297     tgts.extend(("/nodegroup/%s" % n.name, n)
14298                 for n in cfg.GetAllNodeGroupsInfo().values())
14299     results = []
14300     for path, target in tgts:
14301       for tag in target.GetTags():
14302         if self.re.search(tag):
14303           results.append((path, tag))
14304     return results
14305
14306
14307 class LUTagsSet(TagsLU):
14308   """Sets a tag on a given object.
14309
14310   """
14311   REQ_BGL = False
14312
14313   def CheckPrereq(self):
14314     """Check prerequisites.
14315
14316     This checks the type and length of the tag name and value.
14317
14318     """
14319     TagsLU.CheckPrereq(self)
14320     for tag in self.op.tags:
14321       objects.TaggableObject.ValidateTag(tag)
14322
14323   def Exec(self, feedback_fn):
14324     """Sets the tag.
14325
14326     """
14327     try:
14328       for tag in self.op.tags:
14329         self.target.AddTag(tag)
14330     except errors.TagError, err:
14331       raise errors.OpExecError("Error while setting tag: %s" % str(err))
14332     self.cfg.Update(self.target, feedback_fn)
14333
14334
14335 class LUTagsDel(TagsLU):
14336   """Delete a list of tags from a given object.
14337
14338   """
14339   REQ_BGL = False
14340
14341   def CheckPrereq(self):
14342     """Check prerequisites.
14343
14344     This checks that we have the given tag.
14345
14346     """
14347     TagsLU.CheckPrereq(self)
14348     for tag in self.op.tags:
14349       objects.TaggableObject.ValidateTag(tag)
14350     del_tags = frozenset(self.op.tags)
14351     cur_tags = self.target.GetTags()
14352
14353     diff_tags = del_tags - cur_tags
14354     if diff_tags:
14355       diff_names = ("'%s'" % i for i in sorted(diff_tags))
14356       raise errors.OpPrereqError("Tag(s) %s not found" %
14357                                  (utils.CommaJoin(diff_names), ),
14358                                  errors.ECODE_NOENT)
14359
14360   def Exec(self, feedback_fn):
14361     """Remove the tag from the object.
14362
14363     """
14364     for tag in self.op.tags:
14365       self.target.RemoveTag(tag)
14366     self.cfg.Update(self.target, feedback_fn)
14367
14368
14369 class LUTestDelay(NoHooksLU):
14370   """Sleep for a specified amount of time.
14371
14372   This LU sleeps on the master and/or nodes for a specified amount of
14373   time.
14374
14375   """
14376   REQ_BGL = False
14377
14378   def ExpandNames(self):
14379     """Expand names and set required locks.
14380
14381     This expands the node list, if any.
14382
14383     """
14384     self.needed_locks = {}
14385     if self.op.on_nodes:
14386       # _GetWantedNodes can be used here, but is not always appropriate to use
14387       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14388       # more information.
14389       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14390       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14391
14392   def _TestDelay(self):
14393     """Do the actual sleep.
14394
14395     """
14396     if self.op.on_master:
14397       if not utils.TestDelay(self.op.duration):
14398         raise errors.OpExecError("Error during master delay test")
14399     if self.op.on_nodes:
14400       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14401       for node, node_result in result.items():
14402         node_result.Raise("Failure during rpc call to node %s" % node)
14403
14404   def Exec(self, feedback_fn):
14405     """Execute the test delay opcode, with the wanted repetitions.
14406
14407     """
14408     if self.op.repeat == 0:
14409       self._TestDelay()
14410     else:
14411       top_value = self.op.repeat - 1
14412       for i in range(self.op.repeat):
14413         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14414         self._TestDelay()
14415
14416
14417 class LUTestJqueue(NoHooksLU):
14418   """Utility LU to test some aspects of the job queue.
14419
14420   """
14421   REQ_BGL = False
14422
14423   # Must be lower than default timeout for WaitForJobChange to see whether it
14424   # notices changed jobs
14425   _CLIENT_CONNECT_TIMEOUT = 20.0
14426   _CLIENT_CONFIRM_TIMEOUT = 60.0
14427
14428   @classmethod
14429   def _NotifyUsingSocket(cls, cb, errcls):
14430     """Opens a Unix socket and waits for another program to connect.
14431
14432     @type cb: callable
14433     @param cb: Callback to send socket name to client
14434     @type errcls: class
14435     @param errcls: Exception class to use for errors
14436
14437     """
14438     # Using a temporary directory as there's no easy way to create temporary
14439     # sockets without writing a custom loop around tempfile.mktemp and
14440     # socket.bind
14441     tmpdir = tempfile.mkdtemp()
14442     try:
14443       tmpsock = utils.PathJoin(tmpdir, "sock")
14444
14445       logging.debug("Creating temporary socket at %s", tmpsock)
14446       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14447       try:
14448         sock.bind(tmpsock)
14449         sock.listen(1)
14450
14451         # Send details to client
14452         cb(tmpsock)
14453
14454         # Wait for client to connect before continuing
14455         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14456         try:
14457           (conn, _) = sock.accept()
14458         except socket.error, err:
14459           raise errcls("Client didn't connect in time (%s)" % err)
14460       finally:
14461         sock.close()
14462     finally:
14463       # Remove as soon as client is connected
14464       shutil.rmtree(tmpdir)
14465
14466     # Wait for client to close
14467     try:
14468       try:
14469         # pylint: disable=E1101
14470         # Instance of '_socketobject' has no ... member
14471         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14472         conn.recv(1)
14473       except socket.error, err:
14474         raise errcls("Client failed to confirm notification (%s)" % err)
14475     finally:
14476       conn.close()
14477
14478   def _SendNotification(self, test, arg, sockname):
14479     """Sends a notification to the client.
14480
14481     @type test: string
14482     @param test: Test name
14483     @param arg: Test argument (depends on test)
14484     @type sockname: string
14485     @param sockname: Socket path
14486
14487     """
14488     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14489
14490   def _Notify(self, prereq, test, arg):
14491     """Notifies the client of a test.
14492
14493     @type prereq: bool
14494     @param prereq: Whether this is a prereq-phase test
14495     @type test: string
14496     @param test: Test name
14497     @param arg: Test argument (depends on test)
14498
14499     """
14500     if prereq:
14501       errcls = errors.OpPrereqError
14502     else:
14503       errcls = errors.OpExecError
14504
14505     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14506                                                   test, arg),
14507                                    errcls)
14508
14509   def CheckArguments(self):
14510     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14511     self.expandnames_calls = 0
14512
14513   def ExpandNames(self):
14514     checkargs_calls = getattr(self, "checkargs_calls", 0)
14515     if checkargs_calls < 1:
14516       raise errors.ProgrammerError("CheckArguments was not called")
14517
14518     self.expandnames_calls += 1
14519
14520     if self.op.notify_waitlock:
14521       self._Notify(True, constants.JQT_EXPANDNAMES, None)
14522
14523     self.LogInfo("Expanding names")
14524
14525     # Get lock on master node (just to get a lock, not for a particular reason)
14526     self.needed_locks = {
14527       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14528       }
14529
14530   def Exec(self, feedback_fn):
14531     if self.expandnames_calls < 1:
14532       raise errors.ProgrammerError("ExpandNames was not called")
14533
14534     if self.op.notify_exec:
14535       self._Notify(False, constants.JQT_EXEC, None)
14536
14537     self.LogInfo("Executing")
14538
14539     if self.op.log_messages:
14540       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14541       for idx, msg in enumerate(self.op.log_messages):
14542         self.LogInfo("Sending log message %s", idx + 1)
14543         feedback_fn(constants.JQT_MSGPREFIX + msg)
14544         # Report how many test messages have been sent
14545         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14546
14547     if self.op.fail:
14548       raise errors.OpExecError("Opcode failure was requested")
14549
14550     return True
14551
14552
14553 class IAllocator(object):
14554   """IAllocator framework.
14555
14556   An IAllocator instance has three sets of attributes:
14557     - cfg that is needed to query the cluster
14558     - input data (all members of the _KEYS class attribute are required)
14559     - four buffer attributes (in|out_data|text), that represent the
14560       input (to the external script) in text and data structure format,
14561       and the output from it, again in two formats
14562     - the result variables from the script (success, info, nodes) for
14563       easy usage
14564
14565   """
14566   # pylint: disable=R0902
14567   # lots of instance attributes
14568
14569   def __init__(self, cfg, rpc_runner, mode, **kwargs):
14570     self.cfg = cfg
14571     self.rpc = rpc_runner
14572     # init buffer variables
14573     self.in_text = self.out_text = self.in_data = self.out_data = None
14574     # init all input fields so that pylint is happy
14575     self.mode = mode
14576     self.memory = self.disks = self.disk_template = self.spindle_use = None
14577     self.os = self.tags = self.nics = self.vcpus = None
14578     self.hypervisor = None
14579     self.relocate_from = None
14580     self.name = None
14581     self.instances = None
14582     self.evac_mode = None
14583     self.target_groups = []
14584     # computed fields
14585     self.required_nodes = None
14586     # init result fields
14587     self.success = self.info = self.result = None
14588
14589     try:
14590       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14591     except KeyError:
14592       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14593                                    " IAllocator" % self.mode)
14594
14595     keyset = [n for (n, _) in keydata]
14596
14597     for key in kwargs:
14598       if key not in keyset:
14599         raise errors.ProgrammerError("Invalid input parameter '%s' to"
14600                                      " IAllocator" % key)
14601       setattr(self, key, kwargs[key])
14602
14603     for key in keyset:
14604       if key not in kwargs:
14605         raise errors.ProgrammerError("Missing input parameter '%s' to"
14606                                      " IAllocator" % key)
14607     self._BuildInputData(compat.partial(fn, self), keydata)
14608
14609   def _ComputeClusterData(self):
14610     """Compute the generic allocator input data.
14611
14612     This is the data that is independent of the actual operation.
14613
14614     """
14615     cfg = self.cfg
14616     cluster_info = cfg.GetClusterInfo()
14617     # cluster data
14618     data = {
14619       "version": constants.IALLOCATOR_VERSION,
14620       "cluster_name": cfg.GetClusterName(),
14621       "cluster_tags": list(cluster_info.GetTags()),
14622       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14623       "ipolicy": cluster_info.ipolicy,
14624       }
14625     ninfo = cfg.GetAllNodesInfo()
14626     iinfo = cfg.GetAllInstancesInfo().values()
14627     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14628
14629     # node data
14630     node_list = [n.name for n in ninfo.values() if n.vm_capable]
14631
14632     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14633       hypervisor_name = self.hypervisor
14634     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14635       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14636     else:
14637       hypervisor_name = cluster_info.primary_hypervisor
14638
14639     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14640                                         [hypervisor_name])
14641     node_iinfo = \
14642       self.rpc.call_all_instances_info(node_list,
14643                                        cluster_info.enabled_hypervisors)
14644
14645     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14646
14647     config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14648     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14649                                                  i_list, config_ndata)
14650     assert len(data["nodes"]) == len(ninfo), \
14651         "Incomplete node data computed"
14652
14653     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14654
14655     self.in_data = data
14656
14657   @staticmethod
14658   def _ComputeNodeGroupData(cfg):
14659     """Compute node groups data.
14660
14661     """
14662     cluster = cfg.GetClusterInfo()
14663     ng = dict((guuid, {
14664       "name": gdata.name,
14665       "alloc_policy": gdata.alloc_policy,
14666       "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14667       })
14668       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14669
14670     return ng
14671
14672   @staticmethod
14673   def _ComputeBasicNodeData(cfg, node_cfg):
14674     """Compute global node data.
14675
14676     @rtype: dict
14677     @returns: a dict of name: (node dict, node config)
14678
14679     """
14680     # fill in static (config-based) values
14681     node_results = dict((ninfo.name, {
14682       "tags": list(ninfo.GetTags()),
14683       "primary_ip": ninfo.primary_ip,
14684       "secondary_ip": ninfo.secondary_ip,
14685       "offline": ninfo.offline,
14686       "drained": ninfo.drained,
14687       "master_candidate": ninfo.master_candidate,
14688       "group": ninfo.group,
14689       "master_capable": ninfo.master_capable,
14690       "vm_capable": ninfo.vm_capable,
14691       "ndparams": cfg.GetNdParams(ninfo),
14692       })
14693       for ninfo in node_cfg.values())
14694
14695     return node_results
14696
14697   @staticmethod
14698   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14699                               node_results):
14700     """Compute global node data.
14701
14702     @param node_results: the basic node structures as filled from the config
14703
14704     """
14705     #TODO(dynmem): compute the right data on MAX and MIN memory
14706     # make a copy of the current dict
14707     node_results = dict(node_results)
14708     for nname, nresult in node_data.items():
14709       assert nname in node_results, "Missing basic data for node %s" % nname
14710       ninfo = node_cfg[nname]
14711
14712       if not (ninfo.offline or ninfo.drained):
14713         nresult.Raise("Can't get data for node %s" % nname)
14714         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14715                                 nname)
14716         remote_info = _MakeLegacyNodeInfo(nresult.payload)
14717
14718         for attr in ["memory_total", "memory_free", "memory_dom0",
14719                      "vg_size", "vg_free", "cpu_total"]:
14720           if attr not in remote_info:
14721             raise errors.OpExecError("Node '%s' didn't return attribute"
14722                                      " '%s'" % (nname, attr))
14723           if not isinstance(remote_info[attr], int):
14724             raise errors.OpExecError("Node '%s' returned invalid value"
14725                                      " for '%s': %s" %
14726                                      (nname, attr, remote_info[attr]))
14727         # compute memory used by primary instances
14728         i_p_mem = i_p_up_mem = 0
14729         for iinfo, beinfo in i_list:
14730           if iinfo.primary_node == nname:
14731             i_p_mem += beinfo[constants.BE_MAXMEM]
14732             if iinfo.name not in node_iinfo[nname].payload:
14733               i_used_mem = 0
14734             else:
14735               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14736             i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14737             remote_info["memory_free"] -= max(0, i_mem_diff)
14738
14739             if iinfo.admin_state == constants.ADMINST_UP:
14740               i_p_up_mem += beinfo[constants.BE_MAXMEM]
14741
14742         # compute memory used by instances
14743         pnr_dyn = {
14744           "total_memory": remote_info["memory_total"],
14745           "reserved_memory": remote_info["memory_dom0"],
14746           "free_memory": remote_info["memory_free"],
14747           "total_disk": remote_info["vg_size"],
14748           "free_disk": remote_info["vg_free"],
14749           "total_cpus": remote_info["cpu_total"],
14750           "i_pri_memory": i_p_mem,
14751           "i_pri_up_memory": i_p_up_mem,
14752           }
14753         pnr_dyn.update(node_results[nname])
14754         node_results[nname] = pnr_dyn
14755
14756     return node_results
14757
14758   @staticmethod
14759   def _ComputeInstanceData(cluster_info, i_list):
14760     """Compute global instance data.
14761
14762     """
14763     instance_data = {}
14764     for iinfo, beinfo in i_list:
14765       nic_data = []
14766       for nic in iinfo.nics:
14767         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14768         nic_dict = {
14769           "mac": nic.mac,
14770           "ip": nic.ip,
14771           "mode": filled_params[constants.NIC_MODE],
14772           "link": filled_params[constants.NIC_LINK],
14773           }
14774         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14775           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14776         nic_data.append(nic_dict)
14777       pir = {
14778         "tags": list(iinfo.GetTags()),
14779         "admin_state": iinfo.admin_state,
14780         "vcpus": beinfo[constants.BE_VCPUS],
14781         "memory": beinfo[constants.BE_MAXMEM],
14782         "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14783         "os": iinfo.os,
14784         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14785         "nics": nic_data,
14786         "disks": [{constants.IDISK_SIZE: dsk.size,
14787                    constants.IDISK_MODE: dsk.mode}
14788                   for dsk in iinfo.disks],
14789         "disk_template": iinfo.disk_template,
14790         "hypervisor": iinfo.hypervisor,
14791         }
14792       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14793                                                  pir["disks"])
14794       instance_data[iinfo.name] = pir
14795
14796     return instance_data
14797
14798   def _AddNewInstance(self):
14799     """Add new instance data to allocator structure.
14800
14801     This in combination with _AllocatorGetClusterData will create the
14802     correct structure needed as input for the allocator.
14803
14804     The checks for the completeness of the opcode must have already been
14805     done.
14806
14807     """
14808     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14809
14810     if self.disk_template in constants.DTS_INT_MIRROR:
14811       self.required_nodes = 2
14812     else:
14813       self.required_nodes = 1
14814
14815     request = {
14816       "name": self.name,
14817       "disk_template": self.disk_template,
14818       "tags": self.tags,
14819       "os": self.os,
14820       "vcpus": self.vcpus,
14821       "memory": self.memory,
14822       "spindle_use": self.spindle_use,
14823       "disks": self.disks,
14824       "disk_space_total": disk_space,
14825       "nics": self.nics,
14826       "required_nodes": self.required_nodes,
14827       "hypervisor": self.hypervisor,
14828       }
14829
14830     return request
14831
14832   def _AddRelocateInstance(self):
14833     """Add relocate instance data to allocator structure.
14834
14835     This in combination with _IAllocatorGetClusterData will create the
14836     correct structure needed as input for the allocator.
14837
14838     The checks for the completeness of the opcode must have already been
14839     done.
14840
14841     """
14842     instance = self.cfg.GetInstanceInfo(self.name)
14843     if instance is None:
14844       raise errors.ProgrammerError("Unknown instance '%s' passed to"
14845                                    " IAllocator" % self.name)
14846
14847     if instance.disk_template not in constants.DTS_MIRRORED:
14848       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14849                                  errors.ECODE_INVAL)
14850
14851     if instance.disk_template in constants.DTS_INT_MIRROR and \
14852         len(instance.secondary_nodes) != 1:
14853       raise errors.OpPrereqError("Instance has not exactly one secondary node",
14854                                  errors.ECODE_STATE)
14855
14856     self.required_nodes = 1
14857     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14858     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14859
14860     request = {
14861       "name": self.name,
14862       "disk_space_total": disk_space,
14863       "required_nodes": self.required_nodes,
14864       "relocate_from": self.relocate_from,
14865       }
14866     return request
14867
14868   def _AddNodeEvacuate(self):
14869     """Get data for node-evacuate requests.
14870
14871     """
14872     return {
14873       "instances": self.instances,
14874       "evac_mode": self.evac_mode,
14875       }
14876
14877   def _AddChangeGroup(self):
14878     """Get data for node-evacuate requests.
14879
14880     """
14881     return {
14882       "instances": self.instances,
14883       "target_groups": self.target_groups,
14884       }
14885
14886   def _BuildInputData(self, fn, keydata):
14887     """Build input data structures.
14888
14889     """
14890     self._ComputeClusterData()
14891
14892     request = fn()
14893     request["type"] = self.mode
14894     for keyname, keytype in keydata:
14895       if keyname not in request:
14896         raise errors.ProgrammerError("Request parameter %s is missing" %
14897                                      keyname)
14898       val = request[keyname]
14899       if not keytype(val):
14900         raise errors.ProgrammerError("Request parameter %s doesn't pass"
14901                                      " validation, value %s, expected"
14902                                      " type %s" % (keyname, val, keytype))
14903     self.in_data["request"] = request
14904
14905     self.in_text = serializer.Dump(self.in_data)
14906
14907   _STRING_LIST = ht.TListOf(ht.TString)
14908   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14909      # pylint: disable=E1101
14910      # Class '...' has no 'OP_ID' member
14911      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14912                           opcodes.OpInstanceMigrate.OP_ID,
14913                           opcodes.OpInstanceReplaceDisks.OP_ID])
14914      })))
14915
14916   _NEVAC_MOVED = \
14917     ht.TListOf(ht.TAnd(ht.TIsLength(3),
14918                        ht.TItems([ht.TNonEmptyString,
14919                                   ht.TNonEmptyString,
14920                                   ht.TListOf(ht.TNonEmptyString),
14921                                  ])))
14922   _NEVAC_FAILED = \
14923     ht.TListOf(ht.TAnd(ht.TIsLength(2),
14924                        ht.TItems([ht.TNonEmptyString,
14925                                   ht.TMaybeString,
14926                                  ])))
14927   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14928                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14929
14930   _MODE_DATA = {
14931     constants.IALLOCATOR_MODE_ALLOC:
14932       (_AddNewInstance,
14933        [
14934         ("name", ht.TString),
14935         ("memory", ht.TInt),
14936         ("spindle_use", ht.TInt),
14937         ("disks", ht.TListOf(ht.TDict)),
14938         ("disk_template", ht.TString),
14939         ("os", ht.TString),
14940         ("tags", _STRING_LIST),
14941         ("nics", ht.TListOf(ht.TDict)),
14942         ("vcpus", ht.TInt),
14943         ("hypervisor", ht.TString),
14944         ], ht.TList),
14945     constants.IALLOCATOR_MODE_RELOC:
14946       (_AddRelocateInstance,
14947        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14948        ht.TList),
14949      constants.IALLOCATOR_MODE_NODE_EVAC:
14950       (_AddNodeEvacuate, [
14951         ("instances", _STRING_LIST),
14952         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14953         ], _NEVAC_RESULT),
14954      constants.IALLOCATOR_MODE_CHG_GROUP:
14955       (_AddChangeGroup, [
14956         ("instances", _STRING_LIST),
14957         ("target_groups", _STRING_LIST),
14958         ], _NEVAC_RESULT),
14959     }
14960
14961   def Run(self, name, validate=True, call_fn=None):
14962     """Run an instance allocator and return the results.
14963
14964     """
14965     if call_fn is None:
14966       call_fn = self.rpc.call_iallocator_runner
14967
14968     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14969     result.Raise("Failure while running the iallocator script")
14970
14971     self.out_text = result.payload
14972     if validate:
14973       self._ValidateResult()
14974
14975   def _ValidateResult(self):
14976     """Process the allocator results.
14977
14978     This will process and if successful save the result in
14979     self.out_data and the other parameters.
14980
14981     """
14982     try:
14983       rdict = serializer.Load(self.out_text)
14984     except Exception, err:
14985       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14986
14987     if not isinstance(rdict, dict):
14988       raise errors.OpExecError("Can't parse iallocator results: not a dict")
14989
14990     # TODO: remove backwards compatiblity in later versions
14991     if "nodes" in rdict and "result" not in rdict:
14992       rdict["result"] = rdict["nodes"]
14993       del rdict["nodes"]
14994
14995     for key in "success", "info", "result":
14996       if key not in rdict:
14997         raise errors.OpExecError("Can't parse iallocator results:"
14998                                  " missing key '%s'" % key)
14999       setattr(self, key, rdict[key])
15000
15001     if not self._result_check(self.result):
15002       raise errors.OpExecError("Iallocator returned invalid result,"
15003                                " expected %s, got %s" %
15004                                (self._result_check, self.result),
15005                                errors.ECODE_INVAL)
15006
15007     if self.mode == constants.IALLOCATOR_MODE_RELOC:
15008       assert self.relocate_from is not None
15009       assert self.required_nodes == 1
15010
15011       node2group = dict((name, ndata["group"])
15012                         for (name, ndata) in self.in_data["nodes"].items())
15013
15014       fn = compat.partial(self._NodesToGroups, node2group,
15015                           self.in_data["nodegroups"])
15016
15017       instance = self.cfg.GetInstanceInfo(self.name)
15018       request_groups = fn(self.relocate_from + [instance.primary_node])
15019       result_groups = fn(rdict["result"] + [instance.primary_node])
15020
15021       if self.success and not set(result_groups).issubset(request_groups):
15022         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15023                                  " differ from original groups (%s)" %
15024                                  (utils.CommaJoin(result_groups),
15025                                   utils.CommaJoin(request_groups)))
15026
15027     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15028       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15029
15030     self.out_data = rdict
15031
15032   @staticmethod
15033   def _NodesToGroups(node2group, groups, nodes):
15034     """Returns a list of unique group names for a list of nodes.
15035
15036     @type node2group: dict
15037     @param node2group: Map from node name to group UUID
15038     @type groups: dict
15039     @param groups: Group information
15040     @type nodes: list
15041     @param nodes: Node names
15042
15043     """
15044     result = set()
15045
15046     for node in nodes:
15047       try:
15048         group_uuid = node2group[node]
15049       except KeyError:
15050         # Ignore unknown node
15051         pass
15052       else:
15053         try:
15054           group = groups[group_uuid]
15055         except KeyError:
15056           # Can't find group, let's use UUID
15057           group_name = group_uuid
15058         else:
15059           group_name = group["name"]
15060
15061         result.add(group_name)
15062
15063     return sorted(result)
15064
15065
15066 class LUTestAllocator(NoHooksLU):
15067   """Run allocator tests.
15068
15069   This LU runs the allocator tests
15070
15071   """
15072   def CheckPrereq(self):
15073     """Check prerequisites.
15074
15075     This checks the opcode parameters depending on the director and mode test.
15076
15077     """
15078     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15079       for attr in ["memory", "disks", "disk_template",
15080                    "os", "tags", "nics", "vcpus"]:
15081         if not hasattr(self.op, attr):
15082           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15083                                      attr, errors.ECODE_INVAL)
15084       iname = self.cfg.ExpandInstanceName(self.op.name)
15085       if iname is not None:
15086         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15087                                    iname, errors.ECODE_EXISTS)
15088       if not isinstance(self.op.nics, list):
15089         raise errors.OpPrereqError("Invalid parameter 'nics'",
15090                                    errors.ECODE_INVAL)
15091       if not isinstance(self.op.disks, list):
15092         raise errors.OpPrereqError("Invalid parameter 'disks'",
15093                                    errors.ECODE_INVAL)
15094       for row in self.op.disks:
15095         if (not isinstance(row, dict) or
15096             constants.IDISK_SIZE not in row or
15097             not isinstance(row[constants.IDISK_SIZE], int) or
15098             constants.IDISK_MODE not in row or
15099             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15100           raise errors.OpPrereqError("Invalid contents of the 'disks'"
15101                                      " parameter", errors.ECODE_INVAL)
15102       if self.op.hypervisor is None:
15103         self.op.hypervisor = self.cfg.GetHypervisorType()
15104     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15105       fname = _ExpandInstanceName(self.cfg, self.op.name)
15106       self.op.name = fname
15107       self.relocate_from = \
15108           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15109     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15110                           constants.IALLOCATOR_MODE_NODE_EVAC):
15111       if not self.op.instances:
15112         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15113       self.op.instances = _GetWantedInstances(self, self.op.instances)
15114     else:
15115       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15116                                  self.op.mode, errors.ECODE_INVAL)
15117
15118     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15119       if self.op.allocator is None:
15120         raise errors.OpPrereqError("Missing allocator name",
15121                                    errors.ECODE_INVAL)
15122     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15123       raise errors.OpPrereqError("Wrong allocator test '%s'" %
15124                                  self.op.direction, errors.ECODE_INVAL)
15125
15126   def Exec(self, feedback_fn):
15127     """Run the allocator test.
15128
15129     """
15130     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15131       ial = IAllocator(self.cfg, self.rpc,
15132                        mode=self.op.mode,
15133                        name=self.op.name,
15134                        memory=self.op.memory,
15135                        disks=self.op.disks,
15136                        disk_template=self.op.disk_template,
15137                        os=self.op.os,
15138                        tags=self.op.tags,
15139                        nics=self.op.nics,
15140                        vcpus=self.op.vcpus,
15141                        hypervisor=self.op.hypervisor,
15142                        )
15143     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15144       ial = IAllocator(self.cfg, self.rpc,
15145                        mode=self.op.mode,
15146                        name=self.op.name,
15147                        relocate_from=list(self.relocate_from),
15148                        )
15149     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15150       ial = IAllocator(self.cfg, self.rpc,
15151                        mode=self.op.mode,
15152                        instances=self.op.instances,
15153                        target_groups=self.op.target_groups)
15154     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15155       ial = IAllocator(self.cfg, self.rpc,
15156                        mode=self.op.mode,
15157                        instances=self.op.instances,
15158                        evac_mode=self.op.evac_mode)
15159     else:
15160       raise errors.ProgrammerError("Uncatched mode %s in"
15161                                    " LUTestAllocator.Exec", self.op.mode)
15162
15163     if self.op.direction == constants.IALLOCATOR_DIR_IN:
15164       result = ial.in_text
15165     else:
15166       ial.Run(self.op.allocator, validate=False)
15167       result = ial.out_text
15168     return result
15169
15170
15171 #: Query type implementations
15172 _QUERY_IMPL = {
15173   constants.QR_INSTANCE: _InstanceQuery,
15174   constants.QR_NODE: _NodeQuery,
15175   constants.QR_GROUP: _GroupQuery,
15176   constants.QR_OS: _OsQuery,
15177   }
15178
15179 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15180
15181
15182 def _GetQueryImplementation(name):
15183   """Returns the implemtnation for a query type.
15184
15185   @param name: Query type, must be one of L{constants.QR_VIA_OP}
15186
15187   """
15188   try:
15189     return _QUERY_IMPL[name]
15190   except KeyError:
15191     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15192                                errors.ECODE_INVAL)