code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60 from ganeti import ht
  61 from ganeti import rpc
  62 from ganeti import runtime
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
#: Size of DRBD meta block device
# NOTE(review): the unit is not stated here (presumably MiB, like other disk
# sizes) -- confirm before relying on it
DRBD_META_SIZE = 128

# States of instance
# Each of the following is a list of C{constants.ADMINST_*} values, grouping
# the admin states for use in instance state checks
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
# (the union of L{INSTANCE_DOWN} and the offline state, as a frozenset)
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  # Hooks path and type; when HPATH is None the hooks-building methods are
  # not called (see BuildHooksEnv/BuildHooksNodes)
  HPATH = None
  HTYPE = None
  # Whether this LU needs the Big Ganeti Lock held exclusively
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    Besides storing its arguments, the constructor initializes the lock
    declaration dictionaries, forces C{op.debug_level} to an integer,
    validates the opcode and finally calls L{CheckArguments}.

    @param processor: object providing the C{Log}, C{LogWarning},
        C{LogInfo} and C{LogStep} callables
    @param op: the opcode this LU executes
    @param context: object providing the C{cfg} and C{glm} attributes
    @param rpc_runner: stored as C{self.rpc}

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    # By default no level is shared (0 for every lock level)
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    # (a missing or non-integer debug level is replaced by 0)
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    # Must come last: subclasses' checks may rely on everything set up above
    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    The default implementation does nothing.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    @raise NotImplementedError: if the LU does not require the BGL
        (C{REQ_BGL} is false) and has not overridden this method

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    The default implementation runs C{CheckPrereq} of every tasklet, in
    order, if tasklets are defined; otherwise it does nothing.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    The default implementation runs C{Exec} of every tasklet, in order,
    passing C{feedback_fn} on to each of them.

    @param feedback_fn: passed unchanged to each tasklet's C{Exec}
    @raise NotImplementedError: if no tasklets are defined and the
        method has not been overridden

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    If needed_locks was already initialized, it must not contain an entry
    for the instance level yet (this is checked with an assertion).

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    # Note: the expanded name itself is stored, not a list containing it
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[level] with the
    primary nodes (and, unless C{primary_only} is set, also the secondary
    nodes) of all instances whose instance-level locks are currently owned.

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[level] is set. Depending on that value the
    computed nodes either replace (L{constants.LOCKS_REPLACE}) or are
    appended to (L{constants.LOCKS_APPEND}) the already declared locks;
    afterwards the entry is removed from self.recalculate_locks.

    In the future it may grow parameters to just lock some instance's nodes,
    if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes
    @raise errors.ProgrammerError: if self.recalculate_locks[level] holds
        an unknown recalculation mode

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    # The recalculation request has been served; a second call for the same
    # level without a new request will trip the assertion above
    del self.recalculate_locks[level]
 416
 417
 418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 419   """Simple LU which runs no hooks.
 420
 421   This LU is intended as a parent for other LogicalUnits which will
 422   run no hooks, in order to reduce duplicate code.
 423
 424   """
 425   HPATH = None
 426   HTYPE = None
 427
 428   def BuildHooksEnv(self):
 429     """Empty BuildHooksEnv for NoHooksLu.
 430
 431     This just raises an error.
 432
 433     """
 434     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 435
 436   def BuildHooksNodes(self):
 437     """Empty BuildHooksNodes for NoHooksLU.
 438
 439     """
 440     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 441
 442
 443 class Tasklet:
 444   """Tasklet base class.
 445
 446   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 447   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 448   tasklets know nothing about locks.
 449
 450   Subclasses must follow these rules:
 451     - Implement CheckPrereq
 452     - Implement Exec
 453
 454   """
 455   def __init__(self, lu):
 456     self.lu = lu
 457
 458     # Shortcuts
 459     self.cfg = lu.cfg
 460     self.rpc = lu.rpc
 461
 462   def CheckPrereq(self):
 463     """Check prerequisites for this tasklets.
 464
 465     This method should check whether the prerequisites for the execution of
 466     this tasklet are fulfilled. It can do internode communication, but it
 467     should be idempotent - no cluster or system changes are allowed.
 468
 469     The method should raise errors.OpPrereqError in case something is not
 470     fulfilled. Its return value is ignored.
 471
 472     This method should also update all parameters to their canonical form if it
 473     hasn't been done before.
 474
 475     """
 476     pass
 477
 478   def Exec(self, feedback_fn):
 479     """Execute the tasklet.
 480
 481     This method should implement the actual work. It should raise
 482     errors.OpExecError for failures that are somewhat dealt with in code, or
 483     expected.
 484
 485     """
 486     raise NotImplementedError
 487
 488
 489 class _QueryBase:
 490   """Base for query utility classes.
 491
 492   """
 493   #: Attribute holding field definitions
 494   FIELDS = None
 495
 496   #: Field to sort by
 497   SORT_FIELD = "name"
 498
 499   def __init__(self, qfilter, fields, use_locking):
 500     """Initializes this class.
 501
 502     """
 503     self.use_locking = use_locking
 504
 505     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 506                              namefield=self.SORT_FIELD)
 507     self.requested_data = self.query.RequestedData()
 508     self.names = self.query.RequestedNames()
 509
 510     # Sort only if no names were requested
 511     self.sort_by_name = not self.names
 512
 513     self.do_locking = None
 514     self.wanted = None
 515
 516   def _GetNames(self, lu, all_names, lock_level):
 517     """Helper function to determine names asked for in the query.
 518
 519     """
 520     if self.do_locking:
 521       names = lu.owned_locks(lock_level)
 522     else:
 523       names = all_names
 524
 525     if self.wanted == locking.ALL_SET:
 526       assert not self.names
 527       # caller didn't specify names, so ordering is not important
 528       return utils.NiceSort(names)
 529
 530     # caller specified names and we must keep the same order
 531     assert self.names
 532     assert not self.do_locking or lu.glm.is_owned(lock_level)
 533
 534     missing = set(self.wanted).difference(names)
 535     if missing:
 536       raise errors.OpExecError("Some items were removed before retrieving"
 537                                " their data: %s" % missing)
 538
 539     # Return expanded names
 540     return self.wanted
 541
 542   def ExpandNames(self, lu):
 543     """Expand names for this query.
 544
 545     See L{LogicalUnit.ExpandNames}.
 546
 547     """
 548     raise NotImplementedError()
 549
 550   def DeclareLocks(self, lu, level):
 551     """Declare locks for this query.
 552
 553     See L{LogicalUnit.DeclareLocks}.
 554
 555     """
 556     raise NotImplementedError()
 557
 558   def _GetQueryData(self, lu):
 559     """Collects all data for this query.
 560
 561     @return: Query data object
 562
 563     """
 564     raise NotImplementedError()
 565
 566   def NewStyleQuery(self, lu):
 567     """Collect data and execute query.
 568
 569     """
 570     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 571                                   sort_by_name=self.sort_by_name)
 572
 573   def OldStyleQuery(self, lu):
 574     """Collect data and execute query.
 575
 576     """
 577     return self.query.OldStyleQuery(self._GetQueryData(lu),
 578                                     sort_by_name=self.sort_by_name)
 579
 580
 581 def _ShareAll():
 582   """Returns a dict declaring all lock levels shared.
 583
 584   """
 585   return dict.fromkeys(locking.LEVELS, 1)
 586
 587
 588 def _MakeLegacyNodeInfo(data):
 589   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 590
 591   Converts the data into a single dictionary. This is fine for most use cases,
 592   but some require information from more than one volume group or hypervisor.
 593
 594   """
 595   (bootid, (vg_info, ), (hv_info, )) = data
 596
 597   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 598     "bootid": bootid,
 599     })
 600
 601
 602 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 603                               cur_group_uuid):
 604   """Checks if node groups for locked instances are still correct.
 605
 606   @type cfg: L{config.ConfigWriter}
 607   @param cfg: Cluster configuration
 608   @type instances: dict; string as key, L{objects.Instance} as value
 609   @param instances: Dictionary, instance name as key, instance object as value
 610   @type owned_groups: iterable of string
 611   @param owned_groups: List of owned groups
 612   @type owned_nodes: iterable of string
 613   @param owned_nodes: List of owned nodes
 614   @type cur_group_uuid: string or None
 615   @param cur_group_uuid: Optional group UUID to check against instance's groups
 616
 617   """
 618   for (name, inst) in instances.items():
 619     assert owned_nodes.issuperset(inst.all_nodes), \
 620       "Instance %s's nodes changed while we kept the lock" % name
 621
 622     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 623
 624     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 625       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 626
 627
 628 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 629   """Checks if the owned node groups are still correct for an instance.
 630
 631   @type cfg: L{config.ConfigWriter}
 632   @param cfg: The cluster configuration
 633   @type instance_name: string
 634   @param instance_name: Instance name
 635   @type owned_groups: set or frozenset
 636   @param owned_groups: List of currently owned node groups
 637
 638   """
 639   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 640
 641   if not owned_groups.issuperset(inst_groups):
 642     raise errors.OpPrereqError("Instance %s's node groups changed since"
 643                                " locks were acquired, current groups are"
 644                                " are '%s', owning groups '%s'; retry the"
 645                                " operation" %
 646                                (instance_name,
 647                                 utils.CommaJoin(inst_groups),
 648                                 utils.CommaJoin(owned_groups)),
 649                                errors.ECODE_STATE)
 650
 651   return inst_groups
 652
 653
 654 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 655   """Checks if the instances in a node group are still correct.
 656
 657   @type cfg: L{config.ConfigWriter}
 658   @param cfg: The cluster configuration
 659   @type group_uuid: string
 660   @param group_uuid: Node group UUID
 661   @type owned_instances: set or frozenset
 662   @param owned_instances: List of currently owned instances
 663
 664   """
 665   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 666   if owned_instances != wanted_instances:
 667     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 668                                " locks were acquired, wanted '%s', have '%s';"
 669                                " retry the operation" %
 670                                (group_uuid,
 671                                 utils.CommaJoin(wanted_instances),
 672                                 utils.CommaJoin(owned_instances)),
 673                                errors.ECODE_STATE)
 674
 675   return wanted_instances
 676
 677
 678 def _SupportsOob(cfg, node):
 679   """Tells if node supports OOB.
 680
 681   @type cfg: L{config.ConfigWriter}
 682   @param cfg: The cluster configuration
 683   @type node: L{objects.Node}
 684   @param node: The node
 685   @return: The OOB script if supported or an empty string otherwise
 686
 687   """
 688   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 689
 690
 691 def _GetWantedNodes(lu, nodes):
 692   """Returns list of checked and expanded node names.
 693
 694   @type lu: L{LogicalUnit}
 695   @param lu: the logical unit on whose behalf we execute
 696   @type nodes: list
 697   @param nodes: list of node names or None for all nodes
 698   @rtype: list
 699   @return: the list of nodes, sorted
 700   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 701
 702   """
 703   if nodes:
 704     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 705
 706   return utils.NiceSort(lu.cfg.GetNodeList())
 707
 708
 709 def _GetWantedInstances(lu, instances):
 710   """Returns list of checked and expanded instance names.
 711
 712   @type lu: L{LogicalUnit}
 713   @param lu: the logical unit on whose behalf we execute
 714   @type instances: list
 715   @param instances: list of instance names or None for all instances
 716   @rtype: list
 717   @return: the list of instances, sorted
 718   @raise errors.OpPrereqError: if the instances parameter is wrong type
 719   @raise errors.OpPrereqError: if any of the passed instances is not found
 720
 721   """
 722   if instances:
 723     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 724   else:
 725     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 726   return wanted
 727
 728
 729 def _GetUpdatedParams(old_params, update_dict,
 730                       use_default=True, use_none=False):
 731   """Return the new version of a parameter dictionary.
 732
 733   @type old_params: dict
 734   @param old_params: old parameters
 735   @type update_dict: dict
 736   @param update_dict: dict containing new parameter values, or
 737       constants.VALUE_DEFAULT to reset the parameter to its default
 738       value
 739   @param use_default: boolean
 740   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 741       values as 'to be deleted' values
 742   @param use_none: boolean
 743   @type use_none: whether to recognise C{None} values as 'to be
 744       deleted' values
 745   @rtype: dict
 746   @return: the new parameter dictionary
 747
 748   """
 749   params_copy = copy.deepcopy(old_params)
 750   for key, val in update_dict.iteritems():
 751     if ((use_default and val == constants.VALUE_DEFAULT) or
 752         (use_none and val is None)):
 753       try:
 754         del params_copy[key]
 755       except KeyError:
 756         pass
 757     else:
 758       params_copy[key] = val
 759   return params_copy
 760
 761
 762 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 763   """Return the new version of a instance policy.
 764
 765   @param group_policy: whether this policy applies to a group and thus
 766     we should support removal of policy entries
 767
 768   """
 769   use_none = use_default = group_policy
 770   ipolicy = copy.deepcopy(old_ipolicy)
 771   for key, value in new_ipolicy.items():
 772     if key not in constants.IPOLICY_ALL_KEYS:
 773       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 774                                  errors.ECODE_INVAL)
 775     if key in constants.IPOLICY_ISPECS:
 776       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 777       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 778                                        use_none=use_none,
 779                                        use_default=use_default)
 780     else:
 781       if not value or value == [constants.VALUE_DEFAULT]:
 782         if group_policy:
 783           del ipolicy[key]
 784         else:
 785           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 786                                      " on the cluster'" % key,
 787                                      errors.ECODE_INVAL)
 788       else:
 789         if key in constants.IPOLICY_PARAMETERS:
 790           # FIXME: we assume all such values are float
 791           try:
 792             ipolicy[key] = float(value)
 793           except (TypeError, ValueError), err:
 794             raise errors.OpPrereqError("Invalid value for attribute"
 795                                        " '%s': '%s', error: %s" %
 796                                        (key, value, err), errors.ECODE_INVAL)
 797         else:
 798           # FIXME: we assume all others are lists; this should be redone
 799           # in a nicer way
 800           ipolicy[key] = list(value)
 801   try:
 802     objects.InstancePolicy.CheckParameterSyntax(ipolicy)
 803   except errors.ConfigurationError, err:
 804     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 805                                errors.ECODE_INVAL)
 806   return ipolicy
 807
 808
 809 def _UpdateAndVerifySubDict(base, updates, type_check):
 810   """Updates and verifies a dict with sub dicts of the same type.
 811
 812   @param base: The dict with the old data
 813   @param updates: The dict with the new data
 814   @param type_check: Dict suitable to ForceDictType to verify correct types
 815   @returns: A new dict with updated and verified values
 816
 817   """
 818   def fn(old, value):
 819     new = _GetUpdatedParams(old, value)
 820     utils.ForceDictType(new, type_check)
 821     return new
 822
 823   ret = copy.deepcopy(base)
 824   ret.update(dict((key, fn(base.get(key, {}), value))
 825                   for key, value in updates.items()))
 826   return ret
 827
 828
 829 def _MergeAndVerifyHvState(op_input, obj_input):
 830   """Combines the hv state from an opcode with the one of the object
 831
 832   @param op_input: The input dict from the opcode
 833   @param obj_input: The input dict from the objects
 834   @return: The verified and updated dict
 835
 836   """
 837   if op_input:
 838     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 839     if invalid_hvs:
 840       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 841                                  " %s" % utils.CommaJoin(invalid_hvs),
 842                                  errors.ECODE_INVAL)
 843     if obj_input is None:
 844       obj_input = {}
 845     type_check = constants.HVSTS_PARAMETER_TYPES
 846     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 847
 848   return None
 849
 850
 851 def _MergeAndVerifyDiskState(op_input, obj_input):
 852   """Combines the disk state from an opcode with the one of the object
 853
 854   @param op_input: The input dict from the opcode
 855   @param obj_input: The input dict from the objects
 856   @return: The verified and updated dict
 857   """
 858   if op_input:
 859     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 860     if invalid_dst:
 861       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 862                                  utils.CommaJoin(invalid_dst),
 863                                  errors.ECODE_INVAL)
 864     type_check = constants.DSS_PARAMETER_TYPES
 865     if obj_input is None:
 866       obj_input = {}
 867     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 868                                               type_check))
 869                 for key, value in op_input.items())
 870
 871   return None
 872
 873
 874 def _ReleaseLocks(lu, level, names=None, keep=None):
 875   """Releases locks owned by an LU.
 876
 877   @type lu: L{LogicalUnit}
 878   @param level: Lock level
 879   @type names: list or None
 880   @param names: Names of locks to release
 881   @type keep: list or None
 882   @param keep: Names of locks to retain
 883
 884   """
 885   assert not (keep is not None and names is not None), \
 886          "Only one of the 'names' and the 'keep' parameters can be given"
 887
 888   if names is not None:
 889     should_release = names.__contains__
 890   elif keep:
 891     should_release = lambda name: name not in keep
 892   else:
 893     should_release = None
 894
 895   owned = lu.owned_locks(level)
 896   if not owned:
 897     # Not owning any lock at this level, do nothing
 898     pass
 899
 900   elif should_release:
 901     retain = []
 902     release = []
 903
 904     # Determine which locks to release
 905     for name in owned:
 906       if should_release(name):
 907         release.append(name)
 908       else:
 909         retain.append(name)
 910
 911     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 912
 913     # Release just some locks
 914     lu.glm.release(level, names=release)
 915
 916     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 917   else:
 918     # Release everything
 919     lu.glm.release(level)
 920
 921     assert not lu.glm.is_owned(level), "No locks should be owned"
 922
 923
 924 def _MapInstanceDisksToNodes(instances):
 925   """Creates a map from (node, volume) to instance name.
 926
 927   @type instances: list of L{objects.Instance}
 928   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 929
 930   """
 931   return dict(((node, vol), inst.name)
 932               for inst in instances
 933               for (node, vols) in inst.MapLVsByNode().items()
 934               for vol in vols)
 935
 936
 937 def _RunPostHook(lu, node_name):
 938   """Runs the post-hook for an opcode on a single node.
 939
 940   """
 941   hm = lu.proc.BuildHooksManager(lu)
 942   try:
 943     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 944   except:
 945     # pylint: disable=W0702
 946     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 947
 948
 949 def _CheckOutputFields(static, dynamic, selected):
 950   """Checks whether all selected fields are valid.
 951
 952   @type static: L{utils.FieldSet}
 953   @param static: static fields set
 954   @type dynamic: L{utils.FieldSet}
 955   @param dynamic: dynamic fields set
 956
 957   """
 958   f = utils.FieldSet()
 959   f.Extend(static)
 960   f.Extend(dynamic)
 961
 962   delta = f.NonMatching(selected)
 963   if delta:
 964     raise errors.OpPrereqError("Unknown output fields selected: %s"
 965                                % ",".join(delta), errors.ECODE_INVAL)
 966
 967
 968 def _CheckGlobalHvParams(params):
 969   """Validates that given hypervisor params are not global ones.
 970
 971   This will ensure that instances don't get customised versions of
 972   global params.
 973
 974   """
 975   used_globals = constants.HVC_GLOBALS.intersection(params)
 976   if used_globals:
 977     msg = ("The following hypervisor parameters are global and cannot"
 978            " be customized at instance level, please modify them at"
 979            " cluster level: %s" % utils.CommaJoin(used_globals))
 980     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 981
 982
 983 def _CheckNodeOnline(lu, node, msg=None):
 984   """Ensure that a given node is online.
 985
 986   @param lu: the LU on behalf of which we make the check
 987   @param node: the node to check
 988   @param msg: if passed, should be a message to replace the default one
 989   @raise errors.OpPrereqError: if the node is offline
 990
 991   """
 992   if msg is None:
 993     msg = "Can't use offline node"
 994   if lu.cfg.GetNodeInfo(node).offline:
 995     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 996
 997
 998 def _CheckNodeNotDrained(lu, node):
 999   """Ensure that a given node is not drained.
1000
1001   @param lu: the LU on behalf of which we make the check
1002   @param node: the node to check
1003   @raise errors.OpPrereqError: if the node is drained
1004
1005   """
1006   if lu.cfg.GetNodeInfo(node).drained:
1007     raise errors.OpPrereqError("Can't use drained node %s" % node,
1008                                errors.ECODE_STATE)
1009
1010
1011 def _CheckNodeVmCapable(lu, node):
1012   """Ensure that a given node is vm capable.
1013
1014   @param lu: the LU on behalf of which we make the check
1015   @param node: the node to check
1016   @raise errors.OpPrereqError: if the node is not vm capable
1017
1018   """
1019   if not lu.cfg.GetNodeInfo(node).vm_capable:
1020     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1021                                errors.ECODE_STATE)
1022
1023
1024 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1025   """Ensure that a node supports a given OS.
1026
1027   @param lu: the LU on behalf of which we make the check
1028   @param node: the node to check
1029   @param os_name: the OS to query about
1030   @param force_variant: whether to ignore variant errors
1031   @raise errors.OpPrereqError: if the node is not supporting the OS
1032
1033   """
1034   result = lu.rpc.call_os_get(node, os_name)
1035   result.Raise("OS '%s' not in supported OS list for node %s" %
1036                (os_name, node),
1037                prereq=True, ecode=errors.ECODE_INVAL)
1038   if not force_variant:
1039     _CheckOSVariant(result.payload, os_name)
1040
1041
1042 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1043   """Ensure that a node has the given secondary ip.
1044
1045   @type lu: L{LogicalUnit}
1046   @param lu: the LU on behalf of which we make the check
1047   @type node: string
1048   @param node: the node to check
1049   @type secondary_ip: string
1050   @param secondary_ip: the ip to check
1051   @type prereq: boolean
1052   @param prereq: whether to throw a prerequisite or an execute error
1053   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1054   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1055
1056   """
1057   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1058   result.Raise("Failure checking secondary ip on node %s" % node,
1059                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1060   if not result.payload:
1061     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1062            " please fix and re-run this command" % secondary_ip)
1063     if prereq:
1064       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1065     else:
1066       raise errors.OpExecError(msg)
1067
1068
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  The secret is read from C{constants.CLUSTER_DOMAIN_SECRET_FILE} using
  L{utils.ReadOneLineFile} with C{strict=True}.

  @return: whatever L{utils.ReadOneLineFile} returns for that file

  """
  secret_path = constants.CLUSTER_DOMAIN_SECRET_FILE
  return utils.ReadOneLineFile(secret_path, strict=True)
1075
1076
1077 def _CheckInstanceState(lu, instance, req_states, msg=None):
1078   """Ensure that an instance is in one of the required states.
1079
1080   @param lu: the LU on behalf of which we make the check
1081   @param instance: the instance to check
1082   @param msg: if passed, should be a message to replace the default one
1083   @raise errors.OpPrereqError: if the instance is not in the required state
1084
1085   """
1086   if msg is None:
1087     msg = "can't use instance from outside %s states" % ", ".join(req_states)
1088   if instance.admin_state not in req_states:
1089     raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1090                                (instance.name, instance.admin_state, msg),
1091                                errors.ECODE_STATE)
1092
1093   if constants.ADMINST_UP not in req_states:
1094     pnode = instance.primary_node
1095     ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1096     ins_l.Raise("Can't contact node %s for instance information" % pnode,
1097                 prereq=True, ecode=errors.ECODE_ENVIRON)
1098
1099     if instance.name in ins_l.payload:
1100       raise errors.OpPrereqError("Instance %s is running, %s" %
1101                                  (instance.name, msg), errors.ECODE_STATE)
1102
1103
1104 def _ComputeMinMaxSpec(name, ipolicy, value):
1105   """Computes if value is in the desired range.
1106
1107   @param name: name of the parameter for which we perform the check
1108   @param ipolicy: dictionary containing min, max and std values
1109   @param value: actual value that we want to use
1110   @return: None or element not meeting the criteria
1111
1112
1113   """
1114   if value in [None, constants.VALUE_AUTO]:
1115     return None
1116   max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1117   min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1118   if value > max_v or min_v > value:
1119     return ("%s value %s is not in range [%s, %s]" %
1120             (name, value, min_v, max_v))
1121   return None
1122
1123
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  Every spec value (and each individual disk size) is handed to
  C{_compute_fn} together with its spec name; results evaluating to
  false are dropped.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list of no violations are found

  """
  assert disk_count == len(disk_sizes)

  specs = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    (constants.ISPEC_SPINDLE_USE, spindle_use),
    ]
  # Every disk is checked on its own against the disk size spec
  specs.extend([(constants.ISPEC_DISK_SIZE, size) for size in disk_sizes])

  violations = []
  for (name, value) in specs:
    verdict = _compute_fn(name, ipolicy, value)
    if verdict:
      violations.append(verdict)
  return violations
1160
1161
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  Memory, CPU and spindle figures are read from the instance's own
  C{beparams} (None if not set there); disk and NIC figures come from
  the instance's C{disks} and C{nics} lists.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: whatever C{_compute_fn} returns
  @see: L{_ComputeIPolicySpecViolation}

  """
  beparams = instance.beparams
  disks = instance.disks

  return _compute_fn(ipolicy,
                     beparams.get(constants.BE_MAXMEM, None),
                     beparams.get(constants.BE_VCPUS, None),
                     len(disks),
                     len(instance.nics),
                     [disk.size for disk in disks],
                     beparams.get(constants.BE_SPINDLE_USE, None))
1183
1184
def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
    _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  Values missing from the spec default to None (memory, CPU, spindle
  use), 0 (disk and NIC count) or an empty list (disk sizes).

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: whatever C{_compute_fn} returns
  @see: L{_ComputeIPolicySpecViolation}

  """
  lookup = instance_spec.get

  return _compute_fn(ipolicy,
                     lookup(constants.ISPEC_MEM_SIZE, None),
                     lookup(constants.ISPEC_CPU_COUNT, None),
                     lookup(constants.ISPEC_DISK_COUNT, 0),
                     lookup(constants.ISPEC_NIC_COUNT, 0),
                     lookup(constants.ISPEC_DISK_SIZE, []),
                     lookup(constants.ISPEC_SPINDLE_USE, None))
1206
1207
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  If the instance stays in the same group no check is made and an empty
  list is returned.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: an empty list, or the result of C{_compute_fn}
  @see: L{_ComputeIPolicySpecViolation}

  """
  stays_in_group = (current_group == target_group)
  if not stays_in_group:
    return _compute_fn(ipolicy, instance)

  return []
1225
1226
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  The group of the instance's primary node and the group of the target
  node are handed to C{_compute_fn}; any violation it reports is either
  logged as a warning or raised, depending on C{ignore}.

  @param lu: the LU on behalf of which we make the check
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate (an object with a C{group}
      attribute)
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @raise errors.OpPrereqError: if there are violations and C{ignore} is
      not set
  @see: L{_ComputeIPolicySpecViolation}

  """
  pnode_info = lu.cfg.GetNodeInfo(instance.primary_node)
  violations = _compute_fn(ipolicy, instance, pnode_info.group, node.group)
  if not violations:
    return

  msg = ("Instance does not meet target node group's (%s) instance"
         " policy: %s") % (node.group, utils.CommaJoin(violations))
  if not ignore:
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

  lu.LogWarning(msg)
1249
1250
1251 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1252   """Computes a set of any instances that would violate the new ipolicy.
1253
1254   @param old_ipolicy: The current (still in-place) ipolicy
1255   @param new_ipolicy: The new (to become) ipolicy
1256   @param instances: List of instances to verify
1257   @return: A list of instances which violates the new ipolicy but did not before
1258
1259   """
1260   return (_ComputeViolatingInstances(old_ipolicy, instances) -
1261           _ComputeViolatingInstances(new_ipolicy, instances))
1262
1263
1264 def _ExpandItemName(fn, name, kind):
1265   """Expand an item name.
1266
1267   @param fn: the function to use for expansion
1268   @param name: requested item name
1269   @param kind: text description ('Node' or 'Instance')
1270   @return: the resolved (full) name
1271   @raise errors.OpPrereqError: if the item is not found
1272
1273   """
1274   full_name = fn(name)
1275   if full_name is None:
1276     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1277                                errors.ECODE_NOENT)
1278   return full_name
1279
1280
1281 def _ExpandNodeName(cfg, name):
1282   """Wrapper over L{_ExpandItemName} for nodes."""
1283   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1284
1285
1286 def _ExpandInstanceName(cfg, name):
1287   """Wrapper over L{_ExpandItemName} for instance."""
1288   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1289
1290
1291 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1292                           minmem, maxmem, vcpus, nics, disk_template, disks,
1293                           bep, hvp, hypervisor_name, tags):
1294   """Builds instance related env variables for hooks
1295
1296   This builds the hook environment from individual variables.
1297
1298   @type name: string
1299   @param name: the name of the instance
1300   @type primary_node: string
1301   @param primary_node: the name of the instance's primary node
1302   @type secondary_nodes: list
1303   @param secondary_nodes: list of secondary nodes as strings
1304   @type os_type: string
1305   @param os_type: the name of the instance's OS
1306   @type status: string
1307   @param status: the desired status of the instance
1308   @type minmem: string
1309   @param minmem: the minimum memory size of the instance
1310   @type maxmem: string
1311   @param maxmem: the maximum memory size of the instance
1312   @type vcpus: string
1313   @param vcpus: the count of VCPUs the instance has
1314   @type nics: list
1315   @param nics: list of tuples (ip, mac, mode, link) representing
1316       the NICs the instance has
1317   @type disk_template: string
1318   @param disk_template: the disk template of the instance
1319   @type disks: list
1320   @param disks: the list of (size, mode) pairs
1321   @type bep: dict
1322   @param bep: the backend parameters for the instance
1323   @type hvp: dict
1324   @param hvp: the hypervisor parameters for the instance
1325   @type hypervisor_name: string
1326   @param hypervisor_name: the hypervisor for the instance
1327   @type tags: list
1328   @param tags: list of instance tags as strings
1329   @rtype: dict
1330   @return: the hook environment for this instance
1331
1332   """
1333   env = {
1334     "OP_TARGET": name,
1335     "INSTANCE_NAME": name,
1336     "INSTANCE_PRIMARY": primary_node,
1337     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1338     "INSTANCE_OS_TYPE": os_type,
1339     "INSTANCE_STATUS": status,
1340     "INSTANCE_MINMEM": minmem,
1341     "INSTANCE_MAXMEM": maxmem,
1342     # TODO(2.7) remove deprecated "memory" value
1343     "INSTANCE_MEMORY": maxmem,
1344     "INSTANCE_VCPUS": vcpus,
1345     "INSTANCE_DISK_TEMPLATE": disk_template,
1346     "INSTANCE_HYPERVISOR": hypervisor_name,
1347   }
1348   if nics:
1349     nic_count = len(nics)
1350     for idx, (ip, mac, mode, link) in enumerate(nics):
1351       if ip is None:
1352         ip = ""
1353       env["INSTANCE_NIC%d_IP" % idx] = ip
1354       env["INSTANCE_NIC%d_MAC" % idx] = mac
1355       env["INSTANCE_NIC%d_MODE" % idx] = mode
1356       env["INSTANCE_NIC%d_LINK" % idx] = link
1357       if mode == constants.NIC_MODE_BRIDGED:
1358         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1359   else:
1360     nic_count = 0
1361
1362   env["INSTANCE_NIC_COUNT"] = nic_count
1363
1364   if disks:
1365     disk_count = len(disks)
1366     for idx, (size, mode) in enumerate(disks):
1367       env["INSTANCE_DISK%d_SIZE" % idx] = size
1368       env["INSTANCE_DISK%d_MODE" % idx] = mode
1369   else:
1370     disk_count = 0
1371
1372   env["INSTANCE_DISK_COUNT"] = disk_count
1373
1374   if not tags:
1375     tags = []
1376
1377   env["INSTANCE_TAGS"] = " ".join(tags)
1378
1379   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1380     for key, value in source.items():
1381       env["INSTANCE_%s_%s" % (kind, key)] = value
1382
1383   return env
1384
1385
1386 def _NICListToTuple(lu, nics):
1387   """Build a list of nic information tuples.
1388
1389   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1390   value in LUInstanceQueryData.
1391
1392   @type lu:  L{LogicalUnit}
1393   @param lu: the logical unit on whose behalf we execute
1394   @type nics: list of L{objects.NIC}
1395   @param nics: list of nics to convert to hooks tuples
1396
1397   """
1398   hooks_nics = []
1399   cluster = lu.cfg.GetClusterInfo()
1400   for nic in nics:
1401     ip = nic.ip
1402     mac = nic.mac
1403     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1404     mode = filled_params[constants.NIC_MODE]
1405     link = filled_params[constants.NIC_LINK]
1406     hooks_nics.append((ip, mac, mode, link))
1407   return hooks_nics
1408
1409
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  The arguments for L{_BuildInstanceHookEnv} are derived from the
  instance object, with backend and hypervisor parameters filled through
  the cluster's C{FillBE} and C{FillHV}.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values; the keys are argument names of L{_BuildInstanceHookEnv}
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)

  args = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_state,
    maxmem=bep[constants.BE_MAXMEM],
    minmem=bep[constants.BE_MINMEM],
    vcpus=bep[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=bep,
    hvp=hvp,
    hypervisor_name=instance.hypervisor,
    tags=instance.tags,
    )
  if override:
    args.update(override)

  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1448
1449
1450 def _AdjustCandidatePool(lu, exceptions):
1451   """Adjust the candidate pool after node operations.
1452
1453   """
1454   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1455   if mod_list:
1456     lu.LogInfo("Promoted nodes to master candidate role: %s",
1457                utils.CommaJoin(node.name for node in mod_list))
1458     for name in mod_list:
1459       lu.context.ReaddNode(name)
1460   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1461   if mc_now > mc_max:
1462     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1463                (mc_now, mc_max))
1464
1465
1466 def _DecideSelfPromotion(lu, exceptions=None):
1467   """Decide whether I should promote myself as a master candidate.
1468
1469   """
1470   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1471   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1472   # the new node will increase mc_max with one, so:
1473   mc_should = min(mc_should + 1, cp_size)
1474   return mc_now < mc_should
1475
1476
1477 def _CalculateGroupIPolicy(cluster, group):
1478   """Calculate instance policy for group.
1479
1480   """
1481   return cluster.SimpleFillIPolicy(group.ipolicy)
1482
1483
1484 def _ComputeViolatingInstances(ipolicy, instances):
1485   """Computes a set of instances who violates given ipolicy.
1486
1487   @param ipolicy: The ipolicy to verify
1488   @type instances: object.Instance
1489   @param instances: List of instances to verify
1490   @return: A frozenset of instance names violating the ipolicy
1491
1492   """
1493   return frozenset([inst.name for inst in instances
1494                     if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1495
1496
1497 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1498   """Check that the brigdes needed by a list of nics exist.
1499
1500   """
1501   cluster = lu.cfg.GetClusterInfo()
1502   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1503   brlist = [params[constants.NIC_LINK] for params in paramslist
1504             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1505   if brlist:
1506     result = lu.rpc.call_bridges_exist(target_node, brlist)
1507     result.Raise("Error checking bridges on destination node '%s'" %
1508                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1509
1510
1511 def _CheckInstanceBridgesExist(lu, instance, node=None):
1512   """Check that the brigdes needed by an instance exist.
1513
1514   """
1515   if node is None:
1516     node = instance.primary_node
1517   _CheckNicsBridgesExist(lu, instance.nics, node)
1518
1519
1520 def _CheckOSVariant(os_obj, name):
1521   """Check whether an OS name conforms to the os variants specification.
1522
1523   @type os_obj: L{objects.OS}
1524   @param os_obj: OS object to check
1525   @type name: string
1526   @param name: OS name passed by the user, to check for validity
1527
1528   """
1529   variant = objects.OS.GetVariant(name)
1530   if not os_obj.supported_variants:
1531     if variant:
1532       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1533                                  " passed)" % (os_obj.name, variant),
1534                                  errors.ECODE_INVAL)
1535     return
1536   if not variant:
1537     raise errors.OpPrereqError("OS name must include a variant",
1538                                errors.ECODE_INVAL)
1539
1540   if variant not in os_obj.supported_variants:
1541     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1542
1543
1544 def _GetNodeInstancesInner(cfg, fn):
1545   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1546
1547
1548 def _GetNodeInstances(cfg, node_name):
1549   """Returns a list of all primary and secondary instances on a node.
1550
1551   """
1552
1553   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1554
1555
1556 def _GetNodePrimaryInstances(cfg, node_name):
1557   """Returns primary instances on a node.
1558
1559   """
1560   return _GetNodeInstancesInner(cfg,
1561                                 lambda inst: node_name == inst.primary_node)
1562
1563
1564 def _GetNodeSecondaryInstances(cfg, node_name):
1565   """Returns secondary instances on a node.
1566
1567   """
1568   return _GetNodeInstancesInner(cfg,
1569                                 lambda inst: node_name in inst.secondary_nodes)
1570
1571
1572 def _GetStorageTypeArgs(cfg, storage_type):
1573   """Returns the arguments for a storage type.
1574
1575   """
1576   # Special case for file storage
1577   if storage_type == constants.ST_FILE:
1578     # storage.FileStorage wants a list of storage directories
1579     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1580
1581   return []
1582
1583
1584 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1585   faulty = []
1586
1587   for dev in instance.disks:
1588     cfg.SetDiskID(dev, node_name)
1589
1590   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1591   result.Raise("Failed to get disk status from node %s" % node_name,
1592                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1593
1594   for idx, bdev_status in enumerate(result.payload):
1595     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1596       faulty.append(idx)
1597
1598   return faulty
1599
1600
1601 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1602   """Check the sanity of iallocator and node arguments and use the
1603   cluster-wide iallocator if appropriate.
1604
1605   Check that at most one of (iallocator, node) is specified. If none is
1606   specified, then the LU's opcode's iallocator slot is filled with the
1607   cluster-wide default iallocator.
1608
1609   @type iallocator_slot: string
1610   @param iallocator_slot: the name of the opcode iallocator slot
1611   @type node_slot: string
1612   @param node_slot: the name of the opcode target node slot
1613
1614   """
1615   node = getattr(lu.op, node_slot, None)
1616   iallocator = getattr(lu.op, iallocator_slot, None)
1617
1618   if node is not None and iallocator is not None:
1619     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1620                                errors.ECODE_INVAL)
1621   elif node is None and iallocator is None:
1622     default_iallocator = lu.cfg.GetDefaultIAllocator()
1623     if default_iallocator:
1624       setattr(lu.op, iallocator_slot, default_iallocator)
1625     else:
1626       raise errors.OpPrereqError("No iallocator or node given and no"
1627                                  " cluster-wide default iallocator found;"
1628                                  " please specify either an iallocator or a"
1629                                  " node, or set a cluster-wide default"
1630                                  " iallocator")
1631
1632
1633 def _GetDefaultIAllocator(cfg, iallocator):
1634   """Decides on which iallocator to use.
1635
1636   @type cfg: L{config.ConfigWriter}
1637   @param cfg: Cluster configuration object
1638   @type iallocator: string or None
1639   @param iallocator: Iallocator specified in opcode
1640   @rtype: string
1641   @return: Iallocator name
1642
1643   """
1644   if not iallocator:
1645     # Use default iallocator
1646     iallocator = cfg.GetDefaultIAllocator()
1647
1648   if not iallocator:
1649     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1650                                " opcode nor as a cluster-wide default",
1651                                errors.ECODE_INVAL)
1652
1653   return iallocator
1654
1655
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The LU itself performs no work; it only provides the hooks
  environment and the hooks node lists.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment containing only C{OP_TARGET}, set to the
        cluster name

    """
    cluster_name = self.cfg.GetClusterName()
    return {"OP_TARGET": cluster_name}

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two node lists; the first one is empty, the second one
        holds only the master node

    """
    master = self.cfg.GetMasterNode()
    return ([], [master])

  def Exec(self, feedback_fn):
    """Nothing to do.

    @param feedback_fn: unused
    @return: always True

    """
    return True
1682
1683
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment containing only C{OP_TARGET}, set to the
        cluster name

    """
    cluster_name = self.cfg.GetClusterName()
    return {"OP_TARGET": cluster_name}

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two empty node lists

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty, i.e. that the master is
    the only node left and that there are no instances.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodes = self.cfg.GetNodeList()
    only_master_left = (len(nodes) == 1 and nodes[0] == master)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodes) - 1),
                                 errors.ECODE_INVAL)

    instances = self.cfg.GetInstanceList()
    if instances:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instances),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master node and then asks it to
    deactivate the master IP address; a failure to do so is only logged
    as a warning.

    @param feedback_fn: unused
    @return: the name of the master node

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    master_name = master_params.name

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_name)

    use_external_script = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_name,
                                                     master_params,
                                                     use_external_script)
    fail_msg = result.fail_msg
    if fail_msg:
      self.LogWarning("Error disabling the master IP address: %s", fail_msg)

    return master_name
1743
1744
1745 def _VerifyCertificate(filename):
1746   """Verifies a certificate for L{LUClusterVerifyConfig}.
1747
1748   @type filename: string
1749   @param filename: Path to PEM file
1750
1751   """
1752   try:
1753     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1754                                            utils.ReadFile(filename))
1755   except Exception, err: # pylint: disable=W0703
1756     return (LUClusterVerifyConfig.ETYPE_ERROR,
1757             "Failed to load X509 certificate %s: %s" % (filename, err))
1758
1759   (errcode, msg) = \
1760     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1761                                 constants.SSL_CERT_EXPIRATION_ERROR)
1762
1763   if msg:
1764     fnamemsg = "While verifying %s: %s" % (filename, msg)
1765   else:
1766     fnamemsg = None
1767
1768   if errcode is None:
1769     return (None, fnamemsg)
1770   elif errcode == utils.CERT_WARNING:
1771     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1772   elif errcode == utils.CERT_ERROR:
1773     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1774
1775   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1776
1777
1778 def _GetAllHypervisorParameters(cluster, instances):
1779   """Compute the set of all hypervisor parameters.
1780
1781   @type cluster: L{objects.Cluster}
1782   @param cluster: the cluster object
1783   @param instances: list of L{objects.Instance}
1784   @param instances: additional instances from which to obtain parameters
1785   @rtype: list of (origin, hypervisor, parameters)
1786   @return: a list with all parameters found, indicating the hypervisor they
1787        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1788
1789   """
1790   hvp_data = []
1791
1792   for hv_name in cluster.enabled_hypervisors:
1793     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1794
1795   for os_name, os_hvp in cluster.os_hvp.items():
1796     for hv_name, hv_params in os_hvp.items():
1797       if hv_params:
1798         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1799         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1800
1801   # TODO: collapse identical parameter values in a single one
1802   for instance in instances:
1803     if instance.hvparams:
1804       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1805                        cluster.FillHV(instance)))
1806
1807   return hvp_data
1808
1809
1810 class _VerifyErrors(object):
1811   """Mix-in for cluster/group verify LUs.
1812
1813   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1814   self.op and self._feedback_fn to be available.)
1815
1816   """
1817
1818   ETYPE_FIELD = "code"
1819   ETYPE_ERROR = "ERROR"
1820   ETYPE_WARNING = "WARNING"
1821
1822   def _Error(self, ecode, item, msg, *args, **kwargs):
1823     """Format an error message.
1824
1825     Based on the opcode's error_codes parameter, either format a
1826     parseable error code, or a simpler error string.
1827
1828     This must be called only from Exec and functions called from Exec.
1829
1830     """
1831     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1832     itype, etxt, _ = ecode
1833     # first complete the msg
1834     if args:
1835       msg = msg % args
1836     # then format the whole message
1837     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1838       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1839     else:
1840       if item:
1841         item = " " + item
1842       else:
1843         item = ""
1844       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1845     # and finally report it via the feedback_fn
1846     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1847
1848   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1849     """Log an error message if the passed condition is True.
1850
1851     """
1852     cond = (bool(cond)
1853             or self.op.debug_simulate_errors) # pylint: disable=E1101
1854
1855     # If the error code is in the list of ignored errors, demote the error to a
1856     # warning
1857     (_, etxt, _) = ecode
1858     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1859       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1860
1861     if cond:
1862       self._Error(ecode, *args, **kwargs)
1863
1864     # do not mark the operation as failed for WARN cases only
1865     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1866       self.bad = self.bad or cond
1867
1868
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares that no locks are needed by this LU.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Builds the verification jobs.

    If the opcode names a group, a single job verifying that group is
    created. Otherwise one job verifying the global configuration is
    created, followed by one job per node group, each of them depending on
    the configuration job.

    @param feedback_fn: unused
    @return: a L{ResultWithJobs} object wrapping the list of jobs

    """
    single_group = self.op.group_name
    jobs = []

    if single_group:
      groups = [single_group]
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

    for group in groups:
      if single_group:
        depends = None
      else:
        # Always depend on global verification: the negated number of jobs
        # collected so far refers back to the first job in the list
        depends = [(-len(jobs), [])]

      jobs.append([
        opcodes.OpClusterVerifyGroup(group_name=group,
                                     ignore_errors=self.op.ignore_errors,
                                     depends=depends)
        ])

    # Fix up all parameters
    for job in jobs:
      for op in job:
        op.debug_simulate_errors = self.op.debug_simulate_errors
        op.verbose = self.op.verbose
        op.error_codes = self.op.error_codes
        try:
          op.skip_checks = self.op.skip_checks
        except AttributeError:
          # Only opcodes other than the group verification may lack the
          # "skip_checks" attribute
          assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1911
1912
1913 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1914   """Verifies the cluster config.
1915
1916   """
1917   REQ_BGL = False
1918
1919   def _VerifyHVP(self, hvp_data):
1920     """Verifies locally the syntax of the hypervisor parameters.
1921
1922     """
1923     for item, hv_name, hv_params in hvp_data:
1924       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1925              (item, hv_name))
1926       try:
1927         hv_class = hypervisor.GetHypervisor(hv_name)
1928         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1929         hv_class.CheckParameterSyntax(hv_params)
1930       except errors.GenericError, err:
1931         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1932
1933   def ExpandNames(self):
1934     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1935     self.share_locks = _ShareAll()
1936
1937   def CheckPrereq(self):
1938     """Check prerequisites.
1939
1940     """
1941     # Retrieve all information
1942     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1943     self.all_node_info = self.cfg.GetAllNodesInfo()
1944     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1945
1946   def Exec(self, feedback_fn):
1947     """Verify integrity of cluster, performing various test on nodes.
1948
1949     """
1950     self.bad = False
1951     self._feedback_fn = feedback_fn
1952
1953     feedback_fn("* Verifying cluster config")
1954
1955     for msg in self.cfg.VerifyConfig():
1956       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1957
1958     feedback_fn("* Verifying cluster certificate files")
1959
1960     for cert_filename in constants.ALL_CERT_FILES:
1961       (errcode, msg) = _VerifyCertificate(cert_filename)
1962       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1963
1964     feedback_fn("* Verifying hypervisor parameters")
1965
1966     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1967                                                 self.all_inst_info.values()))
1968
1969     feedback_fn("* Verifying all nodes belong to an existing group")
1970
1971     # We do this verification here because, should this bogus circumstance
1972     # occur, it would never be caught by VerifyGroup, which only acts on
1973     # nodes/instances reachable from existing node groups.
1974
1975     dangling_nodes = set(node.name for node in self.all_node_info.values()
1976                          if node.group not in self.all_group_info)
1977
1978     dangling_instances = {}
1979     no_node_instances = []
1980
1981     for inst in self.all_inst_info.values():
1982       if inst.primary_node in dangling_nodes:
1983         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1984       elif inst.primary_node not in self.all_node_info:
1985         no_node_instances.append(inst.name)
1986
1987     pretty_dangling = [
1988         "%s (%s)" %
1989         (node.name,
1990          utils.CommaJoin(dangling_instances.get(node.name,
1991                                                 ["no instances"])))
1992         for node in dangling_nodes]
1993
1994     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1995                   None,
1996                   "the following nodes (and their instances) belong to a non"
1997                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1998
1999     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2000                   None,
2001                   "the following instances have a non-existing primary-node:"
2002                   " %s", utils.CommaJoin(no_node_instances))
2003
2004     return not self.bad
2005
2006
2007 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2008   """Verifies the status of a node group.
2009
2010   """
2011   HPATH = "cluster-verify"
2012   HTYPE = constants.HTYPE_CLUSTER
2013   REQ_BGL = False
2014
2015   _HOOKS_INDENT_RE = re.compile("^", re.M)
2016
2017   class NodeImage(object):
2018     """A class representing the logical and physical status of a node.
2019
2020     @type name: string
2021     @ivar name: the node name to which this object refers
2022     @ivar volumes: a structure as returned from
2023         L{ganeti.backend.GetVolumeList} (runtime)
2024     @ivar instances: a list of running instances (runtime)
2025     @ivar pinst: list of configured primary instances (config)
2026     @ivar sinst: list of configured secondary instances (config)
2027     @ivar sbp: dictionary of {primary-node: list of instances} for all
2028         instances for which this node is secondary (config)
2029     @ivar mfree: free memory, as reported by hypervisor (runtime)
2030     @ivar dfree: free disk, as reported by the node (runtime)
2031     @ivar offline: the offline status (config)
2032     @type rpc_fail: boolean
2033     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2034         not whether the individual keys were correct) (runtime)
2035     @type lvm_fail: boolean
2036     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2037     @type hyp_fail: boolean
2038     @ivar hyp_fail: whether the RPC call didn't return the instance list
2039     @type ghost: boolean
2040     @ivar ghost: whether this is a known node or not (config)
2041     @type os_fail: boolean
2042     @ivar os_fail: whether the RPC call didn't return valid OS data
2043     @type oslist: list
2044     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2045     @type vm_capable: boolean
2046     @ivar vm_capable: whether the node can host instances
2047
2048     """
2049     def __init__(self, offline=False, name=None, vm_capable=True):
2050       self.name = name
2051       self.volumes = {}
2052       self.instances = []
2053       self.pinst = []
2054       self.sinst = []
2055       self.sbp = {}
2056       self.mfree = 0
2057       self.dfree = 0
2058       self.offline = offline
2059       self.vm_capable = vm_capable
2060       self.rpc_fail = False
2061       self.lvm_fail = False
2062       self.hyp_fail = False
2063       self.ghost = False
2064       self.os_fail = False
2065       self.oslist = {}
2066
2067   def ExpandNames(self):
2068     # This raises errors.OpPrereqError on its own:
2069     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2070
2071     # Get instances in node group; this is unsafe and needs verification later
2072     inst_names = \
2073       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2074
2075     self.needed_locks = {
2076       locking.LEVEL_INSTANCE: inst_names,
2077       locking.LEVEL_NODEGROUP: [self.group_uuid],
2078       locking.LEVEL_NODE: [],
2079       }
2080
2081     self.share_locks = _ShareAll()
2082
2083   def DeclareLocks(self, level):
2084     if level == locking.LEVEL_NODE:
2085       # Get members of node group; this is unsafe and needs verification later
2086       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2087
2088       all_inst_info = self.cfg.GetAllInstancesInfo()
2089
2090       # In Exec(), we warn about mirrored instances that have primary and
2091       # secondary living in separate node groups. To fully verify that
2092       # volumes for these instances are healthy, we will need to do an
2093       # extra call to their secondaries. We ensure here those nodes will
2094       # be locked.
2095       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2096         # Important: access only the instances whose lock is owned
2097         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2098           nodes.update(all_inst_info[inst].secondary_nodes)
2099
2100       self.needed_locks[locking.LEVEL_NODE] = nodes
2101
2102   def CheckPrereq(self):
2103     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2104     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2105
2106     group_nodes = set(self.group_info.members)
2107     group_instances = \
2108       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2109
2110     unlocked_nodes = \
2111         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2112
2113     unlocked_instances = \
2114         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2115
2116     if unlocked_nodes:
2117       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2118                                  utils.CommaJoin(unlocked_nodes),
2119                                  errors.ECODE_STATE)
2120
2121     if unlocked_instances:
2122       raise errors.OpPrereqError("Missing lock for instances: %s" %
2123                                  utils.CommaJoin(unlocked_instances),
2124                                  errors.ECODE_STATE)
2125
2126     self.all_node_info = self.cfg.GetAllNodesInfo()
2127     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2128
2129     self.my_node_names = utils.NiceSort(group_nodes)
2130     self.my_inst_names = utils.NiceSort(group_instances)
2131
2132     self.my_node_info = dict((name, self.all_node_info[name])
2133                              for name in self.my_node_names)
2134
2135     self.my_inst_info = dict((name, self.all_inst_info[name])
2136                              for name in self.my_inst_names)
2137
2138     # We detect here the nodes that will need the extra RPC calls for verifying
2139     # split LV volumes; they should be locked.
2140     extra_lv_nodes = set()
2141
2142     for inst in self.my_inst_info.values():
2143       if inst.disk_template in constants.DTS_INT_MIRROR:
2144         for nname in inst.all_nodes:
2145           if self.all_node_info[nname].group != self.group_uuid:
2146             extra_lv_nodes.add(nname)
2147
2148     unlocked_lv_nodes = \
2149         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2150
2151     if unlocked_lv_nodes:
2152       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2153                                  utils.CommaJoin(unlocked_lv_nodes),
2154                                  errors.ECODE_STATE)
2155     self.extra_lv_nodes = list(extra_lv_nodes)
2156
2157   def _VerifyNode(self, ninfo, nresult):
2158     """Perform some basic validation on data returned from a node.
2159
2160       - check the result data structure is well formed and has all the
2161         mandatory fields
2162       - check ganeti version
2163
2164     @type ninfo: L{objects.Node}
2165     @param ninfo: the node to check
2166     @param nresult: the results from the node
2167     @rtype: boolean
2168     @return: whether overall this call was successful (and we can expect
2169          reasonable values in the respose)
2170
2171     """
2172     node = ninfo.name
2173     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2174
2175     # main result, nresult should be a non-empty dict
2176     test = not nresult or not isinstance(nresult, dict)
2177     _ErrorIf(test, constants.CV_ENODERPC, node,
2178                   "unable to verify node: no data returned")
2179     if test:
2180       return False
2181
2182     # compares ganeti version
2183     local_version = constants.PROTOCOL_VERSION
2184     remote_version = nresult.get("version", None)
2185     test = not (remote_version and
2186                 isinstance(remote_version, (list, tuple)) and
2187                 len(remote_version) == 2)
2188     _ErrorIf(test, constants.CV_ENODERPC, node,
2189              "connection to node returned invalid data")
2190     if test:
2191       return False
2192
2193     test = local_version != remote_version[0]
2194     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2195              "incompatible protocol versions: master %s,"
2196              " node %s", local_version, remote_version[0])
2197     if test:
2198       return False
2199
2200     # node seems compatible, we can actually try to look into its results
2201
2202     # full package version
2203     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2204                   constants.CV_ENODEVERSION, node,
2205                   "software version mismatch: master %s, node %s",
2206                   constants.RELEASE_VERSION, remote_version[1],
2207                   code=self.ETYPE_WARNING)
2208
2209     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2210     if ninfo.vm_capable and isinstance(hyp_result, dict):
2211       for hv_name, hv_result in hyp_result.iteritems():
2212         test = hv_result is not None
2213         _ErrorIf(test, constants.CV_ENODEHV, node,
2214                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2215
2216     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2217     if ninfo.vm_capable and isinstance(hvp_result, list):
2218       for item, hv_name, hv_result in hvp_result:
2219         _ErrorIf(True, constants.CV_ENODEHV, node,
2220                  "hypervisor %s parameter verify failure (source %s): %s",
2221                  hv_name, item, hv_result)
2222
2223     test = nresult.get(constants.NV_NODESETUP,
2224                        ["Missing NODESETUP results"])
2225     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2226              "; ".join(test))
2227
2228     return True
2229
2230   def _VerifyNodeTime(self, ninfo, nresult,
2231                       nvinfo_starttime, nvinfo_endtime):
2232     """Check the node time.
2233
2234     @type ninfo: L{objects.Node}
2235     @param ninfo: the node to check
2236     @param nresult: the remote results for the node
2237     @param nvinfo_starttime: the start time of the RPC call
2238     @param nvinfo_endtime: the end time of the RPC call
2239
2240     """
2241     node = ninfo.name
2242     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2243
2244     ntime = nresult.get(constants.NV_TIME, None)
2245     try:
2246       ntime_merged = utils.MergeTime(ntime)
2247     except (ValueError, TypeError):
2248       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2249       return
2250
2251     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2252       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2253     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2254       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2255     else:
2256       ntime_diff = None
2257
2258     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2259              "Node time diverges by at least %s from master node time",
2260              ntime_diff)
2261
2262   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2263     """Check the node LVM results.
2264
2265     @type ninfo: L{objects.Node}
2266     @param ninfo: the node to check
2267     @param nresult: the remote results for the node
2268     @param vg_name: the configured VG name
2269
2270     """
2271     if vg_name is None:
2272       return
2273
2274     node = ninfo.name
2275     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2276
2277     # checks vg existence and size > 20G
2278     vglist = nresult.get(constants.NV_VGLIST, None)
2279     test = not vglist
2280     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2281     if not test:
2282       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2283                                             constants.MIN_VG_SIZE)
2284       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2285
2286     # check pv names
2287     pvlist = nresult.get(constants.NV_PVLIST, None)
2288     test = pvlist is None
2289     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2290     if not test:
2291       # check that ':' is not present in PV names, since it's a
2292       # special character for lvcreate (denotes the range of PEs to
2293       # use on the PV)
2294       for _, pvname, owner_vg in pvlist:
2295         test = ":" in pvname
2296         _ErrorIf(test, constants.CV_ENODELVM, node,
2297                  "Invalid character ':' in PV '%s' of VG '%s'",
2298                  pvname, owner_vg)
2299
2300   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2301     """Check the node bridges.
2302
2303     @type ninfo: L{objects.Node}
2304     @param ninfo: the node to check
2305     @param nresult: the remote results for the node
2306     @param bridges: the expected list of bridges
2307
2308     """
2309     if not bridges:
2310       return
2311
2312     node = ninfo.name
2313     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2314
2315     missing = nresult.get(constants.NV_BRIDGES, None)
2316     test = not isinstance(missing, list)
2317     _ErrorIf(test, constants.CV_ENODENET, node,
2318              "did not return valid bridge information")
2319     if not test:
2320       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2321                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2322
2323   def _VerifyNodeUserScripts(self, ninfo, nresult):
2324     """Check the results of user scripts presence and executability on the node
2325
2326     @type ninfo: L{objects.Node}
2327     @param ninfo: the node to check
2328     @param nresult: the remote results for the node
2329
2330     """
2331     node = ninfo.name
2332
2333     test = not constants.NV_USERSCRIPTS in nresult
2334     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2335                   "did not return user scripts information")
2336
2337     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2338     if not test:
2339       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2340                     "user scripts not present or not executable: %s" %
2341                     utils.CommaJoin(sorted(broken_scripts)))
2342
2343   def _VerifyNodeNetwork(self, ninfo, nresult):
2344     """Check the node network connectivity results.
2345
2346     @type ninfo: L{objects.Node}
2347     @param ninfo: the node to check
2348     @param nresult: the remote results for the node
2349
2350     """
2351     node = ninfo.name
2352     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2353
2354     test = constants.NV_NODELIST not in nresult
2355     _ErrorIf(test, constants.CV_ENODESSH, node,
2356              "node hasn't returned node ssh connectivity data")
2357     if not test:
2358       if nresult[constants.NV_NODELIST]:
2359         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2360           _ErrorIf(True, constants.CV_ENODESSH, node,
2361                    "ssh communication with node '%s': %s", a_node, a_msg)
2362
2363     test = constants.NV_NODENETTEST not in nresult
2364     _ErrorIf(test, constants.CV_ENODENET, node,
2365              "node hasn't returned node tcp connectivity data")
2366     if not test:
2367       if nresult[constants.NV_NODENETTEST]:
2368         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2369         for anode in nlist:
2370           _ErrorIf(True, constants.CV_ENODENET, node,
2371                    "tcp communication with node '%s': %s",
2372                    anode, nresult[constants.NV_NODENETTEST][anode])
2373
2374     test = constants.NV_MASTERIP not in nresult
2375     _ErrorIf(test, constants.CV_ENODENET, node,
2376              "node hasn't returned node master IP reachability data")
2377     if not test:
2378       if not nresult[constants.NV_MASTERIP]:
2379         if node == self.master_node:
2380           msg = "the master node cannot reach the master IP (not configured?)"
2381         else:
2382           msg = "cannot reach the master IP"
2383         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2384
2385   def _VerifyInstance(self, instance, instanceconfig, node_image,
2386                       diskstatus):
2387     """Verify an instance.
2388
2389     This function checks to see if the required block devices are
2390     available on the instance's node.
2391
2392     """
2393     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2394     node_current = instanceconfig.primary_node
2395
2396     node_vol_should = {}
2397     instanceconfig.MapLVsByNode(node_vol_should)
2398
2399     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2400     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2401     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2402
2403     for node in node_vol_should:
2404       n_img = node_image[node]
2405       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2406         # ignore missing volumes on offline or broken nodes
2407         continue
2408       for volume in node_vol_should[node]:
2409         test = volume not in n_img.volumes
2410         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2411                  "volume %s missing on node %s", volume, node)
2412
2413     if instanceconfig.admin_state == constants.ADMINST_UP:
2414       pri_img = node_image[node_current]
2415       test = instance not in pri_img.instances and not pri_img.offline
2416       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2417                "instance not running on its primary node %s",
2418                node_current)
2419
2420     diskdata = [(nname, success, status, idx)
2421                 for (nname, disks) in diskstatus.items()
2422                 for idx, (success, status) in enumerate(disks)]
2423
2424     for nname, success, bdev_status, idx in diskdata:
2425       # the 'ghost node' construction in Exec() ensures that we have a
2426       # node here
2427       snode = node_image[nname]
2428       bad_snode = snode.ghost or snode.offline
2429       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2430                not success and not bad_snode,
2431                constants.CV_EINSTANCEFAULTYDISK, instance,
2432                "couldn't retrieve status for disk/%s on %s: %s",
2433                idx, nname, bdev_status)
2434       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2435                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2436                constants.CV_EINSTANCEFAULTYDISK, instance,
2437                "disk/%s on %s is faulty", idx, nname)
2438
2439   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2440     """Verify if there are any unknown volumes in the cluster.
2441
2442     The .os, .swap and backup volumes are ignored. All other volumes are
2443     reported as unknown.
2444
2445     @type reserved: L{ganeti.utils.FieldSet}
2446     @param reserved: a FieldSet of reserved volume names
2447
2448     """
2449     for node, n_img in node_image.items():
2450       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2451           self.all_node_info[node].group != self.group_uuid):
2452         # skip non-healthy nodes
2453         continue
2454       for volume in n_img.volumes:
2455         test = ((node not in node_vol_should or
2456                 volume not in node_vol_should[node]) and
2457                 not reserved.Matches(volume))
2458         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2459                       "volume %s is unknown", volume)
2460
2461   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2462     """Verify N+1 Memory Resilience.
2463
2464     Check that if one single node dies we can still start all the
2465     instances it was primary for.
2466
2467     """
2468     cluster_info = self.cfg.GetClusterInfo()
2469     for node, n_img in node_image.items():
2470       # This code checks that every node which is now listed as
2471       # secondary has enough memory to host all instances it is
2472       # supposed to should a single other node in the cluster fail.
2473       # FIXME: not ready for failover to an arbitrary node
2474       # FIXME: does not support file-backed instances
2475       # WARNING: we currently take into account down instances as well
2476       # as up ones, considering that even if they're down someone
2477       # might want to start them even in the event of a node failure.
2478       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2479         # we're skipping nodes marked offline and nodes in other groups from
2480         # the N+1 warning, since most likely we don't have good memory
2481         # infromation from them; we already list instances living on such
2482         # nodes, and that's enough warning
2483         continue
2484       #TODO(dynmem): also consider ballooning out other instances
2485       for prinode, instances in n_img.sbp.items():
2486         needed_mem = 0
2487         for instance in instances:
2488           bep = cluster_info.FillBE(instance_cfg[instance])
2489           if bep[constants.BE_AUTO_BALANCE]:
2490             needed_mem += bep[constants.BE_MINMEM]
2491         test = n_img.mfree < needed_mem
2492         self._ErrorIf(test, constants.CV_ENODEN1, node,
2493                       "not enough memory to accomodate instance failovers"
2494                       " should node %s fail (%dMiB needed, %dMiB available)",
2495                       prinode, needed_mem, n_img.mfree)
2496
2497   @classmethod
2498   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2499                    (files_all, files_opt, files_mc, files_vm)):
2500     """Verifies file checksums collected from all nodes.
2501
2502     @param errorif: Callback for reporting errors
2503     @param nodeinfo: List of L{objects.Node} objects
2504     @param master_node: Name of master node
2505     @param all_nvinfo: RPC results
2506
2507     """
2508     # Define functions determining which nodes to consider for a file
2509     files2nodefn = [
2510       (files_all, None),
2511       (files_mc, lambda node: (node.master_candidate or
2512                                node.name == master_node)),
2513       (files_vm, lambda node: node.vm_capable),
2514       ]
2515
2516     # Build mapping from filename to list of nodes which should have the file
2517     nodefiles = {}
2518     for (files, fn) in files2nodefn:
2519       if fn is None:
2520         filenodes = nodeinfo
2521       else:
2522         filenodes = filter(fn, nodeinfo)
2523       nodefiles.update((filename,
2524                         frozenset(map(operator.attrgetter("name"), filenodes)))
2525                        for filename in files)
2526
2527     assert set(nodefiles) == (files_all | files_mc | files_vm)
2528
2529     fileinfo = dict((filename, {}) for filename in nodefiles)
2530     ignore_nodes = set()
2531
2532     for node in nodeinfo:
2533       if node.offline:
2534         ignore_nodes.add(node.name)
2535         continue
2536
2537       nresult = all_nvinfo[node.name]
2538
2539       if nresult.fail_msg or not nresult.payload:
2540         node_files = None
2541       else:
2542         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2543
2544       test = not (node_files and isinstance(node_files, dict))
2545       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2546               "Node did not return file checksum data")
2547       if test:
2548         ignore_nodes.add(node.name)
2549         continue
2550
2551       # Build per-checksum mapping from filename to nodes having it
2552       for (filename, checksum) in node_files.items():
2553         assert filename in nodefiles
2554         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2555
2556     for (filename, checksums) in fileinfo.items():
2557       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2558
2559       # Nodes having the file
2560       with_file = frozenset(node_name
2561                             for nodes in fileinfo[filename].values()
2562                             for node_name in nodes) - ignore_nodes
2563
2564       expected_nodes = nodefiles[filename] - ignore_nodes
2565
2566       # Nodes missing file
2567       missing_file = expected_nodes - with_file
2568
2569       if filename in files_opt:
2570         # All or no nodes
2571         errorif(missing_file and missing_file != expected_nodes,
2572                 constants.CV_ECLUSTERFILECHECK, None,
2573                 "File %s is optional, but it must exist on all or no"
2574                 " nodes (not found on %s)",
2575                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2576       else:
2577         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2578                 "File %s is missing from node(s) %s", filename,
2579                 utils.CommaJoin(utils.NiceSort(missing_file)))
2580
2581         # Warn if a node has a file it shouldn't
2582         unexpected = with_file - expected_nodes
2583         errorif(unexpected,
2584                 constants.CV_ECLUSTERFILECHECK, None,
2585                 "File %s should not exist on node(s) %s",
2586                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2587
2588       # See if there are multiple versions of the file
2589       test = len(checksums) > 1
2590       if test:
2591         variants = ["variant %s on %s" %
2592                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2593                     for (idx, (checksum, nodes)) in
2594                       enumerate(sorted(checksums.items()))]
2595       else:
2596         variants = []
2597
2598       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2599               "File %s found with %s different checksums (%s)",
2600               filename, len(checksums), "; ".join(variants))
2601
2602   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2603                       drbd_map):
2604     """Verifies and the node DRBD status.
2605
2606     @type ninfo: L{objects.Node}
2607     @param ninfo: the node to check
2608     @param nresult: the remote results for the node
2609     @param instanceinfo: the dict of instances
2610     @param drbd_helper: the configured DRBD usermode helper
2611     @param drbd_map: the DRBD map as returned by
2612         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2613
2614     """
2615     node = ninfo.name
2616     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2617
2618     if drbd_helper:
2619       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2620       test = (helper_result == None)
2621       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2622                "no drbd usermode helper returned")
2623       if helper_result:
2624         status, payload = helper_result
2625         test = not status
2626         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2627                  "drbd usermode helper check unsuccessful: %s", payload)
2628         test = status and (payload != drbd_helper)
2629         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2630                  "wrong drbd usermode helper: %s", payload)
2631
2632     # compute the DRBD minors
2633     node_drbd = {}
2634     for minor, instance in drbd_map[node].items():
2635       test = instance not in instanceinfo
2636       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2637                "ghost instance '%s' in temporary DRBD map", instance)
2638         # ghost instance should not be running, but otherwise we
2639         # don't give double warnings (both ghost instance and
2640         # unallocated minor in use)
2641       if test:
2642         node_drbd[minor] = (instance, False)
2643       else:
2644         instance = instanceinfo[instance]
2645         node_drbd[minor] = (instance.name,
2646                             instance.admin_state == constants.ADMINST_UP)
2647
2648     # and now check them
2649     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2650     test = not isinstance(used_minors, (tuple, list))
2651     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2652              "cannot parse drbd status file: %s", str(used_minors))
2653     if test:
2654       # we cannot check drbd status
2655       return
2656
2657     for minor, (iname, must_exist) in node_drbd.items():
2658       test = minor not in used_minors and must_exist
2659       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2660                "drbd minor %d of instance %s is not active", minor, iname)
2661     for minor in used_minors:
2662       test = minor not in node_drbd
2663       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2664                "unallocated drbd minor %d is in use", minor)
2665
2666   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2667     """Builds the node OS structures.
2668
2669     @type ninfo: L{objects.Node}
2670     @param ninfo: the node to check
2671     @param nresult: the remote results for the node
2672     @param nimg: the node image object
2673
2674     """
2675     node = ninfo.name
2676     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2677
2678     remote_os = nresult.get(constants.NV_OSLIST, None)
2679     test = (not isinstance(remote_os, list) or
2680             not compat.all(isinstance(v, list) and len(v) == 7
2681                            for v in remote_os))
2682
2683     _ErrorIf(test, constants.CV_ENODEOS, node,
2684              "node hasn't returned valid OS data")
2685
2686     nimg.os_fail = test
2687
2688     if test:
2689       return
2690
2691     os_dict = {}
2692
2693     for (name, os_path, status, diagnose,
2694          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2695
2696       if name not in os_dict:
2697         os_dict[name] = []
2698
2699       # parameters is a list of lists instead of list of tuples due to
2700       # JSON lacking a real tuple type, fix it:
2701       parameters = [tuple(v) for v in parameters]
2702       os_dict[name].append((os_path, status, diagnose,
2703                             set(variants), set(parameters), set(api_ver)))
2704
2705     nimg.oslist = os_dict
2706
2707   def _VerifyNodeOS(self, ninfo, nimg, base):
2708     """Verifies the node OS list.
2709
2710     @type ninfo: L{objects.Node}
2711     @param ninfo: the node to check
2712     @param nimg: the node image object
2713     @param base: the 'template' node we match against (e.g. from the master)
2714
2715     """
2716     node = ninfo.name
2717     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2718
2719     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2720
2721     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2722     for os_name, os_data in nimg.oslist.items():
2723       assert os_data, "Empty OS status for OS %s?!" % os_name
2724       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2725       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2726                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2727       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2728                "OS '%s' has multiple entries (first one shadows the rest): %s",
2729                os_name, utils.CommaJoin([v[0] for v in os_data]))
2730       # comparisons with the 'base' image
2731       test = os_name not in base.oslist
2732       _ErrorIf(test, constants.CV_ENODEOS, node,
2733                "Extra OS %s not present on reference node (%s)",
2734                os_name, base.name)
2735       if test:
2736         continue
2737       assert base.oslist[os_name], "Base node has empty OS status?"
2738       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2739       if not b_status:
2740         # base OS is invalid, skipping
2741         continue
2742       for kind, a, b in [("API version", f_api, b_api),
2743                          ("variants list", f_var, b_var),
2744                          ("parameters", beautify_params(f_param),
2745                           beautify_params(b_param))]:
2746         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2747                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2748                  kind, os_name, base.name,
2749                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2750
2751     # check any missing OSes
2752     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2753     _ErrorIf(missing, constants.CV_ENODEOS, node,
2754              "OSes present on reference node %s but missing on this node: %s",
2755              base.name, utils.CommaJoin(missing))
2756
2757   def _VerifyOob(self, ninfo, nresult):
2758     """Verifies out of band functionality of a node.
2759
2760     @type ninfo: L{objects.Node}
2761     @param ninfo: the node to check
2762     @param nresult: the remote results for the node
2763
2764     """
2765     node = ninfo.name
2766     # We just have to verify the paths on master and/or master candidates
2767     # as the oob helper is invoked on the master
2768     if ((ninfo.master_candidate or ninfo.master_capable) and
2769         constants.NV_OOB_PATHS in nresult):
2770       for path_result in nresult[constants.NV_OOB_PATHS]:
2771         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2772
2773   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2774     """Verifies and updates the node volume data.
2775
2776     This function will update a L{NodeImage}'s internal structures
2777     with data from the remote call.
2778
2779     @type ninfo: L{objects.Node}
2780     @param ninfo: the node to check
2781     @param nresult: the remote results for the node
2782     @param nimg: the node image object
2783     @param vg_name: the configured VG name
2784
2785     """
2786     node = ninfo.name
2787     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2788
2789     nimg.lvm_fail = True
2790     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2791     if vg_name is None:
2792       pass
2793     elif isinstance(lvdata, basestring):
2794       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2795                utils.SafeEncode(lvdata))
2796     elif not isinstance(lvdata, dict):
2797       _ErrorIf(True, constants.CV_ENODELVM, node,
2798                "rpc call to node failed (lvlist)")
2799     else:
2800       nimg.volumes = lvdata
2801       nimg.lvm_fail = False
2802
2803   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2804     """Verifies and updates the node instance list.
2805
2806     If the listing was successful, then updates this node's instance
2807     list. Otherwise, it marks the RPC call as failed for the instance
2808     list key.
2809
2810     @type ninfo: L{objects.Node}
2811     @param ninfo: the node to check
2812     @param nresult: the remote results for the node
2813     @param nimg: the node image object
2814
2815     """
2816     idata = nresult.get(constants.NV_INSTANCELIST, None)
2817     test = not isinstance(idata, list)
2818     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2819                   "rpc call to node failed (instancelist): %s",
2820                   utils.SafeEncode(str(idata)))
2821     if test:
2822       nimg.hyp_fail = True
2823     else:
2824       nimg.instances = idata
2825
2826   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2827     """Verifies and computes a node information map
2828
2829     @type ninfo: L{objects.Node}
2830     @param ninfo: the node to check
2831     @param nresult: the remote results for the node
2832     @param nimg: the node image object
2833     @param vg_name: the configured VG name
2834
2835     """
2836     node = ninfo.name
2837     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2838
2839     # try to read free memory (from the hypervisor)
2840     hv_info = nresult.get(constants.NV_HVINFO, None)
2841     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2842     _ErrorIf(test, constants.CV_ENODEHV, node,
2843              "rpc call to node failed (hvinfo)")
2844     if not test:
2845       try:
2846         nimg.mfree = int(hv_info["memory_free"])
2847       except (ValueError, TypeError):
2848         _ErrorIf(True, constants.CV_ENODERPC, node,
2849                  "node returned invalid nodeinfo, check hypervisor")
2850
2851     # FIXME: devise a free space model for file based instances as well
2852     if vg_name is not None:
2853       test = (constants.NV_VGLIST not in nresult or
2854               vg_name not in nresult[constants.NV_VGLIST])
2855       _ErrorIf(test, constants.CV_ENODELVM, node,
2856                "node didn't return data for the volume group '%s'"
2857                " - it is either missing or broken", vg_name)
2858       if not test:
2859         try:
2860           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2861         except (ValueError, TypeError):
2862           _ErrorIf(True, constants.CV_ENODERPC, node,
2863                    "node returned invalid LVM info, check LVM status")
2864
2865   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2866     """Gets per-disk status information for all instances.
2867
2868     @type nodelist: list of strings
2869     @param nodelist: Node names
2870     @type node_image: dict of (name, L{objects.Node})
2871     @param node_image: Node objects
2872     @type instanceinfo: dict of (name, L{objects.Instance})
2873     @param instanceinfo: Instance objects
2874     @rtype: {instance: {node: [(succes, payload)]}}
2875     @return: a dictionary of per-instance dictionaries with nodes as
2876         keys and disk information as values; the disk information is a
2877         list of tuples (success, payload)
2878
2879     """
2880     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2881
2882     node_disks = {}
2883     node_disks_devonly = {}
2884     diskless_instances = set()
2885     diskless = constants.DT_DISKLESS
2886
2887     for nname in nodelist:
2888       node_instances = list(itertools.chain(node_image[nname].pinst,
2889                                             node_image[nname].sinst))
2890       diskless_instances.update(inst for inst in node_instances
2891                                 if instanceinfo[inst].disk_template == diskless)
2892       disks = [(inst, disk)
2893                for inst in node_instances
2894                for disk in instanceinfo[inst].disks]
2895
2896       if not disks:
2897         # No need to collect data
2898         continue
2899
2900       node_disks[nname] = disks
2901
2902       # Creating copies as SetDiskID below will modify the objects and that can
2903       # lead to incorrect data returned from nodes
2904       devonly = [dev.Copy() for (_, dev) in disks]
2905
2906       for dev in devonly:
2907         self.cfg.SetDiskID(dev, nname)
2908
2909       node_disks_devonly[nname] = devonly
2910
2911     assert len(node_disks) == len(node_disks_devonly)
2912
2913     # Collect data from all nodes with disks
2914     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2915                                                           node_disks_devonly)
2916
2917     assert len(result) == len(node_disks)
2918
2919     instdisk = {}
2920
2921     for (nname, nres) in result.items():
2922       disks = node_disks[nname]
2923
2924       if nres.offline:
2925         # No data from this node
2926         data = len(disks) * [(False, "node offline")]
2927       else:
2928         msg = nres.fail_msg
2929         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2930                  "while getting disk information: %s", msg)
2931         if msg:
2932           # No data from this node
2933           data = len(disks) * [(False, msg)]
2934         else:
2935           data = []
2936           for idx, i in enumerate(nres.payload):
2937             if isinstance(i, (tuple, list)) and len(i) == 2:
2938               data.append(i)
2939             else:
2940               logging.warning("Invalid result from node %s, entry %d: %s",
2941                               nname, idx, i)
2942               data.append((False, "Invalid result from the remote node"))
2943
2944       for ((inst, _), status) in zip(disks, data):
2945         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2946
2947     # Add empty entries for diskless instances.
2948     for inst in diskless_instances:
2949       assert inst not in instdisk
2950       instdisk[inst] = {}
2951
2952     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2953                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2954                       compat.all(isinstance(s, (tuple, list)) and
2955                                  len(s) == 2 for s in statuses)
2956                       for inst, nnames in instdisk.items()
2957                       for nname, statuses in nnames.items())
2958     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2959
2960     return instdisk
2961
2962   @staticmethod
2963   def _SshNodeSelector(group_uuid, all_nodes):
2964     """Create endless iterators for all potential SSH check hosts.
2965
2966     """
2967     nodes = [node for node in all_nodes
2968              if (node.group != group_uuid and
2969                  not node.offline)]
2970     keyfunc = operator.attrgetter("group")
2971
2972     return map(itertools.cycle,
2973                [sorted(map(operator.attrgetter("name"), names))
2974                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2975                                                   keyfunc)])
2976
2977   @classmethod
2978   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2979     """Choose which nodes should talk to which other nodes.
2980
2981     We will make nodes contact all nodes in their group, and one node from
2982     every other group.
2983
2984     @warning: This algorithm has a known issue if one node group is much
2985       smaller than others (e.g. just one node). In such a case all other
2986       nodes will talk to the single node.
2987
2988     """
2989     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2990     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2991
2992     return (online_nodes,
2993             dict((name, sorted([i.next() for i in sel]))
2994                  for name in online_nodes))
2995
2996   def BuildHooksEnv(self):
2997     """Build hooks env.
2998
2999     Cluster-Verify hooks just ran in the post phase and their failure makes
3000     the output be logged in the verify output and the verification to fail.
3001
3002     """
3003     env = {
3004       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3005       }
3006
3007     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3008                for node in self.my_node_info.values())
3009
3010     return env
3011
3012   def BuildHooksNodes(self):
3013     """Build hooks nodes.
3014
3015     """
3016     return ([], self.my_node_names)
3017
3018   def Exec(self, feedback_fn):
3019     """Verify integrity of the node group, performing various test on nodes.
3020
3021     """
3022     # This method has too many local variables. pylint: disable=R0914
3023     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3024
3025     if not self.my_node_names:
3026       # empty node group
3027       feedback_fn("* Empty node group, skipping verification")
3028       return True
3029
3030     self.bad = False
3031     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3032     verbose = self.op.verbose
3033     self._feedback_fn = feedback_fn
3034
3035     vg_name = self.cfg.GetVGName()
3036     drbd_helper = self.cfg.GetDRBDHelper()
3037     cluster = self.cfg.GetClusterInfo()
3038     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3039     hypervisors = cluster.enabled_hypervisors
3040     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3041
3042     i_non_redundant = [] # Non redundant instances
3043     i_non_a_balanced = [] # Non auto-balanced instances
3044     i_offline = 0 # Count of offline instances
3045     n_offline = 0 # Count of offline nodes
3046     n_drained = 0 # Count of nodes being drained
3047     node_vol_should = {}
3048
3049     # FIXME: verify OS list
3050
3051     # File verification
3052     filemap = _ComputeAncillaryFiles(cluster, False)
3053
3054     # do local checksums
3055     master_node = self.master_node = self.cfg.GetMasterNode()
3056     master_ip = self.cfg.GetMasterIP()
3057
3058     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3059
3060     user_scripts = []
3061     if self.cfg.GetUseExternalMipScript():
3062       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3063
3064     node_verify_param = {
3065       constants.NV_FILELIST:
3066         utils.UniqueSequence(filename
3067                              for files in filemap
3068                              for filename in files),
3069       constants.NV_NODELIST:
3070         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3071                                   self.all_node_info.values()),
3072       constants.NV_HYPERVISOR: hypervisors,
3073       constants.NV_HVPARAMS:
3074         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3075       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3076                                  for node in node_data_list
3077                                  if not node.offline],
3078       constants.NV_INSTANCELIST: hypervisors,
3079       constants.NV_VERSION: None,
3080       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3081       constants.NV_NODESETUP: None,
3082       constants.NV_TIME: None,
3083       constants.NV_MASTERIP: (master_node, master_ip),
3084       constants.NV_OSLIST: None,
3085       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3086       constants.NV_USERSCRIPTS: user_scripts,
3087       }
3088
3089     if vg_name is not None:
3090       node_verify_param[constants.NV_VGLIST] = None
3091       node_verify_param[constants.NV_LVLIST] = vg_name
3092       node_verify_param[constants.NV_PVLIST] = [vg_name]
3093       node_verify_param[constants.NV_DRBDLIST] = None
3094
3095     if drbd_helper:
3096       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3097
3098     # bridge checks
3099     # FIXME: this needs to be changed per node-group, not cluster-wide
3100     bridges = set()
3101     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3102     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3103       bridges.add(default_nicpp[constants.NIC_LINK])
3104     for instance in self.my_inst_info.values():
3105       for nic in instance.nics:
3106         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3107         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3108           bridges.add(full_nic[constants.NIC_LINK])
3109
3110     if bridges:
3111       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3112
3113     # Build our expected cluster state
3114     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3115                                                  name=node.name,
3116                                                  vm_capable=node.vm_capable))
3117                       for node in node_data_list)
3118
3119     # Gather OOB paths
3120     oob_paths = []
3121     for node in self.all_node_info.values():
3122       path = _SupportsOob(self.cfg, node)
3123       if path and path not in oob_paths:
3124         oob_paths.append(path)
3125
3126     if oob_paths:
3127       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3128
3129     for instance in self.my_inst_names:
3130       inst_config = self.my_inst_info[instance]
3131
3132       for nname in inst_config.all_nodes:
3133         if nname not in node_image:
3134           gnode = self.NodeImage(name=nname)
3135           gnode.ghost = (nname not in self.all_node_info)
3136           node_image[nname] = gnode
3137
3138       inst_config.MapLVsByNode(node_vol_should)
3139
3140       pnode = inst_config.primary_node
3141       node_image[pnode].pinst.append(instance)
3142
3143       for snode in inst_config.secondary_nodes:
3144         nimg = node_image[snode]
3145         nimg.sinst.append(instance)
3146         if pnode not in nimg.sbp:
3147           nimg.sbp[pnode] = []
3148         nimg.sbp[pnode].append(instance)
3149
3150     # At this point, we have the in-memory data structures complete,
3151     # except for the runtime information, which we'll gather next
3152
3153     # Due to the way our RPC system works, exact response times cannot be
3154     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3155     # time before and after executing the request, we can at least have a time
3156     # window.
3157     nvinfo_starttime = time.time()
3158     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3159                                            node_verify_param,
3160                                            self.cfg.GetClusterName())
3161     nvinfo_endtime = time.time()
3162
3163     if self.extra_lv_nodes and vg_name is not None:
3164       extra_lv_nvinfo = \
3165           self.rpc.call_node_verify(self.extra_lv_nodes,
3166                                     {constants.NV_LVLIST: vg_name},
3167                                     self.cfg.GetClusterName())
3168     else:
3169       extra_lv_nvinfo = {}
3170
3171     all_drbd_map = self.cfg.ComputeDRBDMap()
3172
3173     feedback_fn("* Gathering disk information (%s nodes)" %
3174                 len(self.my_node_names))
3175     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3176                                      self.my_inst_info)
3177
3178     feedback_fn("* Verifying configuration file consistency")
3179
3180     # If not all nodes are being checked, we need to make sure the master node
3181     # and a non-checked vm_capable node are in the list.
3182     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3183     if absent_nodes:
3184       vf_nvinfo = all_nvinfo.copy()
3185       vf_node_info = list(self.my_node_info.values())
3186       additional_nodes = []
3187       if master_node not in self.my_node_info:
3188         additional_nodes.append(master_node)
3189         vf_node_info.append(self.all_node_info[master_node])
3190       # Add the first vm_capable node we find which is not included
3191       for node in absent_nodes:
3192         nodeinfo = self.all_node_info[node]
3193         if nodeinfo.vm_capable and not nodeinfo.offline:
3194           additional_nodes.append(node)
3195           vf_node_info.append(self.all_node_info[node])
3196           break
3197       key = constants.NV_FILELIST
3198       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3199                                                  {key: node_verify_param[key]},
3200                                                  self.cfg.GetClusterName()))
3201     else:
3202       vf_nvinfo = all_nvinfo
3203       vf_node_info = self.my_node_info.values()
3204
3205     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3206
3207     feedback_fn("* Verifying node status")
3208
3209     refos_img = None
3210
3211     for node_i in node_data_list:
3212       node = node_i.name
3213       nimg = node_image[node]
3214
3215       if node_i.offline:
3216         if verbose:
3217           feedback_fn("* Skipping offline node %s" % (node,))
3218         n_offline += 1
3219         continue
3220
3221       if node == master_node:
3222         ntype = "master"
3223       elif node_i.master_candidate:
3224         ntype = "master candidate"
3225       elif node_i.drained:
3226         ntype = "drained"
3227         n_drained += 1
3228       else:
3229         ntype = "regular"
3230       if verbose:
3231         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3232
3233       msg = all_nvinfo[node].fail_msg
3234       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3235                msg)
3236       if msg:
3237         nimg.rpc_fail = True
3238         continue
3239
3240       nresult = all_nvinfo[node].payload
3241
3242       nimg.call_ok = self._VerifyNode(node_i, nresult)
3243       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3244       self._VerifyNodeNetwork(node_i, nresult)
3245       self._VerifyNodeUserScripts(node_i, nresult)
3246       self._VerifyOob(node_i, nresult)
3247
3248       if nimg.vm_capable:
3249         self._VerifyNodeLVM(node_i, nresult, vg_name)
3250         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3251                              all_drbd_map)
3252
3253         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3254         self._UpdateNodeInstances(node_i, nresult, nimg)
3255         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3256         self._UpdateNodeOS(node_i, nresult, nimg)
3257
3258         if not nimg.os_fail:
3259           if refos_img is None:
3260             refos_img = nimg
3261           self._VerifyNodeOS(node_i, nimg, refos_img)
3262         self._VerifyNodeBridges(node_i, nresult, bridges)
3263
3264         # Check whether all running instances are primary for the node. (This
3265         # can no longer be done from _VerifyInstance below, since some of the
3266         # wrong instances could be from other node groups.)
3267         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3268
3269         for inst in non_primary_inst:
3270           # FIXME: investigate best way to handle offline insts
3271           if inst.admin_state == constants.ADMINST_OFFLINE:
3272             if verbose:
3273               feedback_fn("* Skipping offline instance %s" % inst.name)
3274             i_offline += 1
3275             continue
3276           test = inst in self.all_inst_info
3277           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3278                    "instance should not run on node %s", node_i.name)
3279           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3280                    "node is running unknown instance %s", inst)
3281
3282     for node, result in extra_lv_nvinfo.items():
3283       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3284                               node_image[node], vg_name)
3285
3286     feedback_fn("* Verifying instance status")
3287     for instance in self.my_inst_names:
3288       if verbose:
3289         feedback_fn("* Verifying instance %s" % instance)
3290       inst_config = self.my_inst_info[instance]
3291       self._VerifyInstance(instance, inst_config, node_image,
3292                            instdisk[instance])
3293       inst_nodes_offline = []
3294
3295       pnode = inst_config.primary_node
3296       pnode_img = node_image[pnode]
3297       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3298                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3299                " primary node failed", instance)
3300
3301       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3302                pnode_img.offline,
3303                constants.CV_EINSTANCEBADNODE, instance,
3304                "instance is marked as running and lives on offline node %s",
3305                inst_config.primary_node)
3306
3307       # If the instance is non-redundant we cannot survive losing its primary
3308       # node, so we are not N+1 compliant. On the other hand we have no disk
3309       # templates with more than one secondary so that situation is not well
3310       # supported either.
3311       # FIXME: does not support file-backed instances
3312       if not inst_config.secondary_nodes:
3313         i_non_redundant.append(instance)
3314
3315       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3316                constants.CV_EINSTANCELAYOUT,
3317                instance, "instance has multiple secondary nodes: %s",
3318                utils.CommaJoin(inst_config.secondary_nodes),
3319                code=self.ETYPE_WARNING)
3320
3321       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3322         pnode = inst_config.primary_node
3323         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3324         instance_groups = {}
3325
3326         for node in instance_nodes:
3327           instance_groups.setdefault(self.all_node_info[node].group,
3328                                      []).append(node)
3329
3330         pretty_list = [
3331           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3332           # Sort so that we always list the primary node first.
3333           for group, nodes in sorted(instance_groups.items(),
3334                                      key=lambda (_, nodes): pnode in nodes,
3335                                      reverse=True)]
3336
3337         self._ErrorIf(len(instance_groups) > 1,
3338                       constants.CV_EINSTANCESPLITGROUPS,
3339                       instance, "instance has primary and secondary nodes in"
3340                       " different groups: %s", utils.CommaJoin(pretty_list),
3341                       code=self.ETYPE_WARNING)
3342
3343       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3344         i_non_a_balanced.append(instance)
3345
3346       for snode in inst_config.secondary_nodes:
3347         s_img = node_image[snode]
3348         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3349                  snode, "instance %s, connection to secondary node failed",
3350                  instance)
3351
3352         if s_img.offline:
3353           inst_nodes_offline.append(snode)
3354
3355       # warn that the instance lives on offline nodes
3356       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3357                "instance has offline secondary node(s) %s",
3358                utils.CommaJoin(inst_nodes_offline))
3359       # ... or ghost/non-vm_capable nodes
3360       for node in inst_config.all_nodes:
3361         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3362                  instance, "instance lives on ghost node %s", node)
3363         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3364                  instance, "instance lives on non-vm_capable node %s", node)
3365
3366     feedback_fn("* Verifying orphan volumes")
3367     reserved = utils.FieldSet(*cluster.reserved_lvs)
3368
3369     # We will get spurious "unknown volume" warnings if any node of this group
3370     # is secondary for an instance whose primary is in another group. To avoid
3371     # them, we find these instances and add their volumes to node_vol_should.
3372     for inst in self.all_inst_info.values():
3373       for secondary in inst.secondary_nodes:
3374         if (secondary in self.my_node_info
3375             and inst.name not in self.my_inst_info):
3376           inst.MapLVsByNode(node_vol_should)
3377           break
3378
3379     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3380
3381     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3382       feedback_fn("* Verifying N+1 Memory redundancy")
3383       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3384
3385     feedback_fn("* Other Notes")
3386     if i_non_redundant:
3387       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3388                   % len(i_non_redundant))
3389
3390     if i_non_a_balanced:
3391       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3392                   % len(i_non_a_balanced))
3393
3394     if i_offline:
3395       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3396
3397     if n_offline:
3398       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3399
3400     if n_drained:
3401       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3402
3403     return not self.bad
3404
3405   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3406     """Analyze the post-hooks' result
3407
3408     This method analyses the hook result, handles it, and sends some
3409     nicely-formatted feedback back to the user.
3410
3411     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3412         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3413     @param hooks_results: the results of the multi-node hooks rpc call
3414     @param feedback_fn: function used send feedback back to the caller
3415     @param lu_result: previous Exec result
3416     @return: the new Exec result, based on the previous result
3417         and hook results
3418
3419     """
3420     # We only really run POST phase hooks, only for non-empty groups,
3421     # and are only interested in their results
3422     if not self.my_node_names:
3423       # empty node group
3424       pass
3425     elif phase == constants.HOOKS_PHASE_POST:
3426       # Used to change hooks' output to proper indentation
3427       feedback_fn("* Hooks Results")
3428       assert hooks_results, "invalid result from hooks"
3429
3430       for node_name in hooks_results:
3431         res = hooks_results[node_name]
3432         msg = res.fail_msg
3433         test = msg and not res.offline
3434         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3435                       "Communication failure in hooks execution: %s", msg)
3436         if res.offline or msg:
3437           # No need to investigate payload if node is offline or gave
3438           # an error.
3439           continue
3440         for script, hkr, output in res.payload:
3441           test = hkr == constants.HKR_FAIL
3442           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3443                         "Script %s failed, output:", script)
3444           if test:
3445             output = self._HOOKS_INDENT_RE.sub("      ", output)
3446             feedback_fn("%s" % output)
3447             lu_result = False
3448
3449     return lu_result
3450
3451
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  The work itself is delegated: one L{opcodes.OpGroupVerifyDisks} job is
  submitted for every node group lock owned by this LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node group locks, using the sharing from _ShareAll().

    """
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Submit one disk verification job per node group.

    @param feedback_fn: feedback function (not used by this LU)
    @return: a L{ResultWithJobs} holding one single-opcode job for each
        owned node group lock

    """
    jobs = []
    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
      # Each job consists of exactly one opcode
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])

    return ResultWithJobs(jobs)
3470
3471
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name and prepare the (still empty) lock lists.

    """
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()

    # The actual lock names are computed level by level in DeclareLocks
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    """Compute the locks needed at the given locking level.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      # Besides the group being verified, lock all groups used by the owned
      # instances optimistically; this requires going via the node before
      # it's locked, requiring verification later on
      wanted_groups = set([self.group_uuid])
      for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
        wanted_groups.update(self.cfg.GetInstanceNodeGroups(instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      group = self.cfg.GetNodeGroup(self.group_uuid)
      self.needed_locks[locking.LEVEL_NODE].extend(group.members)

  def CheckPrereq(self):
    """Re-validate the optimistically acquired locks and load instances.

    The instance information is stored in C{self.instances} as a dict
    built from L{config.ConfigWriter.GetMultiInstanceInfo}.

    """
    locked_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    locked_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in locked_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, locked_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(locked_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              locked_groups, locked_nodes, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    node_errors = {}
    need_activate = set()
    missing_lvs = {}

    # Only instances whose admin state is "up" are taken into account
    running = [inst for inst in self.instances.values()
               if inst.admin_state == constants.ADMINST_UP]
    lv_owners = _MapInstanceDisksToNodes(running)

    if not lv_owners:
      # Nothing to check, so no node is contacted at all
      return (node_errors, list(need_activate), missing_lvs)

    locked_nodes = set(self.owned_locks(locking.LEVEL_NODE))
    vm_nodes = locked_nodes.intersection(self.cfg.GetVmCapableNodeList())
    lv_data = self.rpc.call_lv_list(utils.NiceSort(vm_nodes), [])

    for (node, node_res) in lv_data.items():
      if node_res.offline:
        continue

      err_msg = node_res.fail_msg
      if err_msg:
        logging.warning("Error enumerating LVs on node %s: %s",
                        node, err_msg)
        node_errors[node] = err_msg
        continue

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        # Every LV reported by the node is removed from the expected set
        owner = lv_owners.pop((node, lv_name), None)
        if owner is not None and not lv_online:
          need_activate.add(owner)

    # any leftover items in lv_owners are missing LVs, let's arrange the data
    # better
    for lv_key, owner in lv_owners.iteritems():
      missing_lvs.setdefault(owner, []).append(list(lv_key))

    return (node_errors, list(need_activate), missing_lvs)
3582
3583
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare instance and node resource locks.

    With an explicit instance list only those instances are locked and the
    node resource locks are recalculated later; without one, everything
    is locked.

    """
    if not self.op.instances:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    else:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    """Lock the primary nodes of the wanted instances, if any were named.

    @param level: the locking level for which locks are being declared

    """
    if level != locking.LEVEL_NODE_RES or self.wanted_names is None:
      return

    self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    Falls back to the owned instance locks when no instance names were
    given, and loads the instance objects into C{self.wanted_instances}.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    name_info_pairs = self.cfg.GetMultiInstanceInfo(self.wanted_names)
    self.wanted_instances = [compat.snd(pair) for pair in name_info_pairs]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: whether the size of any (nested) first child was changed

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    data_child = disk.children[0]
    too_small = data_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   data_child.size, disk.size)
      data_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(data_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: feedback function, passed on to the configuration
        update calls
    @return: list of (instance name, disk index, size) tuples, one for
        each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    disks_by_pnode = {}
    for instance in self.wanted_instances:
      # The node entry is created even if the instance has no disks
      node_disks = disks_by_pnode.setdefault(instance.primary_node, [])
      node_disks.extend((instance, idx, disk)
                        for idx, disk in enumerate(instance.disks))

    assert not (frozenset(disks_by_pnode.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, disk_entries in disks_by_pnode.items():
      # The RPC is made with copies of the disk objects, after SetDiskID
      disk_copies = [orig_disk.Copy() for (_, _, orig_disk) in disk_entries]
      for disk_copy in disk_copies:
        self.cfg.SetDiskID(disk_copy, node)

      result = self.rpc.call_blockdev_getsize(node, disk_copies)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      if len(result.payload) != len(disk_entries):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(disk_entries),
                        result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for ((instance, idx, disk), size) in zip(disk_entries, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue

        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue

        # Shift right by 20 bits before comparing with the recorded size
        size >>= 20
        if disk.size != size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))

        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
3704
3705
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dict with the current cluster name as C{OP_TARGET} and the
        requested name as C{NEW_NAME}

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    env["NEW_NAME"] = self.op.name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (list holding only the master node, list of all
        nodes)

    """
    master = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return ([master], all_nodes)

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved first; the resolved IP is stored in C{self.ip}
    and, if all checks pass, the resolved name replaces C{self.op.name}.

    @raise errors.OpPrereqError: if neither name nor IP would change, or
        if a changed IP already answers on the node daemon port

    """
    family = self.cfg.GetPrimaryIPFamily()
    hostname = netutils.GetHostname(name=self.op.name, family=family)

    resolved_name = hostname.name
    resolved_ip = hostname.ip
    self.ip = resolved_ip

    current_name = self.cfg.GetClusterName()
    current_ip = self.cfg.GetMasterIP()
    ip_changed = resolved_ip != current_ip

    if not ip_changed and resolved_name == current_name:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # A new address which already answers on the network is refused
    if ip_changed and netutils.TcpPing(resolved_ip,
                                       constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 resolved_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = resolved_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    @param feedback_fn: feedback function, passed on to the configuration
        update
    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    shutdown = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params,
                                                       use_ext_script)
    shutdown.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file and upload it to the other online nodes
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      other_nodes = self.cfg.GetOnlineNodeList()
      if master_params.name in other_nodes:
        other_nodes.remove(master_params.name)
      _UploadHelper(self, other_nodes, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # The master IP is brought up again, with the new address, no matter
      # whether the steps above succeeded
      master_params.ip = new_ip
      startup = self.rpc.call_node_activate_master_ip(master_params.name,
                                                      master_params,
                                                      use_ext_script)
      warn_msg = startup.fail_msg
      if warn_msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", warn_msg)

    return new_name
3789
3790
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  The netmask is validated by the IP address class matching the
  cluster's primary IP family.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails; the error carries
      the L{errors.ECODE_INVAL} error code

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    # Pass an error code, like the other OpPrereqError raises in this module
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               (ip_family, ), errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    # Explicit 1-tuple so that formatting never depends on the value's type
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask, ), errors.ECODE_INVAL)
3810
3811
3812 class LUClusterSetParams(LogicalUnit):
3813   """Change the parameters of the cluster.
3814
3815   """
3816   HPATH = "cluster-modify"
3817   HTYPE = constants.HTYPE_CLUSTER
3818   REQ_BGL = False
3819
3820   def CheckArguments(self):
3821     """Check parameters
3822
3823     """
3824     if self.op.uid_pool:
3825       uidpool.CheckUidPool(self.op.uid_pool)
3826
3827     if self.op.add_uids:
3828       uidpool.CheckUidPool(self.op.add_uids)
3829
3830     if self.op.remove_uids:
3831       uidpool.CheckUidPool(self.op.remove_uids)
3832
3833     if self.op.master_netmask is not None:
3834       _ValidateNetmask(self.cfg, self.op.master_netmask)
3835
3836     if self.op.diskparams:
3837       for dt_params in self.op.diskparams.values():
3838         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3839
3840   def ExpandNames(self):
3841     # FIXME: in the future maybe other cluster params won't require checking on
3842     # all nodes to be modified.
3843     self.needed_locks = {
3844       locking.LEVEL_NODE: locking.ALL_SET,
3845       locking.LEVEL_INSTANCE: locking.ALL_SET,
3846       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3847     }
3848     self.share_locks = {
3849         locking.LEVEL_NODE: 1,
3850         locking.LEVEL_INSTANCE: 1,
3851         locking.LEVEL_NODEGROUP: 1,
3852     }
3853
3854   def BuildHooksEnv(self):
3855     """Build hooks env.
3856
3857     """
3858     return {
3859       "OP_TARGET": self.cfg.GetClusterName(),
3860       "NEW_VG_NAME": self.op.vg_name,
3861       }
3862
3863   def BuildHooksNodes(self):
3864     """Build hooks nodes.
3865
3866     """
3867     mn = self.cfg.GetMasterNode()
3868     return ([mn], [mn])
3869
3870   def CheckPrereq(self):
3871     """Check prerequisites.
3872
3873     This checks whether the given params don't conflict and
3874     if the given volume group is valid.
3875
3876     """
3877     if self.op.vg_name is not None and not self.op.vg_name:
3878       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3879         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3880                                    " instances exist", errors.ECODE_INVAL)
3881
3882     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3883       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3884         raise errors.OpPrereqError("Cannot disable drbd helper while"
3885                                    " drbd-based instances exist",
3886                                    errors.ECODE_INVAL)
3887
3888     node_list = self.owned_locks(locking.LEVEL_NODE)
3889
3890     # if vg_name not None, checks given volume group on all nodes
3891     if self.op.vg_name:
3892       vglist = self.rpc.call_vg_list(node_list)
3893       for node in node_list:
3894         msg = vglist[node].fail_msg
3895         if msg:
3896           # ignoring down node
3897           self.LogWarning("Error while gathering data on node %s"
3898                           " (ignoring node): %s", node, msg)
3899           continue
3900         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3901                                               self.op.vg_name,
3902                                               constants.MIN_VG_SIZE)
3903         if vgstatus:
3904           raise errors.OpPrereqError("Error on node '%s': %s" %
3905                                      (node, vgstatus), errors.ECODE_ENVIRON)
3906
3907     if self.op.drbd_helper:
3908       # checks given drbd helper on all nodes
3909       helpers = self.rpc.call_drbd_helper(node_list)
3910       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3911         if ninfo.offline:
3912           self.LogInfo("Not checking drbd helper on offline node %s", node)
3913           continue
3914         msg = helpers[node].fail_msg
3915         if msg:
3916           raise errors.OpPrereqError("Error checking drbd helper on node"
3917                                      " '%s': %s" % (node, msg),
3918                                      errors.ECODE_ENVIRON)
3919         node_helper = helpers[node].payload
3920         if node_helper != self.op.drbd_helper:
3921           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3922                                      (node, node_helper), errors.ECODE_ENVIRON)
3923
3924     self.cluster = cluster = self.cfg.GetClusterInfo()
3925     # validate params changes
3926     if self.op.beparams:
3927       objects.UpgradeBeParams(self.op.beparams)
3928       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3929       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3930
3931     if self.op.ndparams:
3932       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3933       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3934
3935       # TODO: we need a more general way to handle resetting
3936       # cluster-level parameters to default values
3937       if self.new_ndparams["oob_program"] == "":
3938         self.new_ndparams["oob_program"] = \
3939             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3940
3941     if self.op.hv_state:
3942       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3943                                             self.cluster.hv_state_static)
3944       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3945                                for hv, values in new_hv_state.items())
3946
3947     if self.op.disk_state:
3948       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3949                                                 self.cluster.disk_state_static)
3950       self.new_disk_state = \
3951         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3952                             for name, values in svalues.items()))
3953              for storage, svalues in new_disk_state.items())
3954
3955     if self.op.ipolicy:
3956       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3957                                             group_policy=False)
3958
3959       all_instances = self.cfg.GetAllInstancesInfo().values()
3960       violations = set()
3961       for group in self.cfg.GetAllNodeGroupsInfo().values():
3962         instances = frozenset([inst for inst in all_instances
3963                                if compat.any(node in group.members
3964                                              for node in inst.all_nodes)])
3965         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3966         new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3967                                                                    group),
3968                                             new_ipolicy, instances)
3969         if new:
3970           violations.update(new)
3971
3972       if violations:
3973         self.LogWarning("After the ipolicy change the following instances"
3974                         " violate them: %s",
3975                         utils.CommaJoin(violations))
3976
3977     if self.op.nicparams:
3978       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3979       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3980       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3981       nic_errors = []
3982
3983       # check all instances for consistency
3984       for instance in self.cfg.GetAllInstancesInfo().values():
3985         for nic_idx, nic in enumerate(instance.nics):
3986           params_copy = copy.deepcopy(nic.nicparams)
3987           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3988
3989           # check parameter syntax
3990           try:
3991             objects.NIC.CheckParameterSyntax(params_filled)
3992           except errors.ConfigurationError, err:
3993             nic_errors.append("Instance %s, nic/%d: %s" %
3994                               (instance.name, nic_idx, err))
3995
3996           # if we're moving instances to routed, check that they have an ip
3997           target_mode = params_filled[constants.NIC_MODE]
3998           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3999             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4000                               " address" % (instance.name, nic_idx))
4001       if nic_errors:
4002         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4003                                    "\n".join(nic_errors))
4004
4005     # hypervisor list/parameters
4006     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4007     if self.op.hvparams:
4008       for hv_name, hv_dict in self.op.hvparams.items():
4009         if hv_name not in self.new_hvparams:
4010           self.new_hvparams[hv_name] = hv_dict
4011         else:
4012           self.new_hvparams[hv_name].update(hv_dict)
4013
4014     # disk template parameters
4015     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4016     if self.op.diskparams:
4017       for dt_name, dt_params in self.op.diskparams.items():
4018         if dt_name not in self.op.diskparams:
4019           self.new_diskparams[dt_name] = dt_params
4020         else:
4021           self.new_diskparams[dt_name].update(dt_params)
4022
4023     # os hypervisor parameters
4024     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4025     if self.op.os_hvp:
4026       for os_name, hvs in self.op.os_hvp.items():
4027         if os_name not in self.new_os_hvp:
4028           self.new_os_hvp[os_name] = hvs
4029         else:
4030           for hv_name, hv_dict in hvs.items():
4031             if hv_name not in self.new_os_hvp[os_name]:
4032               self.new_os_hvp[os_name][hv_name] = hv_dict
4033             else:
4034               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4035
4036     # os parameters
4037     self.new_osp = objects.FillDict(cluster.osparams, {})
4038     if self.op.osparams:
4039       for os_name, osp in self.op.osparams.items():
4040         if os_name not in self.new_osp:
4041           self.new_osp[os_name] = {}
4042
4043         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4044                                                   use_none=True)
4045
4046         if not self.new_osp[os_name]:
4047           # we removed all parameters
4048           del self.new_osp[os_name]
4049         else:
4050           # check the parameter validity (remote check)
4051           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4052                          os_name, self.new_osp[os_name])
4053
4054     # changes to the hypervisor list
4055     if self.op.enabled_hypervisors is not None:
4056       self.hv_list = self.op.enabled_hypervisors
4057       for hv in self.hv_list:
4058         # if the hypervisor doesn't already exist in the cluster
4059         # hvparams, we initialize it to empty, and then (in both
4060         # cases) we make sure to fill the defaults, as we might not
4061         # have a complete defaults list if the hypervisor wasn't
4062         # enabled before
4063         if hv not in new_hvp:
4064           new_hvp[hv] = {}
4065         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4066         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4067     else:
4068       self.hv_list = cluster.enabled_hypervisors
4069
4070     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4071       # either the enabled list has changed, or the parameters have, validate
4072       for hv_name, hv_params in self.new_hvparams.items():
4073         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4074             (self.op.enabled_hypervisors and
4075              hv_name in self.op.enabled_hypervisors)):
4076           # either this is a new hypervisor, or its parameters have changed
4077           hv_class = hypervisor.GetHypervisor(hv_name)
4078           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4079           hv_class.CheckParameterSyntax(hv_params)
4080           _CheckHVParams(self, node_list, hv_name, hv_params)
4081
4082     if self.op.os_hvp:
4083       # no need to check any newly-enabled hypervisors, since the
4084       # defaults have already been checked in the above code-block
4085       for os_name, os_hvp in self.new_os_hvp.items():
4086         for hv_name, hv_params in os_hvp.items():
4087           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4088           # we need to fill in the new os_hvp on top of the actual hv_p
4089           cluster_defaults = self.new_hvparams.get(hv_name, {})
4090           new_osp = objects.FillDict(cluster_defaults, hv_params)
4091           hv_class = hypervisor.GetHypervisor(hv_name)
4092           hv_class.CheckParameterSyntax(new_osp)
4093           _CheckHVParams(self, node_list, hv_name, new_osp)
4094
4095     if self.op.default_iallocator:
4096       alloc_script = utils.FindFile(self.op.default_iallocator,
4097                                     constants.IALLOCATOR_SEARCH_PATH,
4098                                     os.path.isfile)
4099       if alloc_script is None:
4100         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4101                                    " specified" % self.op.default_iallocator,
4102                                    errors.ECODE_INVAL)
4103
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    Applies the modifications requested in the opcode to C{self.cluster},
    saves it through C{self.cfg.Update} and, if the master network device
    is being changed, moves the master IP to the new device.

    The C{self.new_*} attributes used here are expected to have been
    computed before this method runs.

    @param feedback_fn: function used to report progress and notes to the
        caller

    """
    # An empty volume group name is normalised to None before it is
    # compared with (and stored in) the configuration
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    # Same scheme for the DRBD helper
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    # From here on attributes of self.cluster are changed; the object is
    # handed to self.cfg.Update further down
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      # the hypervisor parameters are stored in this case as well, not
      # only when hvparams were passed explicitly
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams
    if self.op.hv_state:
      self.cluster.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.cluster.disk_state_static = self.new_disk_state

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      # enabling the flag without confd only produces a note, the value is
      # stored in any case
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    # add_uids/remove_uids modify the existing pool through the uidpool
    # helpers, while uid_pool replaces the pool as a whole
    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
      """Apply add/remove modifications to one OS name list of the cluster.

      The list is modified in place; adding a name which is already present
      or removing one which is missing only produces a feedback message.

      @param aname: name of the cluster attribute holding the list
      @param mods: iterable of (action, OS name) pairs, the action being
          C{constants.DDM_ADD} or C{constants.DDM_REMOVE}
      @param desc: short description of the list, used in the messages
      @raise errors.ProgrammerError: for any other action

      """
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      # The master IP is shut down on the current netdev before the new
      # one is recorded; it is started again after the configuration
      # update below. A failure to shut it down aborts the operation.
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      # A failed RPC is only reported through feedback_fn; the new netmask
      # is written to the cluster object regardless
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      # The master IP was shut down above; start it again using the
      # parameters read back from the updated configuration. A failure is
      # only logged as a warning at this point.
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
4244
4245
def _UploadHelper(lu, nodes, fname):
  """Upload a file to a set of nodes, warning about every failed copy.

  Nothing is done if the file does not exist locally.

  @param lu: calling logical unit
  @param nodes: nodes to upload to (passed on to the upload RPC)
  @param fname: path of the file to upload

  """
  if not os.path.exists(fname):
    return

  upload_results = lu.rpc.call_upload_file(nodes, fname)
  for (node_name, node_result) in upload_results.items():
    failure = node_result.fail_msg
    if not failure:
      continue
    # a failed copy is not fatal, it is only reported
    lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                       (fname, node_name, failure))
4258
4259
def _ComputeAncillaryFiles(cluster, redist):
  """Compute the files external to Ganeti which have to be consistent.

  @param cluster: cluster object; its C{modify_etc_hosts} and
      C{enabled_hypervisors} attributes are used
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
  @rtype: tuple
  @return: C{(files_all, files_opt, files_mc, files_vm)}, each of them a
      set of file names

  """
  def _HypervisorFiles(idx):
    """Collect entry C{idx} of the ancillary files of enabled hypervisors.

    """
    collected = set()
    for hv_name in cluster.enabled_hypervisors:
      hv_files = hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()
      collected.update(hv_files[idx])
    return collected

  # Files which have to be on all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)
  else:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Optional files; each of them must
  # - be listed in one of the other categories as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([constants.RAPI_USERS_FILE])

  # Files restricted to master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files restricted to VM-capable nodes, and the optional ones among the
  # hypervisor files
  files_vm = _HypervisorFiles(0)
  files_opt |= _HypervisorFiles(1)

  # A file name may show up in one category only
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          len(files_all) + len(files_mc) + len(files_vm)), \
         "Found file listed in more than one file list"

  # Every optional file has to be part of another category
  assert files_opt.issubset(all_files_set), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
4324
4325
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Copy the ancillary cluster files to the nodes.

  The configuration and ssconf files are distributed by ConfigWriter; this
  function takes care of the other files which have to be present on the
  nodes. The master node is never used as a target.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  cfg = lu.cfg
  cluster = cfg.GetClusterInfo()
  master_info = cfg.GetNodeInfo(cfg.GetMasterNode())

  # Targets: online nodes for the common files, vm-capable nodes for the
  # files computed from the enabled hypervisors
  all_targets = cfg.GetOnlineNodeList()
  vm_targets = cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    all_targets.extend(additional_nodes)
    if additional_vm:
      vm_targets.extend(additional_nodes)

  # Never distribute to master node
  for targets in (all_targets, vm_targets):
    if master_info.name in targets:
      targets.remove(master_info.name)

  # File lists, computed in redistribution mode
  (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)

  # The configuration file must never be re-distributed from here
  assert (constants.CLUSTER_CONF_FILE not in files_all and
          constants.CLUSTER_CONF_FILE not in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # Upload every file to the nodes of its category
  for (targets, fnames) in ((all_targets, files_all), (vm_targets, files_vm)):
    for fname in fnames:
      _UploadHelper(lu, targets, fname)
4374
4375
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The LU only re-submits the current cluster object to the configuration
  writer and then copies the ancillary files to the nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request a shared lock on all nodes.

    """
    node_level = locking.LEVEL_NODE
    self.share_locks[node_level] = 1
    self.needed_locks = {
      node_level: locking.ALL_SET,
    }

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: function passed on to the configuration update

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
4396
4397
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    The RPC call is addressed to the node named in the master network
    parameters; a failed call is raised through the result's C{Raise}
    method.

    """
    cfg = self.cfg
    net_params = cfg.GetMasterNetworkParameters()
    use_ext_script = cfg.GetUseExternalMipScript()
    rpc_result = self.rpc.call_node_activate_master_ip(net_params.name,
                                                       net_params,
                                                       use_ext_script)
    rpc_result.Raise("Could not activate the master IP")
4411
4412
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    The RPC call is addressed to the node named in the master network
    parameters; a failed call is raised through the result's C{Raise}
    method.

    """
    cfg = self.cfg
    net_params = cfg.GetMasterNetworkParameters()
    use_ext_script = cfg.GetUseExternalMipScript()
    rpc_result = self.rpc.call_node_deactivate_master_ip(net_params.name,
                                                         net_params,
                                                         use_ext_script)
    rpc_result.Raise("Could not deactivate the master IP")
4426
4427
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status is queried on the primary node of the instance until
  no device reports a sync in progress anymore.

  @param lu: calling logical unit
  @param instance: the instance whose disks are checked
  @param disks: the disks to check; passed through C{_ExpandCheckDisks}
      together with the instance. An empty (but not None) value makes the
      function return immediately.
  @type oneshot: boolean
  @param oneshot: if set, don't wait for the sync to finish but stop after
      the first successful poll (apart from the short retries done for
      degraded disks)
  @rtype: boolean
  @return: True if nothing had to be checked or if no device was degraded
      (without a sync in progress) in the last poll, False otherwise
  @raise errors.RemoteError: if the mirror status could not be retrieved
      ten times in a row

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  # number of consecutive failed RPC calls; reset after each success
  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    # per-poll state, recomputed from scratch on every iteration
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    # the payload is indexed like the disks list (disks[i] is used for
    # the name of the device behind entry i)
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      # a device only counts as degraded if it's not currently syncing
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        # a sync percentage means the device is still syncing
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          # NOTE(review): this keeps the estimate of the last device which
          # reported one, not the maximum over all devices; if no device
          # reports an estimate max_time stays 0 and the loop polls again
          # without sleeping - confirm whether max() was intended
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # wait for the estimated time, but poll at least once per minute
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
4501
4502
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that a mirrored device and its children are not degraded.

  By default the C{is_degraded} attribute (overall non-ok status of the
  device(s)) is tested; with C{ldisk} set, the local storage status is
  compared against C{constants.LDS_OKAY} instead. The children are always
  checked with the default test.

  @param lu: calling logical unit
  @param dev: the device to check
  @param node: the node on which the device is looked up
  @param on_primary: whether the node is the primary one; on other nodes
      the device is only queried if C{dev.AssembleOnSecondary()} says so
  @type ldisk: boolean
  @param ldisk: whether to test the local storage status
  @rtype: boolean
  @return: whether the device and all its children passed the check

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    found = lu.rpc.call_blockdev_find(node, dev)
    failure = found.fail_msg
    if failure:
      lu.LogWarning("Can't find disk on node %s: %s", node, failure)
      consistent = False
    elif not found.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = found.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not found.payload.is_degraded

  # the "and" short-circuits, so after the first failure the remaining
  # children are not queried anymore
  for child in dev.children or ():
    consistent = (consistent and
                  _CheckDiskConsistency(lu, child, node, on_primary))

  return consistent
4535
4536
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  Runs an out-of-band command for a list of nodes (or for all nodes if
  none were given) and collects the per-node results.

  """
  REQ_BGL = False
  # Commands which are refused for (or silently skip) the master node, see
  # CheckPrereq
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    If node names were given they are expanded via C{_GetWantedNodes} and
    only those nodes are locked, otherwise all nodes are locked.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      # Explicitly asking for a power-off/power-cycle of the master node
      # is always an error; the message depends on whether the master
      # has an OOB handler at all
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      # No names given: operate on all nodes, silently leaving out the
      # master for the commands listed in _SKIP_MASTER
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      # unless ignore_status is set, only nodes marked offline may be
      # powered off
      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    @param feedback_fn: function passed on to the configuration update
    @rtype: list
    @return: one entry per node, in the order given by C{utils.NiceSort}
        on the node names; each entry is a list made of
        C{(constants.RS_NORMAL, node name)} followed by a
        C{(status, payload)} tuple for the command

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      # node_entry is completed further down; it is already part of the
      # result at that point
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      # the RPC call always goes to the master node, the target node is
      # only passed along as an argument
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          # keep the recorded power state of the node in sync; for
          # power-status a mismatch is only logged, not corrected
          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          # wait between successful power-on commands, except after the
          # last node
          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    All problems found are collected and reported in a single exception.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    # health: a list of (item, status) pairs with known statuses
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    # power-status: a dictionary
    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    # the other power commands must not return any payload
    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
4715
4716
4717 class _OsQuery(_QueryBase):
4718   FIELDS = query.OS_FIELDS
4719
4720   def ExpandNames(self, lu):
4721     # Lock all nodes in shared mode
4722     # Temporary removal of locks, should be reverted later
4723     # TODO: reintroduce locks when they are lighter-weight
4724     lu.needed_locks = {}
4725     #self.share_locks[locking.LEVEL_NODE] = 1
4726     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4727
4728     # The following variables interact with _QueryBase._GetNames
4729     if self.names:
4730       self.wanted = self.names
4731     else:
4732       self.wanted = locking.ALL_SET
4733
4734     self.do_locking = self.use_locking
4735
  def DeclareLocks(self, lu, level):
    """Declare the locks for a locking level; does nothing for this query.

    @param lu: the calling logical unit (unused)
    @param level: the locking level (unused)

    """
    pass
4738
4739   @staticmethod
4740   def _DiagnoseByOS(rlist):
4741     """Remaps a per-node return list into an a per-os per-node dictionary
4742
4743     @param rlist: a map with node names as keys and OS objects as values
4744
4745     @rtype: dict
4746     @return: a dictionary with osnames as keys and as value another
4747         map, with nodes as keys and tuples of (path, status, diagnose,
4748         variants, parameters, api_versions) as values, eg::
4749
4750           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4751                                      (/srv/..., False, "invalid api")],
4752                            "node2": [(/srv/..., True, "", [], [])]}
4753           }
4754
4755     """
4756     all_os = {}
4757     # we build here the list of nodes that didn't fail the RPC (at RPC
4758     # level), so that nodes with a non-responding node daemon don't
4759     # make all OSes invalid
4760     good_nodes = [node_name for node_name in rlist
4761                   if not rlist[node_name].fail_msg]
4762     for node_name, nr in rlist.items():
4763       if nr.fail_msg or not nr.payload:
4764         continue
4765       for (name, path, status, diagnose, variants,
4766            params, api_versions) in nr.payload:
4767         if name not in all_os:
4768           # build a list of nodes for this os containing empty lists
4769           # for each node in node_list
4770           all_os[name] = {}
4771           for nname in good_nodes:
4772             all_os[name][nname] = []
4773         # convert params from [name, help] to (name, help)
4774         params = [tuple(v) for v in params]
4775         all_os[name][node_name].append((path, status, diagnose,
4776                                         variants, params, api_versions))
4777     return all_os
4778
4779   def _GetQueryData(self, lu):
4780     """Computes the list of nodes and their attributes.
4781
4782     """
4783     # Locking is not used
4784     assert not (compat.any(lu.glm.is_owned(level)
4785                            for level in locking.LEVELS
4786                            if level != locking.LEVEL_CLUSTER) or
4787                 self.do_locking or self.use_locking)
4788
4789     valid_nodes = [node.name
4790                    for node in lu.cfg.GetAllNodesInfo().values()
4791                    if not node.offline and node.vm_capable]
4792     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4793     cluster = lu.cfg.GetClusterInfo()
4794
4795     data = {}
4796
4797     for (os_name, os_data) in pol.items():
4798       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4799                           hidden=(os_name in cluster.hidden_os),
4800                           blacklisted=(os_name in cluster.blacklisted_os))
4801
4802       variants = set()
4803       parameters = set()
4804       api_versions = set()
4805
4806       for idx, osl in enumerate(os_data.values()):
4807         info.valid = bool(info.valid and osl and osl[0][1])
4808         if not info.valid:
4809           break
4810
4811         (node_variants, node_params, node_api) = osl[0][3:6]
4812         if idx == 0:
4813           # First entry
4814           variants.update(node_variants)
4815           parameters.update(node_params)
4816           api_versions.update(node_api)
4817         else:
4818           # Filter out inconsistent values
4819           variants.intersection_update(node_variants)
4820           parameters.intersection_update(node_params)
4821           api_versions.intersection_update(node_api)
4822
4823       info.variants = list(variants)
4824       info.parameters = list(parameters)
4825       info.api_versions = list(api_versions)
4826
4827       data[os_name] = info
4828
4829     # Prepare data in requested order
4830     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4831             if name in data]
4832
4833
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  The work is delegated to an L{_OsQuery} object built in
  L{CheckArguments}.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: the requested output fields
    @param names: the OS names handed to C{qlang.MakeSimpleFilter}
    @return: the name filter, the status filter, both combined with
        C{qlang.OP_AND}, or C{None} if neither applies

    """
    by_name = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for flag in ["hidden", "blacklisted"]:
      if flag not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, flag]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      by_status = [qlang.OP_AND] + conditions
    else:
      by_status = None

    if not by_name:
      return by_status
    if by_status:
      return [qlang.OP_AND, by_name, by_status]
    return by_name

  def CheckArguments(self):
    """Creates the OS query object from the opcode parameters.

    """
    requested = self.op.output_fields
    qfilter = self._BuildFilter(requested, self.op.names)
    # The third argument is always False here (the other query LUs pass
    # their use_locking flag in this position)
    self.oq = _OsQuery(qfilter, requested, False)

  def ExpandNames(self):
    """Lets the query object expand names and declare its locks.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.oq.OldStyleQuery(self)
4876
4877
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the hook environment, naming the node being removed

    """
    target = self.op.node_name
    env = {}
    env["OP_TARGET"] = target
    env["NODE_NAME"] = target
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The node being removed is left out of both returned lists; running
    the pre phase on it would make a failed node impossible to remove.
    L{Exec} triggers the post hook on that node separately.

    @rtype: tuple
    @return: the same list of remaining node names, for both phases

    """
    remaining = self.cfg.GetNodeList()
    if self.op.node_name in remaining:
      remaining.remove(self.op.node_name)
    return (remaining, remaining)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    if self.cfg.GetMasterNode() == node.name:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    all_instances = self.cfg.GetAllInstancesInfo()
    for (instance_name, instance) in all_instances.items():
      if node.name not in instance.all_nodes:
        continue
      raise errors.OpPrereqError("Instance %s is still running on the node,"
                                 " please remove first" % instance_name,
                                 errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    The node is dropped from the configuration first, then its post hook
    is run and it is asked to leave the cluster; finally the master's
    hosts file is updated if the cluster manages it.

    """
    target = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 target.name)

    # Read before the node is taken out of the configuration
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[target.name])
    self.context.RemoveNode(target.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, target.name)

    leave_result = self.rpc.call_node_leave_cluster(target.name,
                                                    modify_ssh_setup)
    leave_msg = leave_result.fail_msg
    if leave_msg:
      # Best effort only: a failure here is reported but not fatal
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_msg)

    # Remove node from our /etc/hosts
    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                  constants.ETC_HOSTS_REMOVE,
                                                  target.name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
4970
4971
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted nodes and declares the needed locks.

    Node locks are only requested if locking was asked for and live data
    (C{query.NQ_LIVE}) is among the requested data.

    @param lu: the logical unit on whose behalf the query runs

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    # If any non-static field is requested we need to lock the nodes
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Nothing to declare per level; see L{ExpandNames}.

    """
    return None

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Each optional piece of data (live data, instance mappings, OOB support,
    node groups) is only gathered if the matching C{query.NQ_*} flag is in
    C{self.requested_data}.

    @param lu: the logical unit on whose behalf the query runs
    @rtype: L{query.NodeQueryData}

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    live_data = None
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = []
      for name in nodenames:
        if all_info[name].vm_capable:
          toquery_nodes.append(name)

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = {}
      for (name, nresult) in node_data.items():
        # failed or empty answers simply yield no live data for the node
        if nresult.fail_msg or not nresult.payload:
          continue
        live_data[name] = _MakeLegacyNodeInfo(nresult.payload)

    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in self.requested_data:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      for inst in lu.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    oob_support = None
    if query.NQ_OOB in self.requested_data:
      oob_support = {}
      for (name, node) in all_info.iteritems():
        oob_support[name] = bool(_SupportsOob(lu.cfg, node))

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    node_objs = [all_info[name] for name in nodenames]
    return query.NodeQueryData(node_objs,
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
5046
5047
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All phases are forwarded to a L{_NodeQuery} object.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the node query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Forwards name expansion to the query object.

    """
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forwards lock declaration for C{level} to the query object.

    """
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.nq.OldStyleQuery(self)
5067
5068
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validates the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares shared locks on the given nodes, or on all nodes.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the locked nodes.

    Offline nodes are skipped silently, nodes whose RPC call failed are
    skipped with a warning.

    @rtype: list
    @return: one row per volume; every row is the list of the requested
        output fields' values, converted to strings

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    vol2inst = \
      _MapInstanceDisksToNodes(self.cfg.GetAllInstancesInfo().values())

    # One getter per known field; each is only evaluated when the field
    # was actually requested
    getters = {
      "node": lambda node, vol: node,
      "phys": lambda node, vol: vol["dev"],
      "vg": lambda node, vol: vol["vg"],
      "name": lambda node, vol: vol["name"],
      "size": lambda node, vol: int(float(vol["size"])),
      "instance": lambda node, vol: vol2inst.get((node, vol["vg"] + "/" +
                                                  vol["name"]), "-"),
      }

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      if nresult.fail_msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node,
                        nresult.fail_msg)
        continue

      for vol in sorted(nresult.payload, key=operator.itemgetter("dev")):
        row = []
        for field in self.op.output_fields:
          getter = getters.get(field)
          if getter is None:
            raise errors.ParameterError(field)
          row.append(str(getter(node, vol)))

        output.append(row)

    return output
5137
5138
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the requested output fields.

    """
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declares shared locks on the given nodes, or on all nodes.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently, nodes whose RPC call failed are
    skipped with a warning.

    @rtype: list
    @return: one row per storage unit, ordered by node and then by unit
        name; every row holds the requested output fields' values

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      rpc_fields = requested[:]
    else:
      rpc_fields = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    lu_only = [constants.SF_NODE, constants.SF_TYPE]
    rpc_fields = [fname for fname in rpc_fields if fname not in lu_only]

    # Position of every queried field inside a returned row
    field_idx = {}
    for (idx, fname) in enumerate(rpc_fields):
      field_idx[fname] = idx
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, rpc_fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      if nresult.fail_msg:
        self.LogWarning("Can't get storage data from node %s: %s", node,
                        nresult.fail_msg)
        continue

      rows_by_name = dict((row[name_idx], row) for row in nresult.payload)

      for unit_name in utils.NiceSort(rows_by_name.keys()):
        row = rows_by_name[unit_name]

        out = []
        for field in requested:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
5220
5221
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  Declares the locks needed for the query and gathers the data handed to
  L{query.InstanceQueryData}.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted instances and the initial lock declarations.

    Locks are only requested if locking was asked for and live data
    (C{query.IQ_LIVE}) is among the requested data.

    @param lu: the logical unit on whose behalf the query runs

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      # Node group and node locks start out empty; they are filled in by
      # DeclareLocks once the instance locks are held
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # Group locks are only taken when node data is requested as well
    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    """Declares the node group and node locks of the locked instances.

    Does nothing unless L{ExpandNames} decided that locking is needed.

    @param lu: the logical unit on whose behalf the query runs
    @param level: the locking level being declared

    """
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    """Verifies the optimistically acquired node group locks.

    Runs L{_CheckInstanceNodeGroups} for every instance whose lock is
    owned, passing the set of owned node group locks.

    @param lu: the logical unit owning the locks

    """
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Each optional piece of data (live data, disk usage, console
    information, node and group objects) is only gathered if the matching
    C{query.IQ_*} flag is in C{self.requested_data}.

    @param lu: the logical unit on whose behalf the query runs
    @rtype: L{query.InstanceQueryData}

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    # All nodes used by the selected instances and all their hypervisors
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                # NOTE(review): this merges the node's whole payload, not
                # only the entry for "inst", so data of other instances
                # reported by this node ends up in live_data too -- confirm
                # whether consumers rely on that
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    # From here on "nodes" no longer holds the node names used above but
    # either a name-to-object map or None
    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
5352
5353
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  All phases are forwarded to the query implementation selected by
  C{self.op.what}.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation for the requested kind.

    """
    impl_cls = _GetQueryImplementation(self.op.what)
    self.impl = impl_cls(self.op.qfilter, self.op.fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Forwards name expansion to the query implementation.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forwards lock declaration for C{level} to the query implementation.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its new-style result.

    """
    return self.impl.NewStyleQuery(self)
5374
5375
class LUQueryFields(NoHooksLU):
  """Query for the available fields of resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation for the requested kind.

    """
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Returns the field definitions selected by C{self.op.fields}.

    """
    field_defs = self.qcls.FIELDS
    return query.QueryFields(field_defs, self.op.fields)
5391
5392
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type has no modifiable
        fields at all, or if a field to be changed is not modifiable

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if storage_type not in constants.MODIFIABLE_STORAGE_FIELDS:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)
    modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]

    # Anything requested beyond the modifiable fields is an error
    diff = set(self.op.changes.keys()) - modifiable
    if not diff:
      return

    raise errors.OpPrereqError("The following fields can not be modified for"
                               " storage units of type '%s': %r" %
                               (storage_type, list(diff)),
                               errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declares the lock on the node holding the storage unit.

    """
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = self.op.node_name

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    modify_result = self.rpc.call_storage_modify(self.op.node_name,
                                                 self.op.storage_type,
                                                 st_args, self.op.name,
                                                 self.op.changes)
    modify_result.Raise("Failed to modify storage unit '%s' on %s" %
                        (self.op.name, self.op.node_name))
5433
5434
5435 class LUNodeAdd(LogicalUnit):
5436   """Logical unit for adding node to the cluster.
5437
5438   """
5439   HPATH = "node-add"
5440   HTYPE = constants.HTYPE_NODE
5441   _NFLAGS = ["master_capable", "vm_capable"]
5442
5443   def CheckArguments(self):
5444     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5445     # validate/normalize the node name
5446     self.hostname = netutils.GetHostname(name=self.op.node_name,
5447                                          family=self.primary_ip_family)
5448     self.op.node_name = self.hostname.name
5449
5450     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5451       raise errors.OpPrereqError("Cannot readd the master node",
5452                                  errors.ECODE_STATE)
5453
5454     if self.op.readd and self.op.group:
5455       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5456                                  " being readded", errors.ECODE_INVAL)
5457
5458   def BuildHooksEnv(self):
5459     """Build hooks env.
5460
5461     This will run on all nodes before, and on all nodes + the new node after.
5462
5463     """
5464     return {
5465       "OP_TARGET": self.op.node_name,
5466       "NODE_NAME": self.op.node_name,
5467       "NODE_PIP": self.op.primary_ip,
5468       "NODE_SIP": self.op.secondary_ip,
5469       "MASTER_CAPABLE": str(self.op.master_capable),
5470       "VM_CAPABLE": str(self.op.vm_capable),
5471       }
5472
5473   def BuildHooksNodes(self):
5474     """Build hooks nodes.
5475
5476     """
5477     # Exclude added node
5478     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5479     post_nodes = pre_nodes + [self.op.node_name, ]
5480
5481     return (pre_nodes, post_nodes)
5482
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Besides checking, this fills in C{self.op.primary_ip},
    C{self.op.secondary_ip} and the C{_capable} flags, and sets
    C{self.changed_primary_ip}, C{self.master_candidate} and
    C{self.new_node} (plus C{self.new_hv_state}/C{self.new_disk_state} if
    the respective opcode parameter is given).

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    # The primary IP always comes from name resolution
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      # No secondary IP given: default to the primary one
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    # A plain add requires an unknown node, a re-add a known one
    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    # Compare the new addresses against every configured node
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        # The re-added node itself: the secondary IP must be unchanged, a
        # changed primary IP is only recorded
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      # Unset flags are inherited from the existing node object
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      # Unset flags default to True for new nodes
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    # On re-add the node itself is passed as an exception to the
    # self-promotion decision below
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      # Re-adds reuse the existing node object
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    # NOTE: new_hv_state/new_disk_state are only set if the respective
    # opcode parameter is given
    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)

    # TODO: If we need to have multiple DnsOnlyRunner we probably should make
    #       it a property on the base class.
    # Finally make sure the node speaks the same protocol version as we do
    result = rpc.DnsOnlyRunner().call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpPrereqError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.payload),
                                 errors.ECODE_ENVIRON)
5630
5631   def Exec(self, feedback_fn):
5632     """Adds the new node to the cluster.
5633
5634     """
5635     new_node = self.new_node
5636     node = new_node.name
5637
5638     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5639       "Not owning BGL"
5640
5641     # We adding a new node so we assume it's powered
5642     new_node.powered = True
5643
5644     # for re-adds, reset the offline/drained/master-candidate flags;
5645     # we need to reset here, otherwise offline would prevent RPC calls
5646     # later in the procedure; this also means that if the re-add
5647     # fails, we are left with a non-offlined, broken node
5648     if self.op.readd:
5649       new_node.drained = new_node.offline = False # pylint: disable=W0201
5650       self.LogInfo("Readding a node, the offline/drained flags were reset")
5651       # if we demote the node, we do cleanup later in the procedure
5652       new_node.master_candidate = self.master_candidate
5653       if self.changed_primary_ip:
5654         new_node.primary_ip = self.op.primary_ip
5655
5656     # copy the master/vm_capable flags
5657     for attr in self._NFLAGS:
5658       setattr(new_node, attr, getattr(self.op, attr))
5659
5660     # notify the user about any possible mc promotion
5661     if new_node.master_candidate:
5662       self.LogInfo("Node will be a master candidate")
5663
5664     if self.op.ndparams:
5665       new_node.ndparams = self.op.ndparams
5666     else:
5667       new_node.ndparams = {}
5668
5669     if self.op.hv_state:
5670       new_node.hv_state_static = self.new_hv_state
5671
5672     if self.op.disk_state:
5673       new_node.disk_state_static = self.new_disk_state
5674
5675     # Add node to our /etc/hosts, and add key to known_hosts
5676     if self.cfg.GetClusterInfo().modify_etc_hosts:
5677       master_node = self.cfg.GetMasterNode()
5678       result = self.rpc.call_etc_hosts_modify(master_node,
5679                                               constants.ETC_HOSTS_ADD,
5680                                               self.hostname.name,
5681                                               self.hostname.ip)
5682       result.Raise("Can't update hosts file with new host data")
5683
5684     if new_node.secondary_ip != new_node.primary_ip:
5685       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5686                                False)
5687
5688     node_verify_list = [self.cfg.GetMasterNode()]
5689     node_verify_param = {
5690       constants.NV_NODELIST: ([node], {}),
5691       # TODO: do a node-net-test as well?
5692     }
5693
5694     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5695                                        self.cfg.GetClusterName())
5696     for verifier in node_verify_list:
5697       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5698       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5699       if nl_payload:
5700         for failed in nl_payload:
5701           feedback_fn("ssh/hostname verification failed"
5702                       " (checking from %s): %s" %
5703                       (verifier, nl_payload[failed]))
5704         raise errors.OpExecError("ssh/hostname verification failed")
5705
5706     if self.op.readd:
5707       _RedistributeAncillaryFiles(self)
5708       self.context.ReaddNode(new_node)
5709       # make sure we redistribute the config
5710       self.cfg.Update(new_node, feedback_fn)
5711       # and make sure the new node will not have old files around
5712       if not new_node.master_candidate:
5713         result = self.rpc.call_node_demote_from_mc(new_node.name)
5714         msg = result.fail_msg
5715         if msg:
5716           self.LogWarning("Node failed to demote itself from master"
5717                           " candidate status: %s" % msg)
5718     else:
5719       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5720                                   additional_vm=self.op.vm_capable)
5721       self.context.AddNode(new_node, self.proc.GetECId())
5722
5723
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validates the opcode parameters and decides what to lock.

    Sets C{self.might_demote}, C{self.lock_all} and
    C{self.lock_instances} for use by the later phases.

    @raise errors.OpPrereqError: if no modification was requested, if
        more than one of the checked values is C{True} or if the
        secondary IP is not a valid IPv4 address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # NOTE(review): self.op.powered is not part of this list, so a
    # request changing only the powered state is rejected below
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    # NOTE(review): this also counts master_capable/vm_capable, hence
    # setting both to True in one request is refused as well -- confirm
    # this is intended
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    # All nodes are locked only if a demotion may need a replacement
    # candidate; instances are locked when the secondary IP changes
    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    @param instance: the instance object to check
    @return: whether the instance uses an internally mirrored disk
        template and has the modified node among its nodes

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    """Declares the node, node resource and instance locks.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get node resource and instance locks in shared mode; they are not used
    # for anything but read-only access
    self.share_locks[locking.LEVEL_NODE_RES] = 1
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    @return: dictionary with the target node and the requested flag
        values, each converted to a string

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two node lists, both made of the master node and
        the modified node

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the requested changes are compatible with the current
    state of the node, computes the role the node will have afterwards
    (C{self.old_role}, C{self.new_role}) and prepares the new node
    parameters and static hypervisor/disk state.

    @raise errors.OpPrereqError: if any of the requested changes is not
        possible

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          inst_names = utils.CommaJoin(affected_instances.keys())
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" % inst_names,
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    @param feedback_fn: function used to send feedback back to the caller
    @return: list of (parameter name, new value) pairs describing the
        capability flag, role flag and secondary IP changes applied

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      # report only the flags whose value actually changes
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
6057
6058
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and protects the master node.

    @raise errors.OpPrereqError: if the target is the master node and
        the force parameter was not set

    """
    expanded = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = expanded

    targets_master = (expanded == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort measure which must not wait for
    other jobs, hence no locks at all are requested.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    @param feedback_fn: function used to send feedback back to the caller
    @return: the payload of the powercycle RPC call

    """
    powercycle = self.rpc.call_node_powercycle(self.op.node_name,
                                               self.cfg.GetHypervisorType())
    powercycle.Raise("Failed to schedule the reboot")
    return powercycle.payload
6089
6090
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """No locks are requested for this query.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @param feedback_fn: function used to send feedback back to the caller
    @return: dictionary with version information and the cluster-wide
        settings; hypervisor related entries are restricted to the
        enabled hypervisors

    """
    cluster = self.cfg.GetClusterInfo()

    # Per-OS hypervisor parameters, limited to the enabled hypervisors
    os_hvp = {}
    for os_name, per_hv in cluster.os_hvp.items():
      os_hvp[os_name] = dict([(hv_name, hv_params)
                              for (hv_name, hv_params) in per_hv.items()
                              if hv_name in cluster.enabled_hypervisors])

    # Cluster-level hypervisor parameters of the enabled hypervisors
    hvparams = {}
    for hv_name in cluster.enabled_hypervisors:
      hvparams[hv_name] = cluster.hvparams[hv_name]

    # Translate the address family into an IP version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      ip_version = constants.IP6_VERSION
    else:
      ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
6161
6162
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  All the work is delegated to a L{_ClusterQuery} object created in
  L{CheckArguments}.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the query object for the requested output fields.

    """
    requested_fields = self.op.output_fields
    self.cq = _ClusterQuery(None, requested_fields, False)

  def ExpandNames(self):
    """Lets the query object set up the lock requirements.

    """
    query_obj = self.cq
    query_obj.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forwards the lock declaration to the query object.

    """
    query_obj = self.cq
    query_obj.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query.

    @param feedback_fn: function used to send feedback back to the caller
    @return: the first (and only expected) entry of the query result

    """
    rows = self.cq.OldStyleQuery(self)

    # exactly one result entry is expected
    assert len(rows) == 1

    return rows[0]
6184
6185
class _ClusterQuery(_QueryBase):
  """Query implementation for cluster-level data.

  """
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)
  SORT_FIELD = None

  def ExpandNames(self, lu):
    """Requests no locks and refuses queries asking for locking.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if locking was requested

    """
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    locking_requested = self.use_locking
    self.wanted = locking.ALL_SET
    self.do_locking = locking_requested

    if locking_requested:
      raise errors.OpPrereqError("Can not use locking for cluster queries",
                                 errors.ECODE_INVAL)

  def DeclareLocks(self, lu, level):
    """Nothing to declare, cluster queries do not use locks.

    """
    pass

  def _GetQueryData(self, lu):
    """Collects the data needed for the requested cluster fields.

    Data which was not requested is replaced by C{NotImplemented}.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.ClusterQueryData} object built from the cluster
        configuration, the job queue drain flag and the watcher pause
        information

    """
    # Locking is not used
    assert not compat.any(lu.glm.is_owned(level)
                          for level in locking.LEVELS
                          if level != locking.LEVEL_CLUSTER)
    assert not self.do_locking and not self.use_locking

    cluster = drain_flag = watcher_pause = NotImplemented
    requested = self.requested_data

    if query.CQ_CONFIG in requested:
      cluster = lu.cfg.GetClusterInfo()

    if query.CQ_QUEUE_DRAINED in requested:
      drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)

    if query.CQ_WATCHER_PAUSE in requested:
      watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)

    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6232
6233
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fills in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node passes the online check.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @param feedback_fn: function used to send feedback back to the caller
    @return: the device information computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (success, device_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if success:
      return device_info

    raise errors.OpExecError("Cannot activate block devices")
6271
6272
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  The devices are assembled in two passes: first on every node of each
  disk's node tree with the primary flag unset, then once more on the
  primary node only, with the primary flag set.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, failures during the first pass
      are only logged and do not turn the overall status into a failure
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: C{(disks_ok, device_info)}; C{disks_ok} is False if any
      relevant assemble call failed, C{device_info} is a list of
      (primary node, disk iv_name, second-pass RPC payload or None)
      tuples, one per disk

  """
  primary = instance.primary_node
  instance_name = instance.name
  all_ok = True
  mapping = []
  disks = _ExpandCheckDisks(instance, disks)

  # The two passes mechanism only narrows the window of opportunity for
  # the race condition of switching DRBD to primary before handshaking
  # occurred; it does not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for (disk_index, top_disk) in enumerate(disks):
    for (target, child) in top_disk.ComputeNodeTree(primary):
      if ignore_size:
        # work on a copy so that the original disk keeps its size
        child = child.Copy()
        child.UnsetSize()
      lu.cfg.SetDiskID(child, target)
      fail_msg = lu.rpc.call_blockdev_assemble(target, child, instance_name,
                                               False, disk_index).fail_msg
      if not fail_msg:
        continue
      lu.proc.LogWarning("Could not prepare block device %s on node %s"
                         " (is_primary=False, pass=1): %s",
                         top_disk.iv_name, target, fail_msg)
      if not ignore_secondaries:
        all_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for (disk_index, top_disk) in enumerate(disks):
    visible_path = None

    for (target, child) in top_disk.ComputeNodeTree(primary):
      if target != primary:
        continue
      if ignore_size:
        child = child.Copy()
        child.UnsetSize()
      lu.cfg.SetDiskID(child, target)
      assemble = lu.rpc.call_blockdev_assemble(target, child, instance_name,
                                               True, disk_index)
      fail_msg = assemble.fail_msg
      if fail_msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           top_disk.iv_name, target, fail_msg)
        all_ok = False
      else:
        visible_path = assemble.payload

    mapping.append((primary, top_disk.iv_name, visible_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for top_disk in disks:
    lu.cfg.SetDiskID(top_disk, primary)

  return (all_ok, mapping)
6359
6360
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  If assembling the disks fails, they are shut down again and the
  operation is aborted.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed on as C{ignore_secondaries}; when it is neither
      None nor true, a hint about '--force' is logged on failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  hint_wanted = force is not None and not force
  if hint_wanted:
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
6374
6375
class LUInstanceDeactivateDisks(NoHooksLU):
  """Logical unit shutting down the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are filled in by DeclareLocks.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the (already locked) instance in the configuration
    and remembers it for L{Exec}.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    With the C{force} opcode flag the disks are shut down
    unconditionally, otherwise the instance state is checked first.

    """
    inst = self.instance
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks
    shutdown_fn(self, inst)
6410
6411
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance, after a state check.

  The instance state is first verified against C{INSTANCE_DOWN} via
  L{_CheckInstanceState}; only if that check passes the actual work is
  delegated to L{_ShutdownInstanceDisks}.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down
  @param disks: optional selection of disks, handed on unchanged

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN,
                      msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks)
6421
6422
6423 def _ExpandCheckDisks(instance, disks):
6424   """Return the instance disks selected by the disks list
6425
6426   @type disks: list of L{objects.Disk} or None
6427   @param disks: selected disks
6428   @rtype: list of L{objects.Disk}
6429   @return: selected instance disks to act on
6430
6431   """
6432   if disks is None:
6433     return instance.disks
6434   else:
6435     if not set(disks).issubset(instance.disks):
6436       raise errors.ProgrammerError("Can only act on disks belonging to the"
6437                                    " target instance")
6438     return disks
6439
6440
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance. Every failure is
  logged as a warning; whether it also makes the overall result false
  depends on the node it happened on:

    - on the primary node, failures count unless C{ignore_primary} is
      true
    - on any other node, failures count unless the RPC result flags the
      node as offline

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: the disks to shut down, C{None} meaning all disks of
      the instance
  @type ignore_primary: boolean
  @param ignore_primary: if true, errors on the primary node are
      ignored
  @rtype: boolean
  @return: False if any non-ignored shutdown failed, True otherwise

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      # the disk object must carry the IDs valid for the node we talk to
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result
6465
6466
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node can provide a given amount of free memory.

  The node is queried via the C{node_info} RPC call for the given
  hypervisor. A failed query, a non-integer answer or too little free
  memory all abort the operation with an OpPrereqError.

  @type lu: C{LogicalUnit}
  @param lu: the logical unit whose RPC runner is used for the query
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: integer
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, [hypervisor_name])[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)
  # only the last element of the payload triple is of interest here; it
  # must hold exactly one entry, for the single hypervisor we asked about
  (_, _, (hv_info, )) = node_result.payload

  free_mem = hv_info.get("memory_free", None)
  if isinstance(free_mem, int):
    if requested > free_mem:
      raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                                 " needed %s MiB, available %s MiB" %
                                 (node, reason, requested, free_mem),
                                 errors.ECODE_NORES)
    return free_mem

  raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                             " was '%s'" % (node, free_mem),
                             errors.ECODE_ENVIRON)
6507
6508
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Check the free disk space of nodes for several volume groups.

  For every volume group in C{req_sizes} the actual check is delegated
  to L{_CheckNodesFreeDiskOnVG}, which raises an OpPrereqError if a
  node has too little space or cannot be queried.

  @type lu: C{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: maps each volume group name to the amount of disk
      space in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg_name in req_sizes:
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, req_sizes[vg_name])
6530
6531
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Check that all given nodes have enough free space in one VG.

  All nodes are queried with a single C{node_info} RPC call and then
  examined in the order given. The first node which cannot be queried,
  reports a non-integer value or has too little space aborts the
  operation with an OpPrereqError.

  @type lu: C{LogicalUnit}
  @param lu: the logical unit whose RPC runner is used for the query
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  all_results = lu.rpc.call_node_info(nodenames, [vg], None)
  for node in nodenames:
    node_result = all_results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    # the middle element of the payload triple must hold exactly one
    # entry, for the single volume group we asked about
    (_, (vg_info, ), _) = node_result.payload
    vg_free = vg_info.get("vg_free", None)

    if not isinstance(vg_free, int):
      err_msg = ("Can't compute free disk space on node"
                 " %s for vg %s, result was '%s'" % (node, vg, vg_free))
      raise errors.OpPrereqError(err_msg, errors.ECODE_ENVIRON)

    if requested > vg_free:
      err_msg = ("Not enough disk space on target node %s"
                 " vg %s: required %d MiB, available %d MiB" %
                 (node, vg, requested, vg_free))
      raise errors.OpPrereqError(err_msg, errors.ECODE_NORES)
6568
6569
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Check that all given nodes have enough physical CPUs.

  All nodes are queried with a single C{node_info} RPC call for the
  given hypervisor and then examined in the order given. The first node
  which cannot be queried, reports a non-integer C{cpu_total} or has
  fewer CPUs than requested aborts the operation with an OpPrereqError.

  @type lu: C{LogicalUnit}
  @param lu: the logical unit whose RPC runner is used for the query
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for CPU information
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  all_results = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for node in nodenames:
    node_result = all_results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    # the last element of the payload triple must hold exactly one
    # entry, for the single hypervisor we asked about
    (_, _, (hv_info, )) = node_result.payload
    num_cpus = hv_info.get("cpu_total", None)

    if not isinstance(num_cpus, int):
      err_msg = ("Can't compute the number of physical CPUs"
                 " on node %s, result was '%s'" % (node, num_cpus))
      raise errors.OpPrereqError(err_msg, errors.ECODE_ENVIRON)

    if requested > num_cpus:
      err_msg = ("Node %s has %s physical CPUs, but %s are "
                 "required" % (node, num_cpus, requested))
      raise errors.OpPrereqError(err_msg, errors.ECODE_NORES)
6603
6604
class LUInstanceStartup(LogicalUnit):
  """Logical unit starting an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Upgrade and type-check the temporary backend parameters, if any.

    """
    beparams = self.op.beparams
    if not beparams:
      return
    # fill the beparams dict
    objects.UpgradeBeParams(beparams)
    utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Lock the instance and request recalculation of node resource locks.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node resource locks, for the primary node only.

    """
    if level != locking.LEVEL_NODE_RES:
      return
    self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = {"FORCE": self.op.force}
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node plus all nodes of the instance) is used
    for both elements of the returned pair.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates any
    temporary hypervisor parameters and, unless an offline primary node
    is being ignored, checks that the primary node is online, has the
    needed bridges and (if the instance is not already running) enough
    free memory.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    hvparams = self.op.hvparams
    if hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(hvparams)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    pnode = instance.primary_node
    self.primary_offline = self.cfg.GetNodeInfo(pnode).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
      return

    _CheckNodeOnline(self, pnode)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    bep.update(self.op.beparams)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      # already running, no memory check needed
      return

    _CheckNodeFreeMemory(self, pnode,
                         "starting instance %s" % instance.name,
                         bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    Unless C{no_remember} is set, the instance is marked as up in the
    configuration first. If the primary node is offline nothing else is
    done; otherwise the disks are started and the instance is started on
    its primary node, shutting the disks down again if that fails.

    """
    instance = self.instance

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
      return

    _StartInstanceDisks(self, instance, self.op.force)

    start_args = (instance, self.op.hvparams, self.op.beparams)
    result = self.rpc.call_instance_start(instance.primary_node, start_args,
                                          self.op.startup_paused)
    fail_msg = result.fail_msg
    if fail_msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % fail_msg)
6725
6726
class LUInstanceReboot(LogicalUnit):
  """Logical unit rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance to be rebooted.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node plus all nodes of the instance) is used
    for both elements of the returned pair.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its state
    allows it to be online, that its primary node is online and that
    the needed bridges exist.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    A running instance asked to do a soft or hard reboot is rebooted by
    the primary node itself. In all other cases the instance is shut
    down (if running), its disks are restarted and it is started again.
    Finally the instance is marked as up in the configuration.

    """
    instance = self.instance
    pnode = instance.primary_node
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode)
    is_running = bool(remote_info.payload)

    if is_running and reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                                      constants.INSTANCE_REBOOT_HARD):
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      result = self.rpc.call_instance_reboot(pnode, instance, reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      # full reboot: stop (if needed), cycle the disks, start again
      if is_running:
        result = self.rpc.call_instance_shutdown(pnode, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, self.op.ignore_secondaries)
      result = self.rpc.call_instance_start(pnode, (instance, None, None),
                                            False)
      fail_msg = result.fail_msg
      if fail_msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % fail_msg)

    self.cfg.MarkInstanceUp(instance.name)
6819
6820
class LUInstanceShutdown(LogicalUnit):
  """Logical unit shutting down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance to be shut down.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.op.timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node plus all nodes of the instance) is used
    for both elements of the returned pair.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and, unless an
    offline primary node is being ignored, that its primary node is
    online.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    pnode = instance.primary_node
    self.primary_offline = self.cfg.GetNodeInfo(pnode).offline

    if not (self.primary_offline and self.op.ignore_offline_nodes):
      _CheckNodeOnline(self, pnode)
    else:
      self.proc.LogWarning("Ignoring offline primary node")

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    Unless C{no_remember} is set, the instance is marked as down in the
    configuration first. If the primary node is offline nothing else is
    done; otherwise the instance is shut down (a failure is only
    logged) and its disks are shut down.

    """
    instance = self.instance

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.timeout)
    fail_msg = result.fail_msg
    if fail_msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % fail_msg)

    _ShutdownInstanceDisks(self, instance)
6890
6891
class LUInstanceReinstall(LogicalUnit):
  """Logical unit reinstalling the OS of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance to be reinstalled.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node plus all nodes of the instance) is used
    for both elements of the returned pair.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that all its nodes are online and that it has disks. A new OS type
    and new OS parameters, if given, are verified as well.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node,
                     "Instance primary node offline, cannot reinstall")
    for snode in instance.secondary_nodes:
      _CheckNodeOnline(self, snode,
                       "Instance secondary node offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is None:
      instance_os = instance.os
    else:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      # the new dict (without defaults)
      new_osparams = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, new_osparams)
      self.os_inst = new_osparams
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    A changed OS type is written to the configuration first. The OS
    create scripts are then run on the primary node with the disks
    activated; the disks are shut down again in any case.

    """
    instance = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      instance.os = new_os
      # Write to configuration
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(instance.primary_node,
                                             (instance, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
6981
6982
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  # Disk parameters which may be changed while recreating a disk
  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def CheckArguments(self):
    """Normalize and validate the list of disks to recreate.

    A plain list of disk indices (deprecated format) is converted into
    a list of C{(index, parameters)} pairs with empty parameters.
    Duplicate indices and parameters outside of L{_MODIFYABLE} are
    rejected.

    @raise errors.OpPrereqError: on duplicate disk indices or if a
        parameter which can't be modified was passed

    """
    if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifyable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Lock the instance and, if given, the replacement nodes.

    The node locks of the instance itself are appended in
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    """Compute the node locks and mirror them as node resource locks.

    """
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node plus all nodes of the instance) is used
    for both elements of the returned pair.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    It also computes C{self.disks}, the mapping from the index of each
    disk to recreate to its parameter changes.

    @raise errors.OpPrereqError: if the number of replacement nodes
        doesn't match, the instance is diskless, a disk index is out of
        range, or only some disks are selected while changing nodes

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      # no explicit selection means all disks, without changes
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_INVAL)

    # compare against a real list, so that the check doesn't depend on
    # range() returning one
    if (self.op.nodes and
        sorted(self.disks.keys()) != list(range(len(instance.disks)))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    The needed changes (new DRBD logical IDs when changing nodes, new
    size/mode) are collected first and applied in a second step. The
    configuration is then updated if anything was modified, and finally
    the selected disks are created.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    disks_changed = False
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))
        disks_changed = True

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    # changed disk parameters must be written to the configuration too,
    # not only changed nodes
    if self.op.nodes or disks_changed:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)
7174
7175
class LUInstanceRename(LogicalUnit):
  """Logical unit giving an existing instance a new name.

  Besides the configuration entry, the instance lock, the file storage
  directory (for file-based disk templates) and the OS (via its rename
  script) are updated.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Validate the opcode parameters.

    @raise errors.OpPrereqError: if an IP check is requested without a
        name check

    """
    if not self.op.ip_check or self.op.name_check:
      return

    # TODO: make the ip check more flexible and not depend on the name check
    raise errors.OpPrereqError("IP address check requires a name check",
                               errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment is the one built by L{_BuildInstanceHookEnvByObject}
    with C{INSTANCE_NEW_NAME} added.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all nodes of the instance) is
    returned for both elements of the tuple.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    The instance is looked up and passed through L{_CheckNodeOnline} (for
    its primary node) and L{_CheckInstanceState} (with
    C{INSTANCE_NOT_RUNNING}). If a name check was requested the new name
    is resolved and C{self.op.new_name} is replaced by the resolved name;
    with an IP check the resolved address is additionally probed on the
    node daemon port.

    @raise errors.OpPrereqError: if the resolved name does not match the
        given one, if the IP address answers or if another instance
        already has the new name

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None
    _CheckNodeOnline(self, inst.primary_node)
    _CheckInstanceState(self, inst, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = inst

    wanted_name = self.op.new_name
    if self.op.name_check:
      resolved = netutils.GetHostname(name=wanted_name)
      if resolved.name != wanted_name:
        self.LogInfo("Resolved given name '%s' to '%s'", wanted_name,
                     resolved.name)

      matches = utils.MatchNameComponent(self.op.new_name, [resolved.name])
      if not matches:
        mismatch = ("Resolved hostname '%s' does not look the same as given"
                    " hostname '%s'" % (resolved.name, self.op.new_name))
        raise errors.OpPrereqError(mismatch, errors.ECODE_INVAL)

      # from now on only the resolved name is used
      wanted_name = self.op.new_name = resolved.name

      if self.op.ip_check:
        ip_answers = netutils.TcpPing(resolved.ip,
                                      constants.DEFAULT_NODED_PORT)
        if ip_answers:
          raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                     (resolved.ip, wanted_name),
                                     errors.ECODE_NOTUNIQUE)

    name_known = wanted_name in self.cfg.GetInstanceList()
    if name_known and wanted_name != inst.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 wanted_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    Failures after the configuration change are reported with a hint that
    the instance has already been renamed in Ganeti; a failing OS rename
    script only produces a warning.

    @return: the name of the instance as stored in the configuration
        after the rename

    """
    instance = self.instance
    previous_name = instance.name

    # The old directory must be remembered before the configuration is
    # changed, hence this is computed first
    move_storage_dir = (instance.disk_template in constants.DTS_FILEBASED and
                        self.op.new_name != previous_name)
    if move_storage_dir:
      previous_dir = os.path.dirname(instance.disks[0].logical_id[1])

    self.cfg.RenameInstance(previous_name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, previous_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # the configuration object has changed, fetch the renamed one
    instance = self.cfg.GetInstanceInfo(self.op.new_name)

    if move_storage_dir:
      current_dir = os.path.dirname(instance.disks[0].logical_id[1])
      dir_result = self.rpc.call_file_storage_dir_rename(instance.primary_node,
                                                         previous_dir,
                                                         current_dir)
      dir_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                       " (but the instance has been renamed in Ganeti)" %
                       (instance.primary_node, previous_dir, current_dir))

    _StartInstanceDisks(self, instance, None)
    try:
      script_result = self.rpc.call_instance_run_rename(instance.primary_node,
                                                        instance,
                                                        previous_name,
                                                        self.op.debug_level)
      failure = script_result.fail_msg
      if failure:
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (instance.name, instance.primary_node, failure))
        self.proc.LogWarning(warning)
    finally:
      # the disks were only started for the rename script
      _ShutdownInstanceDisks(self, instance)

    return instance.name
7294
7295
class LUInstanceRemove(LogicalUnit):
  """Logical unit removing an instance from the cluster.

  The instance is first shut down on its primary node; the disks and the
  configuration entry are then handled by L{_RemoveInstance}.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and initialise the node-level lock lists.

    """
    self._ExpandAndLockInstance()
    for node_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
      self.needed_locks[node_level] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node and node resource locks.

    @param level: the locking level being processed

    """
    if level == locking.LEVEL_NODE_RES:
      # Node resource locks are a copy of the node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment is the one built by L{_BuildInstanceHookEnvByObject}
    with C{SHUTDOWN_TIMEOUT} added.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of the master node alone and of all instance nodes
        followed by the master node

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, list(self.instance.all_nodes) + master_only)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    @raise errors.OpExecError: if the instance can't be shut down and
        failures are not ignored

    """
    inst = self.instance
    logging.info("Shutting down instance %s on node %s",
                 inst.name, inst.primary_node)

    shutdown = self.rpc.call_instance_shutdown(inst.primary_node, inst,
                                               self.op.shutdown_timeout)
    failure = shutdown.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(inst.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, inst, self.op.ignore_failures)
7372
7373
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove an instance's disks and its configuration entry.

  The instance lock is registered in C{lu.remove_locks} for removal.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to send a warning when disk removal
      fails and failures are ignored
  @param instance: the instance to remove
  @param ignore_failures: whether a failed disk removal is only reported
      instead of aborting the operation
  @raise errors.OpExecError: if the disks can't be removed and
      C{ignore_failures} is not set

  """
  logging.info("Removing block devices for instance %s", instance.name)

  disks_gone = _RemoveDisks(lu, instance, ignore_failures=ignore_failures)
  if not disks_gone:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  # Nobody else may have scheduled an instance lock removal already
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7394
7395
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All the work is delegated to an L{_InstanceQuery} object built from
  the opcode parameters.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Build the query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object expand the names.

    """
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks for the given level.

    @param level: the locking level being processed

    """
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query.

    @return: the result of the query object's C{OldStyleQuery} method

    """
    return self.iq.OldStyleQuery(self)
7415
7416
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The operation itself is carried out by a L{TLMigrateInstance} tasklet
  created with C{failover=True}; this class takes care of arguments,
  locking and hooks.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Copies the optional C{iallocator} and C{target_node} opcode
    parameters onto the LU, using C{None} when the opcode lacks them.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    """Expand names and set up locking and the failover tasklet.

    The node and node resource lock lists start out empty and are filled
    in by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout,
                                       ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and the target node are locked; for all
    other templates L{_LockInstancesNodes} is used.

    @param level: the locking level being processed

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        # The lock list is final, no recalculation wanted
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks; locking.ALL_SET may have been stored above and is
      # passed through unchanged, as it is assumed to be a marker value
      # which can't be sliced like a list of names
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      if node_locks == locking.ALL_SET:
        self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    On top of the values from L{_BuildInstanceHookEnvByObject} this
    provides C{IGNORE_CONSISTENCY}, C{SHUTDOWN_TIMEOUT}, C{OLD_PRIMARY},
    C{NEW_PRIMARY}, C{OLD_SECONDARY} and C{NEW_SECONDARY}; the last two
    are empty strings unless the disk template is internally mirrored.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    # Values from the generic instance environment take precedence
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of the master node plus the secondary nodes, and of
        the same list with the primary node appended

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7504
7505
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The operation itself is carried out by a
  L{TLMigrateInstance} tasklet created with C{failover=False}.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set up locking and the migration tasklet.

    The node and node resource lock lists start out empty and are filled
    in by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # Node resource locks are declared as well (like LUInstanceFailover
    # does), giving the LEVEL_NODE_RES handling in DeclareLocks an entry
    # to fill in
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=self.op.cleanup,
                        failover=False,
                        fallback=self.op.allow_failover,
                        allow_runtime_changes=self.op.allow_runtime_changes,
                        ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and the target node are locked; for all
    other templates L{_LockInstancesNodes} is used.

    @param level: the locking level being processed

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        # The lock list is final, no recalculation wanted
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks; locking.ALL_SET may have been stored above and is
      # passed through unchanged, as it is assumed to be a marker value
      # which can't be sliced like a list of names
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      if node_locks == locking.ALL_SET:
        self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    On top of the values from L{_BuildInstanceHookEnvByObject} this
    provides C{MIGRATE_LIVE}, C{MIGRATE_CLEANUP}, C{OLD_PRIMARY},
    C{NEW_PRIMARY}, C{ALLOW_RUNTIME_CHANGES}, C{OLD_SECONDARY} and
    C{NEW_SECONDARY}; the last two are C{None} unless the disk template
    is internally mirrored.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of the master node plus the secondary nodes, and of
        the same list with the primary node appended

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7588
7589
class LUInstanceMove(LogicalUnit):
  """Move an instance to another node by copying its disk data.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the names and declare the initial locks.

    The target node lock is requested right away; the node lock
    recalculation mode is set to C{constants.LOCKS_APPEND}.

    """
    self._ExpandAndLockInstance()
    expanded_target = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = expanded_target
    self.needed_locks[locking.LEVEL_NODE] = [expanded_target]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    @param level: the locking level being processed

    """
    if level == locking.LEVEL_NODE_RES:
      # Node resource locks are a copy of the node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    C{TARGET_NODE} and C{SHUTDOWN_TIMEOUT} are provided in addition to
    the values from L{_BuildInstanceHookEnvByObject}, which take
    precedence.

    """
    hook_env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master, primary and target node) is returned for both
    elements of the tuple.

    """
    hook_nodes = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it is not
    already on the target node, that all its disks are of type C{LD_LV}
    or C{LD_FILE} and runs the node, instance policy, memory and bridge
    checks against the target node.

    @raise errors.OpPrereqError: if the instance already is on the target
        node or one of its disks has an unsupported type

    """
    self.instance = inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node_info = self.cfg.GetNodeInfo(self.op.target_node)
    assert node_info is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = dest_node = node_info.name

    if dest_node == inst.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (inst.name, dest_node),
                                 errors.ECODE_STATE)

    be_params = self.cfg.GetClusterInfo().FillBE(inst)

    # Only these disk types can be copied
    copyable_types = (constants.LD_LV, constants.LD_FILE)
    for disk_idx, disk in enumerate(inst.disks):
      if disk.dev_type not in copyable_types:
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % disk_idx,
                                   errors.ECODE_STATE)

    _CheckNodeOnline(self, dest_node)
    _CheckNodeNotDrained(self, dest_node)
    _CheckNodeVmCapable(self, dest_node)
    group_policy = _CalculateGroupIPolicy(
      self.cfg.GetClusterInfo(), self.cfg.GetNodeGroup(node_info.group))
    _CheckTargetNodeIPolicy(self, group_policy, inst, node_info,
                            ignore=self.op.ignore_ipolicy)

    if inst.admin_state != constants.ADMINST_UP:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # the instance will be started again, memory is needed for that
      _CheckNodeFreeMemory(self, dest_node, "failing over instance %s" %
                           inst.name, be_params[constants.BE_MAXMEM],
                           inst.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, inst, node=dest_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @raise errors.OpExecError: if the shutdown fails (unless consistency
        is ignored), if the disks can't be created or copied, or if the
        instance can't be started on the target node

    """
    inst = self.instance

    src_node = inst.primary_node
    dst_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 inst.name, src_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    shutdown = self.rpc.call_instance_shutdown(src_node, inst,
                                               self.op.shutdown_timeout)
    shutdown_err = shutdown.fail_msg
    if shutdown_err:
      if not self.op.ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, src_node, shutdown_err))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           inst.name, src_node, src_node, shutdown_err)

    # create the disks on the target node, cleaning up on failure
    try:
      _CreateDisks(self, inst, target_node=dst_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, inst, target_node=dst_node)
      finally:
        self.cfg.ReleaseDRBDMinors(inst.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    # activate each new disk, get its path and copy the data over; the
    # first failure stops the copy
    copy_errors = []
    for disk_idx, disk in enumerate(inst.disks):
      self.LogInfo("Copying data for disk %d", disk_idx)
      assembled = self.rpc.call_blockdev_assemble(dst_node, disk,
                                                  inst.name, True, disk_idx)
      if assembled.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        disk_idx, assembled.fail_msg)
        copy_errors.append(assembled.fail_msg)
        break
      exported = self.rpc.call_blockdev_export(src_node, disk,
                                               dst_node, assembled.payload,
                                               cluster_name)
      if exported.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        disk_idx, exported.fail_msg)
        copy_errors.append(exported.fail_msg)
        break

    if copy_errors:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, inst, target_node=dst_node)
      finally:
        self.cfg.ReleaseDRBDMinors(inst.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(copy_errors),))

    # the data is on the target node now, make it the primary
    inst.primary_node = dst_node
    self.cfg.Update(inst, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, inst, target_node=src_node)

    # Only start the instance if it's marked as up
    if inst.admin_state != constants.ADMINST_UP:
      return

    self.LogInfo("Starting instance %s on node %s",
                 inst.name, dst_node)

    disks_ok, _ = _AssembleInstanceDisks(self, inst,
                                         ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    start = self.rpc.call_instance_start(dst_node,
                                         (inst, None, None), False)
    start_err = start.fail_msg
    if start_err:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, dst_node, start_err))
7785
7786
class LUNodeMigrate(LogicalUnit):
  """Migrate all primary instances away from a node.

  No migration is done here; one job with a single L{opcodes.OpInstanceMigrate}
  opcode is returned per instance.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments; nothing to verify for this LU.

    """

  def ExpandNames(self):
    """Expand the node name and request its lock in shared mode.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary with C{NODE_NAME} and C{ALLOW_RUNTIME_CHANGES}

    """
    hook_env = {
      "NODE_NAME": self.op.node_name,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      }
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node alone is returned for both elements of the tuple.

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)

  def CheckPrereq(self):
    """Check prerequisites; nothing to verify for this LU.

    """

  def Exec(self, feedback_fn):
    """Prepare the migration jobs.

    @return: L{ResultWithJobs} holding one single-opcode job for each
        instance returned by L{_GetNodePrimaryInstances}

    """
    allow_changes = self.op.allow_runtime_changes

    # Prepare jobs for migration instances
    jobs = []
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      migrate_op = opcodes.OpInstanceMigrate(
        instance_name=inst.name,
        mode=self.op.mode,
        live=self.op.live,
        iallocator=self.op.iallocator,
        target_node=self.op.target_node,
        allow_runtime_changes=allow_changes,
        ignore_ipolicy=self.op.ignore_ipolicy)
      jobs.append([migrate_op])

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
    assert owned_nodes == frozenset([self.op.node_name])

    return ResultWithJobs(jobs)
7850
7851
7852 class TLMigrateInstance(Tasklet):
7853   """Tasklet class for instance migration.
7854
7855   @type live: boolean
7856   @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
7858   @type cleanup: boolean
  @ivar cleanup: Whether we cleanup from a failed migration
7860   @type iallocator: string
7861   @ivar iallocator: The iallocator used to determine target_node
7862   @type target_node: string
7863   @ivar target_node: If given, the target_node to reallocate the instance to
7864   @type failover: boolean
7865   @ivar failover: Whether operation results in failover or migration
7866   @type fallback: boolean
7867   @ivar fallback: Whether fallback to failover is allowed if migration not
7868                   possible
7869   @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
7871                             and target node
7872   @type shutdown_timeout: int
7873   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7874   @type ignore_ipolicy: bool
7875   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7876
7877   """
7878
7879   # Constants
7880   _MIGRATION_POLL_INTERVAL = 1      # seconds
7881   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7882
7883   def __init__(self, lu, instance_name, cleanup=False,
7884                failover=False, fallback=False,
7885                ignore_consistency=False,
7886                allow_runtime_changes=True,
7887                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7888                ignore_ipolicy=False):
7889     """Initializes this class.
7890
7891     """
7892     Tasklet.__init__(self, lu)
7893
7894     # Parameters
7895     self.instance_name = instance_name
7896     self.cleanup = cleanup
7897     self.live = False # will be overridden later
7898     self.failover = failover
7899     self.fallback = fallback
7900     self.ignore_consistency = ignore_consistency
7901     self.shutdown_timeout = shutdown_timeout
7902     self.ignore_ipolicy = ignore_ipolicy
7903     self.allow_runtime_changes = allow_runtime_changes
7904
7905   def CheckPrereq(self):
7906     """Check prerequisites.
7907
7908     This checks that the instance is in the cluster.
7909
7910     """
7911     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7912     instance = self.cfg.GetInstanceInfo(instance_name)
7913     assert instance is not None
7914     self.instance = instance
7915     cluster = self.cfg.GetClusterInfo()
7916
7917     if (not self.cleanup and
7918         not instance.admin_state == constants.ADMINST_UP and
7919         not self.failover and self.fallback):
7920       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7921                       " switching to failover")
7922       self.failover = True
7923
7924     if instance.disk_template not in constants.DTS_MIRRORED:
7925       if self.failover:
7926         text = "failovers"
7927       else:
7928         text = "migrations"
7929       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7930                                  " %s" % (instance.disk_template, text),
7931                                  errors.ECODE_STATE)
7932
7933     if instance.disk_template in constants.DTS_EXT_MIRROR:
7934       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7935
7936       if self.lu.op.iallocator:
7937         self._RunAllocator()
7938       else:
7939         # We set set self.target_node as it is required by
7940         # BuildHooksEnv
7941         self.target_node = self.lu.op.target_node
7942
7943       # Check that the target node is correct in terms of instance policy
7944       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7945       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7946       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7947       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7948                               ignore=self.ignore_ipolicy)
7949
7950       # self.target_node is already populated, either directly or by the
7951       # iallocator run
7952       target_node = self.target_node
7953       if self.target_node == instance.primary_node:
7954         raise errors.OpPrereqError("Cannot migrate instance %s"
7955                                    " to its primary (%s)" %
7956                                    (instance.name, instance.primary_node))
7957
7958       if len(self.lu.tasklets) == 1:
7959         # It is safe to release locks only when we're the only tasklet
7960         # in the LU
7961         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7962                       keep=[instance.primary_node, self.target_node])
7963
7964     else:
7965       secondary_nodes = instance.secondary_nodes
7966       if not secondary_nodes:
7967         raise errors.ConfigurationError("No secondary node but using"
7968                                         " %s disk template" %
7969                                         instance.disk_template)
7970       target_node = secondary_nodes[0]
7971       if self.lu.op.iallocator or (self.lu.op.target_node and
7972                                    self.lu.op.target_node != target_node):
7973         if self.failover:
7974           text = "failed over"
7975         else:
7976           text = "migrated"
7977         raise errors.OpPrereqError("Instances with disk template %s cannot"
7978                                    " be %s to arbitrary nodes"
7979                                    " (neither an iallocator nor a target"
7980                                    " node can be passed)" %
7981                                    (instance.disk_template, text),
7982                                    errors.ECODE_INVAL)
7983       nodeinfo = self.cfg.GetNodeInfo(target_node)
7984       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7985       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7986       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7987                               ignore=self.ignore_ipolicy)
7988
7989     i_be = cluster.FillBE(instance)
7990
7991     # check memory requirements on the secondary node
7992     if (not self.cleanup and
7993          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7994       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7995                                                "migrating instance %s" %
7996                                                instance.name,
7997                                                i_be[constants.BE_MINMEM],
7998                                                instance.hypervisor)
7999     else:
8000       self.lu.LogInfo("Not checking memory on the secondary node as"
8001                       " instance will not be started")
8002
8003     # check if failover must be forced instead of migration
8004     if (not self.cleanup and not self.failover and
8005         i_be[constants.BE_ALWAYS_FAILOVER]):
8006       if self.fallback:
8007         self.lu.LogInfo("Instance configured to always failover; fallback"
8008                         " to failover")
8009         self.failover = True
8010       else:
8011         raise errors.OpPrereqError("This instance has been configured to"
8012                                    " always failover, please allow failover",
8013                                    errors.ECODE_STATE)
8014
8015     # check bridge existance
8016     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8017
8018     if not self.cleanup:
8019       _CheckNodeNotDrained(self.lu, target_node)
8020       if not self.failover:
8021         result = self.rpc.call_instance_migratable(instance.primary_node,
8022                                                    instance)
8023         if result.fail_msg and self.fallback:
8024           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8025                           " failover")
8026           self.failover = True
8027         else:
8028           result.Raise("Can't migrate, please use failover",
8029                        prereq=True, ecode=errors.ECODE_STATE)
8030
8031     assert not (self.failover and self.cleanup)
8032
8033     if not self.failover:
8034       if self.lu.op.live is not None and self.lu.op.mode is not None:
8035         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8036                                    " parameters are accepted",
8037                                    errors.ECODE_INVAL)
8038       if self.lu.op.live is not None:
8039         if self.lu.op.live:
8040           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8041         else:
8042           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8043         # reset the 'live' parameter to None so that repeated
8044         # invocations of CheckPrereq do not raise an exception
8045         self.lu.op.live = None
8046       elif self.lu.op.mode is None:
8047         # read the default value from the hypervisor
8048         i_hv = cluster.FillHV(self.instance, skip_globals=False)
8049         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8050
8051       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8052     else:
8053       # Failover is never live
8054       self.live = False
8055
8056     if not (self.failover or self.cleanup):
8057       remote_info = self.rpc.call_instance_info(instance.primary_node,
8058                                                 instance.name,
8059                                                 instance.hypervisor)
8060       remote_info.Raise("Error checking instance on node %s" %
8061                         instance.primary_node)
8062       instance_running = bool(remote_info.payload)
8063       if instance_running:
8064         self.current_mem = int(remote_info.payload["memory"])
8065
8066   def _RunAllocator(self):
8067     """Run the allocator based on input opcode.
8068
8069     """
8070     # FIXME: add a self.ignore_ipolicy option
8071     ial = IAllocator(self.cfg, self.rpc,
8072                      mode=constants.IALLOCATOR_MODE_RELOC,
8073                      name=self.instance_name,
8074                      relocate_from=[self.instance.primary_node],
8075                      )
8076
8077     ial.Run(self.lu.op.iallocator)
8078
8079     if not ial.success:
8080       raise errors.OpPrereqError("Can't compute nodes using"
8081                                  " iallocator '%s': %s" %
8082                                  (self.lu.op.iallocator, ial.info),
8083                                  errors.ECODE_NORES)
8084     if len(ial.result) != ial.required_nodes:
8085       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8086                                  " of nodes (%s), required %s" %
8087                                  (self.lu.op.iallocator, len(ial.result),
8088                                   ial.required_nodes), errors.ECODE_FAULT)
8089     self.target_node = ial.result[0]
8090     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8091                  self.instance_name, self.lu.op.iallocator,
8092                  utils.CommaJoin(ial.result))
8093
8094   def _WaitUntilSync(self):
8095     """Poll with custom rpc for disk sync.
8096
8097     This uses our own step-based rpc call.
8098
8099     """
8100     self.feedback_fn("* wait until resync is done")
8101     all_done = False
8102     while not all_done:
8103       all_done = True
8104       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8105                                             self.nodes_ip,
8106                                             self.instance.disks)
8107       min_percent = 100
8108       for node, nres in result.items():
8109         nres.Raise("Cannot resync disks on node %s" % node)
8110         node_done, node_percent = nres.payload
8111         all_done = all_done and node_done
8112         if node_percent is not None:
8113           min_percent = min(min_percent, node_percent)
8114       if not all_done:
8115         if min_percent < 100:
8116           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8117         time.sleep(2)
8118
8119   def _EnsureSecondary(self, node):
8120     """Demote a node to secondary.
8121
8122     """
8123     self.feedback_fn("* switching node %s to secondary mode" % node)
8124
8125     for dev in self.instance.disks:
8126       self.cfg.SetDiskID(dev, node)
8127
8128     result = self.rpc.call_blockdev_close(node, self.instance.name,
8129                                           self.instance.disks)
8130     result.Raise("Cannot change disk to secondary on node %s" % node)
8131
8132   def _GoStandalone(self):
8133     """Disconnect from the network.
8134
8135     """
8136     self.feedback_fn("* changing into standalone mode")
8137     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8138                                                self.instance.disks)
8139     for node, nres in result.items():
8140       nres.Raise("Cannot disconnect disks node %s" % node)
8141
8142   def _GoReconnect(self, multimaster):
8143     """Reconnect to the network.
8144
8145     """
8146     if multimaster:
8147       msg = "dual-master"
8148     else:
8149       msg = "single-master"
8150     self.feedback_fn("* changing disks into %s mode" % msg)
8151     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8152                                            self.instance.disks,
8153                                            self.instance.name, multimaster)
8154     for node, nres in result.items():
8155       nres.Raise("Cannot change disks config on node %s" % node)
8156
8157   def _ExecCleanup(self):
8158     """Try to cleanup after a failed migration.
8159
8160     The cleanup is done by:
8161       - check that the instance is running only on one node
8162         (and update the config if needed)
8163       - change disks on its secondary node to secondary
8164       - wait until disks are fully synchronized
8165       - disconnect from the network
8166       - change disks into single-master mode
8167       - wait again until disks are fully synchronized
8168
8169     """
8170     instance = self.instance
8171     target_node = self.target_node
8172     source_node = self.source_node
8173
8174     # check running on only one node
8175     self.feedback_fn("* checking where the instance actually runs"
8176                      " (if this hangs, the hypervisor might be in"
8177                      " a bad state)")
8178     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8179     for node, result in ins_l.items():
8180       result.Raise("Can't contact node %s" % node)
8181
8182     runningon_source = instance.name in ins_l[source_node].payload
8183     runningon_target = instance.name in ins_l[target_node].payload
8184
8185     if runningon_source and runningon_target:
8186       raise errors.OpExecError("Instance seems to be running on two nodes,"
8187                                " or the hypervisor is confused; you will have"
8188                                " to ensure manually that it runs only on one"
8189                                " and restart this operation")
8190
8191     if not (runningon_source or runningon_target):
8192       raise errors.OpExecError("Instance does not seem to be running at all;"
8193                                " in this case it's safer to repair by"
8194                                " running 'gnt-instance stop' to ensure disk"
8195                                " shutdown, and then restarting it")
8196
8197     if runningon_target:
8198       # the migration has actually succeeded, we need to update the config
8199       self.feedback_fn("* instance running on secondary node (%s),"
8200                        " updating config" % target_node)
8201       instance.primary_node = target_node
8202       self.cfg.Update(instance, self.feedback_fn)
8203       demoted_node = source_node
8204     else:
8205       self.feedback_fn("* instance confirmed to be running on its"
8206                        " primary node (%s)" % source_node)
8207       demoted_node = target_node
8208
8209     if instance.disk_template in constants.DTS_INT_MIRROR:
8210       self._EnsureSecondary(demoted_node)
8211       try:
8212         self._WaitUntilSync()
8213       except errors.OpExecError:
8214         # we ignore here errors, since if the device is standalone, it
8215         # won't be able to sync
8216         pass
8217       self._GoStandalone()
8218       self._GoReconnect(False)
8219       self._WaitUntilSync()
8220
8221     self.feedback_fn("* done")
8222
8223   def _RevertDiskStatus(self):
8224     """Try to revert the disk status after a failed migration.
8225
8226     """
8227     target_node = self.target_node
8228     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8229       return
8230
8231     try:
8232       self._EnsureSecondary(target_node)
8233       self._GoStandalone()
8234       self._GoReconnect(False)
8235       self._WaitUntilSync()
8236     except errors.OpExecError, err:
8237       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8238                          " please try to recover the instance manually;"
8239                          " error '%s'" % str(err))
8240
8241   def _AbortMigration(self):
8242     """Call the hypervisor code to abort a started migration.
8243
8244     """
8245     instance = self.instance
8246     target_node = self.target_node
8247     source_node = self.source_node
8248     migration_info = self.migration_info
8249
8250     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8251                                                                  instance,
8252                                                                  migration_info,
8253                                                                  False)
8254     abort_msg = abort_result.fail_msg
8255     if abort_msg:
8256       logging.error("Aborting migration failed on target node %s: %s",
8257                     target_node, abort_msg)
8258       # Don't raise an exception here, as we stil have to try to revert the
8259       # disk status, even if this step failed.
8260
8261     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8262         instance, False, self.live)
8263     abort_msg = abort_result.fail_msg
8264     if abort_msg:
8265       logging.error("Aborting migration failed on source node %s: %s",
8266                     source_node, abort_msg)
8267
8268   def _ExecMigration(self):
8269     """Migrate an instance.
8270
8271     The migrate is done by:
8272       - change the disks into dual-master mode
8273       - wait until disks are fully synchronized again
8274       - migrate the instance
8275       - change disks on the new secondary node (the old primary) to secondary
8276       - wait until disks are fully synchronized
8277       - change disks into single-master mode
8278
8279     """
8280     instance = self.instance
8281     target_node = self.target_node
8282     source_node = self.source_node
8283
8284     # Check for hypervisor version mismatch and warn the user.
8285     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8286                                        None, [self.instance.hypervisor])
8287     for ninfo in nodeinfo.values():
8288       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8289                   ninfo.node)
8290     (_, _, (src_info, )) = nodeinfo[source_node].payload
8291     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8292
8293     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8294         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8295       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8296       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8297       if src_version != dst_version:
8298         self.feedback_fn("* warning: hypervisor version mismatch between"
8299                          " source (%s) and target (%s) node" %
8300                          (src_version, dst_version))
8301
8302     self.feedback_fn("* checking disk consistency between source and target")
8303     for (idx, dev) in enumerate(instance.disks):
8304       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8305         raise errors.OpExecError("Disk %s is degraded or not fully"
8306                                  " synchronized on target node,"
8307                                  " aborting migration" % idx)
8308
8309     if self.current_mem > self.tgt_free_mem:
8310       if not self.allow_runtime_changes:
8311         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8312                                  " free memory to fit instance %s on target"
8313                                  " node %s (have %dMB, need %dMB)" %
8314                                  (instance.name, target_node,
8315                                   self.tgt_free_mem, self.current_mem))
8316       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8317       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8318                                                      instance,
8319                                                      self.tgt_free_mem)
8320       rpcres.Raise("Cannot modify instance runtime memory")
8321
8322     # First get the migration information from the remote node
8323     result = self.rpc.call_migration_info(source_node, instance)
8324     msg = result.fail_msg
8325     if msg:
8326       log_err = ("Failed fetching source migration information from %s: %s" %
8327                  (source_node, msg))
8328       logging.error(log_err)
8329       raise errors.OpExecError(log_err)
8330
8331     self.migration_info = migration_info = result.payload
8332
8333     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8334       # Then switch the disks to master/master mode
8335       self._EnsureSecondary(target_node)
8336       self._GoStandalone()
8337       self._GoReconnect(True)
8338       self._WaitUntilSync()
8339
8340     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8341     result = self.rpc.call_accept_instance(target_node,
8342                                            instance,
8343                                            migration_info,
8344                                            self.nodes_ip[target_node])
8345
8346     msg = result.fail_msg
8347     if msg:
8348       logging.error("Instance pre-migration failed, trying to revert"
8349                     " disk status: %s", msg)
8350       self.feedback_fn("Pre-migration failed, aborting")
8351       self._AbortMigration()
8352       self._RevertDiskStatus()
8353       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8354                                (instance.name, msg))
8355
8356     self.feedback_fn("* migrating instance to %s" % target_node)
8357     result = self.rpc.call_instance_migrate(source_node, instance,
8358                                             self.nodes_ip[target_node],
8359                                             self.live)
8360     msg = result.fail_msg
8361     if msg:
8362       logging.error("Instance migration failed, trying to revert"
8363                     " disk status: %s", msg)
8364       self.feedback_fn("Migration failed, aborting")
8365       self._AbortMigration()
8366       self._RevertDiskStatus()
8367       raise errors.OpExecError("Could not migrate instance %s: %s" %
8368                                (instance.name, msg))
8369
8370     self.feedback_fn("* starting memory transfer")
8371     last_feedback = time.time()
8372     while True:
8373       result = self.rpc.call_instance_get_migration_status(source_node,
8374                                                            instance)
8375       msg = result.fail_msg
8376       ms = result.payload   # MigrationStatus instance
8377       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8378         logging.error("Instance migration failed, trying to revert"
8379                       " disk status: %s", msg)
8380         self.feedback_fn("Migration failed, aborting")
8381         self._AbortMigration()
8382         self._RevertDiskStatus()
8383         raise errors.OpExecError("Could not migrate instance %s: %s" %
8384                                  (instance.name, msg))
8385
8386       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8387         self.feedback_fn("* memory transfer complete")
8388         break
8389
8390       if (utils.TimeoutExpired(last_feedback,
8391                                self._MIGRATION_FEEDBACK_INTERVAL) and
8392           ms.transferred_ram is not None):
8393         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8394         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8395         last_feedback = time.time()
8396
8397       time.sleep(self._MIGRATION_POLL_INTERVAL)
8398
8399     result = self.rpc.call_instance_finalize_migration_src(source_node,
8400                                                            instance,
8401                                                            True,
8402                                                            self.live)
8403     msg = result.fail_msg
8404     if msg:
8405       logging.error("Instance migration succeeded, but finalization failed"
8406                     " on the source node: %s", msg)
8407       raise errors.OpExecError("Could not finalize instance migration: %s" %
8408                                msg)
8409
8410     instance.primary_node = target_node
8411
8412     # distribute new instance config to the other nodes
8413     self.cfg.Update(instance, self.feedback_fn)
8414
8415     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8416                                                            instance,
8417                                                            migration_info,
8418                                                            True)
8419     msg = result.fail_msg
8420     if msg:
8421       logging.error("Instance migration succeeded, but finalization failed"
8422                     " on the target node: %s", msg)
8423       raise errors.OpExecError("Could not finalize instance migration: %s" %
8424                                msg)
8425
8426     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8427       self._EnsureSecondary(source_node)
8428       self._WaitUntilSync()
8429       self._GoStandalone()
8430       self._GoReconnect(False)
8431       self._WaitUntilSync()
8432
8433     # If the instance's disk template is `rbd' and there was a successful
8434     # migration, unmap the device from the source node.
8435     if self.instance.disk_template == constants.DT_RBD:
8436       disks = _ExpandCheckDisks(instance, instance.disks)
8437       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8438       for disk in disks:
8439         result = self.rpc.call_blockdev_shutdown(source_node, disk)
8440         msg = result.fail_msg
8441         if msg:
8442           logging.error("Migration was successful, but couldn't unmap the"
8443                         " block device %s on source node %s: %s",
8444                         disk.iv_name, source_node, msg)
8445           logging.error("You need to unmap the device %s manually on %s",
8446                         disk.iv_name, source_node)
8447
8448     self.feedback_fn("* done")
8449
  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    In detail:
      - if the instance is marked as up, check the disks on the target
        node; a degraded disk aborts the failover unless the primary
        node is offline or C{self.ignore_consistency} is set
      - shut down the instance on the source node; a failure is only a
        warning if C{self.ignore_consistency} is set or the primary node
        is offline
      - deactivate the instance's disks
      - make the target node the primary node in the configuration
      - if the instance is marked as up, activate its disks and start it
        on the target node

    @raise errors.OpExecError: if a disk is degraded, or if the instance
        shutdown, the disk shutdown, the disk activation or the instance
        start fail (subject to the exceptions listed above)

    """
    instance = self.instance
    # Node object of the current primary, needed for its offline flag
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    # Remembered here, as instance.primary_node is changed further down
    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* checking disk consistency between source and target")
      for (idx, dev) in enumerate(instance.disks):
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            # With the primary offline a degraded disk is only reported
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, idx, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % idx)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        # Best effort: go on, but tell the user to verify the node is down
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    # NOTE(review): ignore_primary=True presumably makes errors on the
    # primary node non-fatal -- confirm against _ShutdownInstanceDisks
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    # From here on the target node is the instance's primary node
    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        # Don't leave partially assembled disks behind
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        # The start failed, so the disks are deactivated again
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
8527
8528   def Exec(self, feedback_fn):
8529     """Perform the migration.
8530
8531     """
8532     self.feedback_fn = feedback_fn
8533     self.source_node = self.instance.primary_node
8534
8535     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8536     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8537       self.target_node = self.instance.secondary_nodes[0]
8538       # Otherwise self.target_node has been populated either
8539       # directly, or through an iallocator.
8540
8541     self.all_nodes = [self.source_node, self.target_node]
8542     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8543                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8544
8545     if self.failover:
8546       feedback_fn("Failover instance %s" % self.instance.name)
8547       self._ExecFailover()
8548     else:
8549       feedback_fn("Migrating instance %s" % self.instance.name)
8550
8551       if self.cleanup:
8552         return self._ExecCleanup()
8553       else:
8554         return self._ExecMigration()
8555
8556
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  The children of the device are handled first, recursively, then the
  device itself. A device is only created if creation is forced, which
  is the case either because the caller said so or because the device
  (or one of its ancestors) reports that it has to be created on
  secondaries.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device whose
      CreateOnSecondary() method returns a true value
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  # Children go first; they inherit the possibly updated force flag
  for sub_device in device.children or []:
    _CreateBlockDev(lu, node, instance, sub_device, force_create,
                    info, force_open)

  if force_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8597
8598
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  No recursion over the device's children is done here, so they have to
  exist already. If the device has no physical ID yet, the payload of
  the creation RPC is stored as its physical ID.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  creation = lu.rpc.call_blockdev_create(node, device, device.size,
                                         instance.name, force_open, info)
  failure_text = ("Can't create block device %s on"
                  " node %s for instance %s" % (device, node, instance.name))
  creation.Raise(failure_text)

  if device.physical_id is None:
    device.physical_id = creation.payload
8627
8628
def _GenerateUniqueNames(lu, exts):
  """Generate a list of unique names, one per given extension.

  For every extension a new unique ID is requested from the
  configuration (under the current execution context ID) and the
  extension is appended to it.

  @param lu: the lu on whose behalf we execute
  @param exts: the suffixes to append, one name is generated for each
  @rtype: list
  @return: the generated names, in the order of C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(lu.proc.GetECId()), suffix)
          for suffix in exts]
8640
8641
def _ComputeLDParams(disk_template, disk_params):
  """Computes Logical Disk parameters from Disk Template parameters.

  @type disk_template: string
  @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
  @type disk_params: dict
  @param disk_params: disk template parameters;
      dict(template_name -> parameters); it must have an entry for
      C{disk_template}
  @rtype: list(dict)
  @return: a list of dicts, one for each node of the disk hierarchy. Each dict
    contains the LD parameters of the node. The tree is flattened in-order
    (for DRBD8: the DRBD device, its data LV, its metadata LV). Valid
    templates not handled below yield an empty list.
  @raise errors.ProgrammerError: if the disk template is unknown

  """
  if disk_template not in constants.DISK_TEMPLATES:
    raise errors.ProgrammerError("Unknown disk template %s" % disk_template)

  result = list()
  dt_params = disk_params[disk_template]
  if disk_template == constants.DT_DRBD8:
    drbd_params = {
      constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
      constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
      constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
      constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
      constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
      constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
      constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
      constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
      constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
      constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
      constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
      constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
      }

    drbd_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
                       drbd_params)

    result.append(drbd_params)

    # data LV
    data_params = {
      constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
      }
    data_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       data_params)
    result.append(data_params)

    # metadata LV
    meta_params = {
      constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
      }
    meta_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       meta_params)
    result.append(meta_params)

  elif (disk_template == constants.DT_FILE or
        disk_template == constants.DT_SHARED_FILE):
    # No template-level overrides here; still go through FillDict, like
    # the other branches, so that the caller gets its own dict instead of
    # a reference to the module-level defaults (which a later in-place
    # change of the result would otherwise modify for everybody)
    params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_FILE],
                       {})
    result.append(params)

  elif disk_template == constants.DT_PLAIN:
    params = {
      constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
      }
    params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       params)
    result.append(params)

  elif disk_template == constants.DT_BLOCK:
    # As for the file-based templates: hand out a private dict, not the
    # shared defaults
    params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV],
                       {})
    result.append(params)

  elif disk_template == constants.DT_RBD:
    params = {
      constants.LDP_POOL: dt_params[constants.RBD_POOL]
      }
    params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
                       params)
    result.append(params)

  return result
8725
8726
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor, drbd_params, data_params,
                         meta_params):
  """Build a DRBD8 disk object together with its two LV children.

  A port is allocated and a DRBD secret is generated through the
  configuration of C{lu}; both become part of the logical ID of the
  returned device.

  @param lu: the logical unit on whose behalf we execute
  @param primary: first node of the DRBD logical ID
  @param secondary: second node of the DRBD logical ID
  @param size: size given to the DRBD device and to its data LV
  @param vgnames: two-element sequence, volume groups of the data and
      metadata LVs (in this order)
  @param names: two-element sequence, names of the data and metadata LVs
      (in this order)
  @param iv_name: the C{iv_name} of the DRBD device
  @param p_minor: minor stored after the port in the logical ID
  @param s_minor: minor stored after C{p_minor} in the logical ID
  @param drbd_params: parameters of the DRBD device
  @param data_params: parameters of the data LV
  @param meta_params: parameters of the metadata LV
  @return: the DRBD8 L{objects.Disk}, with the data and metadata LVs as
      children

  """
  assert len(vgnames) == len(names) == 2

  cfg = lu.cfg
  port = cfg.AllocatePort()
  shared_secret = cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgnames[0], names[0]),
                         params=data_params)
  # the metadata volume has a fixed size, independent of the disk size
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                         logical_id=(vgnames[1], names[1]),
                         params=meta_params)

  drbd_logical_id = (primary, secondary, port, p_minor, s_minor,
                     shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_logical_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name, params=drbd_params)
8750
8751
# Prefix put in front of ".disk<N>" when _GenerateDiskTemplate asks for
# unique volume names; for templates missing from this map no names are
# generated at all
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  }


# Logical device type that _GenerateDiskTemplate gives to the disks of each
# disk template handled by its generic (non-diskless, non-DRBD8) branch
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  }
8765
8766
def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
    secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
    feedback_fn, disk_params,
    _req_file_storage=opcodes.RequireFileStorage,
    _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Build the disk objects of an instance for a given disk template.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template to generate disks for
  @param instance_name: instance name, passed to the DRBD minor allocation
  @param primary_node: the primary node of the instance
  @param secondary_nodes: list of secondary nodes; must hold exactly one
      node for DRBD8 and be empty for every other non-diskless template
  @param disk_info: list of disk definitions (dicts using the
      C{constants.IDISK_*} keys)
  @param file_storage_dir: directory used in the logical ID of file-based
      disks
  @param file_driver: driver used in the logical ID of file-based disks
  @param base_index: index of the first generated disk
  @param feedback_fn: function called with a progress message for every
      disk of the non-DRBD templates
  @param disk_params: disk template parameters, as taken by
      L{_ComputeLDParams}
  @param _req_file_storage: function called before generating
      C{DT_FILE} disks
  @param _req_shr_file_storage: function called before generating
      C{DT_SHARED_FILE} disks
  @rtype: list
  @return: the generated L{objects.Disk} objects (empty for diskless)
  @raise errors.ProgrammerError: if the secondary nodes do not fit the
      template or the template is unknown

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  ld_params = _ComputeLDParams(template_name, disk_params)

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_DRBD8:
    (drbd_params, data_params, meta_params) = ld_params
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: the primary node's first, then the secondary's
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    suffixes = [".disk%d" % (base_index + pos) for pos in range(disk_count)]
    lv_prefixes = _GenerateUniqueNames(lu, suffixes)

    drbd_disks = []
    for (idx, disk) in enumerate(disk_info):
      lv_prefix = lv_prefixes[idx]
      default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
      # per-disk volume groups override the cluster VG/default meta VG
      vgnames = [disk.get(constants.IDISK_VG, vgname),
                 disk.get(constants.IDISK_METAVG, default_metavg)]
      lv_names = [lv_prefix + "_data", lv_prefix + "_meta"]
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      vgnames, lv_names,
                                      "disk/%d" % (idx + base_index),
                                      minors[2 * idx], minors[2 * idx + 1],
                                      drbd_params, data_params, meta_params)
      drbd_dev.mode = disk[constants.IDISK_MODE]
      drbd_disks.append(drbd_dev)
    return drbd_disks

  # all remaining templates consist of a single device per disk and must
  # not be given secondary nodes
  if secondary_nodes:
    raise errors.ProgrammerError("Wrong template configuration")

  if template_name == constants.DT_FILE:
    _req_file_storage()
  elif template_name == constants.DT_SHARED_FILE:
    _req_shr_file_storage()

  name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
  if name_prefix is not None:
    names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                      (name_prefix, base_index + pos)
                                      for pos in range(disk_count)])
  else:
    names = None

  dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

  # pick the function computing the logical ID of a disk from its position
  # in disk_info, its final index and its definition
  if template_name == constants.DT_PLAIN:
    def logical_id_fn(idx, _, disk):
      return (disk.get(constants.IDISK_VG, vgname), names[idx])
  elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
    def logical_id_fn(_, disk_index, disk):
      return (file_driver, "%s/disk%d" % (file_storage_dir, disk_index))
  elif template_name == constants.DT_BLOCK:
    def logical_id_fn(idx, disk_index, disk):
      return (constants.BLOCKDEV_DRIVER_MANUAL, disk[constants.IDISK_ADOPT])
  elif template_name == constants.DT_RBD:
    def logical_id_fn(idx, _, disk):
      return ("rbd", names[idx])
  else:
    raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

  disks = []
  for (idx, disk) in enumerate(disk_info):
    disk_index = idx + base_index
    size = disk[constants.IDISK_SIZE]
    feedback_fn("* disk %s, size %s" %
                (disk_index, utils.FormatUnit(size, "h")))
    logical_id = logical_id_fn(idx, disk_index, disk)
    new_disk = objects.Disk(dev_type=dev_type, size=size,
                            logical_id=logical_id,
                            iv_name="disk/%d" % disk_index,
                            mode=disk[constants.IDISK_MODE],
                            params=ld_params[0])
    disks.append(new_disk)

  return disks
8860
8861
8862 def _GetInstanceInfoText(instance):
8863   """Compute that text that should be added to the disk's metadata.
8864
8865   """
8866   return "originstname+%s" % instance.name
8867
8868
8869 def _CalcEta(time_taken, written, total_size):
8870   """Calculates the ETA based on size written and total size.
8871
8872   @param time_taken: The time taken so far
8873   @param written: amount written so far
8874   @param total_size: The total size of data to be written
8875   @return: The remaining time in seconds
8876
8877   """
8878   avg_time = time_taken / float(written)
8879   return (total_size - written) * avg_time
8880
8881
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  The disk sync is paused on the primary node, every disk is wiped there
  chunk by chunk, and the sync is resumed again no matter whether the
  wipe succeeded. Nothing is returned; a failed pause or wipe RPC is
  reported through the C{Raise} method of the RPC result, while problems
  when resuming the sync are only logged as warnings.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
  # Check the RPC itself before looking at the per-disk results.
  # NOTE(review): assumes the payload of a failed call is not usable
  # per-disk data -- confirm against the RPC result class
  result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur; for small disks the truncation would give a chunk of zero,
      # with which the loop below never advances (and the ETA computation
      # divides by zero), hence the lower bound of one
      wipe_chunk_size = max(1, int(wipe_chunk_size))

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      # starting at zero makes the first chunk always report progress
      last_output = 0
      start_time = time.time()

      while offset < size:
        # the last chunk may be smaller than the regular chunk size
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # report progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    # Never raise from here: an exception in this block would replace a
    # wipe error that is currently propagating
    if result.fail_msg:
      lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
                    " please have a look at the status and troubleshoot"
                    " the issue: %s", node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resume sync of disk %d failed, please have a"
                        " look at the status and troubleshoot the issue", idx)
          logging.warn("resume-sync of instance %s for disks %d failed",
                       instance.name, idx)
8949
8950
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For file-based disk
  templates the storage directory (taken from the path of the first disk)
  is created first; then every disk that is not skipped is created on
  all relevant nodes. No value is returned explicitly.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template in constants.DTS_FILEBASED:
    first_disk_path = instance.disks[0].logical_id[1]
    file_storage_dir = os.path.dirname(first_disk_path)
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on node %s" %
                 (file_storage_dir, pnode))

  # an empty or missing skip list means that nothing is skipped
  skipped = to_skip or ()

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for (idx, device) in enumerate(instance.disks):
    if idx in skipped:
      continue
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    #HARDCODE
    for node in all_nodes:
      # both flags passed to _CreateBlockDev are set only for the node
      # acting as primary
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
8993
8994
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  The ports of DRBD disks are returned to the pool if all block devices
  could be removed or if C{ignore_failures} is set. For the C{DT_FILE}
  disk template the storage directory (taken from the path of the first
  disk) is removed as well.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @type ignore_failures: boolean
  @param ignore_failures: whether to release the DRBD ports even if some
      block device could not be removed
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  for (idx, device) in enumerate(instance.disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  # NOTE(review): _CreateDisks creates the directory for all of
  # constants.DTS_FILEBASED, while only DT_FILE is cleaned up here --
  # confirm whether this asymmetry is intended
  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the call was actually sent to, which is not the
      # primary node when target_node is given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
9051
9052
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template the disks will be created with
  @param disks: list of disk definitions; for the C{DT_PLAIN} and
      C{DT_DRBD8} templates each of them must contain the
      C{constants.IDISK_VG} and C{constants.IDISK_SIZE} keys
  @rtype: dict
  @return: the required space per volume group (volume group name ->
      size); empty for the diskless and file-based templates
  @raise errors.ProgrammerError: if the size requirements of the disk
      template are not known

  """
  def _compute(disks, payload):
    """Sum up, per volume group, the disk sizes plus a per-disk payload.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # accumulate under the name of the disk's volume group, so that
      # several disks in the same volume group add up
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # these templates need no space in any volume group
  if disk_template in (constants.DT_DISKLESS, constants.DT_FILE,
                       constants.DT_SHARED_FILE):
    return {}

  if disk_template == constants.DT_PLAIN:
    return _compute(disks, 0)

  if disk_template == constants.DT_DRBD8:
    # DRBD_META_SIZE is added for the drbd metadata of each disk
    return _compute(disks, DRBD_META_SIZE)

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" % disk_template)
9083
9084
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk size required by a disk template.

  @param disk_template: the disk template the disks will be created with
  @param disks: the disk definitions; each must contain the
      C{constants.IDISK_SIZE} key
  @return: the summed disk sizes for C{DT_PLAIN}, the same plus
      C{DRBD_META_SIZE} for each disk for C{DT_DRBD8}, C{None} for the
      diskless and C{DT_FILE} templates and zero for the shared file,
      block and RBD templates
  @raise errors.ProgrammerError: if the size requirements of the disk
      template are not known

  """
  # both totals are computed up front, whatever the requested template
  plain_total = sum(d[constants.IDISK_SIZE] for d in disks)
  # DRBD_META_SIZE is added for the drbd metadata of each disk
  drbd_total = sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks)

  # Required free disk space as a function of disk and swap space
  required = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
    constants.DT_RBD: 0,
  }

  if disk_template in required:
    return required[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" % disk_template)
9107
9108
9109 def _FilterVmNodes(lu, nodenames):
9110   """Filters out non-vm_capable nodes from a list.
9111
9112   @type lu: L{LogicalUnit}
9113   @param lu: the logical unit for which we check
9114   @type nodenames: list
9115   @param nodenames: the list of nodes on which we should check
9116   @rtype: list
9117   @return: the list of vm-capable nodes
9118
9119   """
9120   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9121   return [name for name in nodenames if name not in vm_nodes]
9122
9123
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Validate hypervisor parameters on a set of nodes.

  Shared by instance creation and instance modification. The given
  parameters are laid over the cluster-level parameters of the hypervisor
  and the result is validated on the vm-capable nodes; nodes reported as
  offline are not checked.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)

  cluster_hvparams = lu.cfg.GetClusterInfo().hvparams.get(hvname, {})
  filled = objects.FillDict(cluster_hvparams, hvparams)

  validation = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname,
                                                      filled)
  for node in vm_nodes:
    node_result = validation[node]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node)
9152
9153
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """Validate OS parameters on a set of nodes.

  The validation is run on the vm-capable nodes only. A node answering
  with an empty payload is merely logged as not having the OS.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS whose parameters are validated
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  per_node = lu.rpc.call_os_validate(vm_nodes, required, osname, checks,
                                     osparams)
  for (node, nres) in per_node.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if nres.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node)
9182
9183
9184 class LUInstanceCreate(LogicalUnit):
9185   """Create an instance.
9186
9187   """
9188   HPATH = "instance-add"
9189   HTYPE = constants.HTYPE_INSTANCE
9190   REQ_BGL = False
9191
  def CheckArguments(self):
    """Check arguments.

    Validates and normalizes the opcode before any lock is acquired.
    Besides raising on inconsistent input this method also:
      - disables C{self.op.start} in no-installation mode
      - normalizes (and, with C{name_check}, resolves)
        C{self.op.instance_name}
      - sets C{self.adopt_disks} and C{self.check_ip}
      - drops C{self.op.snode} for non-mirrored disk templates when a
        primary node was given
      - stores the cluster domain secret in C{self._cds}
      - forces C{self.op.force_variant} for imports
      - for remote imports, sets C{self.source_x509_ca_pem},
        C{self.source_x509_ca} and C{self.source_instance_name}

    @raise errors.OpPrereqError: if the arguments are invalid or
        inconsistent

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks' parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      # adoption is limited to some disk templates and cannot be combined
      # with an iallocator or with an instance import
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      # some disk templates cannot be used without adoption
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      # internally mirrored templates need a secondary node, for all other
      # templates a given secondary node is dropped with a warning
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    # the cluster domain secret is needed below for the remote import checks
    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      # keep the name as returned by the hostname lookup
      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
9346
9347   def ExpandNames(self):
9348     """ExpandNames for CreateInstance.
9349
9350     Figure out the right locks for instance creation.
9351
9352     """
9353     self.needed_locks = {}
9354
9355     instance_name = self.op.instance_name
9356     # this is just a preventive check, but someone might still add this
9357     # instance in the meantime, and creation will fail at lock-add time
9358     if instance_name in self.cfg.GetInstanceList():
9359       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9360                                  instance_name, errors.ECODE_EXISTS)
9361
9362     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9363
9364     if self.op.iallocator:
9365       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9366       # specifying a group on instance creation and then selecting nodes from
9367       # that group
9368       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9369       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9370     else:
9371       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9372       nodelist = [self.op.pnode]
9373       if self.op.snode is not None:
9374         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9375         nodelist.append(self.op.snode)
9376       self.needed_locks[locking.LEVEL_NODE] = nodelist
9377       # Lock resources of instance's primary and secondary nodes (copy to
9378       # prevent accidential modification)
9379       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9380
9381     # in case of import lock the source node too
9382     if self.op.mode == constants.INSTANCE_IMPORT:
9383       src_node = self.op.src_node
9384       src_path = self.op.src_path
9385
9386       if src_path is None:
9387         self.op.src_path = src_path = self.op.instance_name
9388
9389       if src_node is None:
9390         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9391         self.op.src_node = None
9392         if os.path.isabs(src_path):
9393           raise errors.OpPrereqError("Importing an instance from a path"
9394                                      " requires a source node option",
9395                                      errors.ECODE_INVAL)
9396       else:
9397         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9398         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9399           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9400         if not os.path.isabs(src_path):
9401           self.op.src_path = src_path = \
9402             utils.PathJoin(constants.EXPORT_DIR, src_path)
9403
9404   def _RunAllocator(self):
9405     """Run the allocator based on input opcode.
9406
9407     """
9408     nics = [n.ToDict() for n in self.nics]
9409     ial = IAllocator(self.cfg, self.rpc,
9410                      mode=constants.IALLOCATOR_MODE_ALLOC,
9411                      name=self.op.instance_name,
9412                      disk_template=self.op.disk_template,
9413                      tags=self.op.tags,
9414                      os=self.op.os_type,
9415                      vcpus=self.be_full[constants.BE_VCPUS],
9416                      memory=self.be_full[constants.BE_MAXMEM],
9417                      spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9418                      disks=self.disks,
9419                      nics=nics,
9420                      hypervisor=self.op.hypervisor,
9421                      )
9422
9423     ial.Run(self.op.iallocator)
9424
9425     if not ial.success:
9426       raise errors.OpPrereqError("Can't compute nodes using"
9427                                  " iallocator '%s': %s" %
9428                                  (self.op.iallocator, ial.info),
9429                                  errors.ECODE_NORES)
9430     if len(ial.result) != ial.required_nodes:
9431       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9432                                  " of nodes (%s), required %s" %
9433                                  (self.op.iallocator, len(ial.result),
9434                                   ial.required_nodes), errors.ECODE_FAULT)
9435     self.op.pnode = ial.result[0]
9436     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9437                  self.op.instance_name, self.op.iallocator,
9438                  utils.CommaJoin(ial.result))
9439     if ial.required_nodes == 2:
9440       self.op.snode = ial.result[1]
9441
9442   def BuildHooksEnv(self):
9443     """Build hooks env.
9444
9445     This runs on master, primary and secondary nodes of the instance.
9446
9447     """
9448     env = {
9449       "ADD_MODE": self.op.mode,
9450       }
9451     if self.op.mode == constants.INSTANCE_IMPORT:
9452       env["SRC_NODE"] = self.op.src_node
9453       env["SRC_PATH"] = self.op.src_path
9454       env["SRC_IMAGES"] = self.src_images
9455
9456     env.update(_BuildInstanceHookEnv(
9457       name=self.op.instance_name,
9458       primary_node=self.op.pnode,
9459       secondary_nodes=self.secondaries,
9460       status=self.op.start,
9461       os_type=self.op.os_type,
9462       minmem=self.be_full[constants.BE_MINMEM],
9463       maxmem=self.be_full[constants.BE_MAXMEM],
9464       vcpus=self.be_full[constants.BE_VCPUS],
9465       nics=_NICListToTuple(self, self.nics),
9466       disk_template=self.op.disk_template,
9467       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9468              for d in self.disks],
9469       bep=self.be_full,
9470       hvp=self.hv_full,
9471       hypervisor_name=self.op.hypervisor,
9472       tags=self.op.tags,
9473     ))
9474
9475     return env
9476
9477   def BuildHooksNodes(self):
9478     """Build hooks nodes.
9479
9480     """
9481     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9482     return nl, nl
9483
9484   def _ReadExportInfo(self):
9485     """Reads the export information from disk.
9486
9487     It will override the opcode source node and path with the actual
9488     information, if these two were not specified before.
9489
9490     @return: the export information
9491
9492     """
9493     assert self.op.mode == constants.INSTANCE_IMPORT
9494
9495     src_node = self.op.src_node
9496     src_path = self.op.src_path
9497
9498     if src_node is None:
9499       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9500       exp_list = self.rpc.call_export_list(locked_nodes)
9501       found = False
9502       for node in exp_list:
9503         if exp_list[node].fail_msg:
9504           continue
9505         if src_path in exp_list[node].payload:
9506           found = True
9507           self.op.src_node = src_node = node
9508           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9509                                                        src_path)
9510           break
9511       if not found:
9512         raise errors.OpPrereqError("No export found for relative path %s" %
9513                                     src_path, errors.ECODE_INVAL)
9514
9515     _CheckNodeOnline(self, src_node)
9516     result = self.rpc.call_export_info(src_node, src_path)
9517     result.Raise("No export or invalid export found in dir %s" % src_path)
9518
9519     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9520     if not export_info.has_section(constants.INISECT_EXP):
9521       raise errors.ProgrammerError("Corrupted export config",
9522                                    errors.ECODE_ENVIRON)
9523
9524     ei_version = export_info.get(constants.INISECT_EXP, "version")
9525     if (int(ei_version) != constants.EXPORT_VERSION):
9526       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9527                                  (ei_version, constants.EXPORT_VERSION),
9528                                  errors.ECODE_ENVIRON)
9529     return export_info
9530
9531   def _ReadExportParams(self, einfo):
9532     """Use export parameters as defaults.
9533
9534     In case the opcode doesn't specify (as in override) some instance
9535     parameters, then try to use them from the export information, if
9536     that declares them.
9537
9538     """
9539     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9540
9541     if self.op.disk_template is None:
9542       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9543         self.op.disk_template = einfo.get(constants.INISECT_INS,
9544                                           "disk_template")
9545         if self.op.disk_template not in constants.DISK_TEMPLATES:
9546           raise errors.OpPrereqError("Disk template specified in configuration"
9547                                      " file is not one of the allowed values:"
9548                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9549       else:
9550         raise errors.OpPrereqError("No disk template specified and the export"
9551                                    " is missing the disk_template information",
9552                                    errors.ECODE_INVAL)
9553
9554     if not self.op.disks:
9555       disks = []
9556       # TODO: import the disk iv_name too
9557       for idx in range(constants.MAX_DISKS):
9558         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9559           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9560           disks.append({constants.IDISK_SIZE: disk_sz})
9561       self.op.disks = disks
9562       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9563         raise errors.OpPrereqError("No disk info specified and the export"
9564                                    " is missing the disk information",
9565                                    errors.ECODE_INVAL)
9566
9567     if not self.op.nics:
9568       nics = []
9569       for idx in range(constants.MAX_NICS):
9570         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9571           ndict = {}
9572           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9573             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9574             ndict[name] = v
9575           nics.append(ndict)
9576         else:
9577           break
9578       self.op.nics = nics
9579
9580     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9581       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9582
9583     if (self.op.hypervisor is None and
9584         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9585       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9586
9587     if einfo.has_section(constants.INISECT_HYP):
9588       # use the export parameters but do not override the ones
9589       # specified by the user
9590       for name, value in einfo.items(constants.INISECT_HYP):
9591         if name not in self.op.hvparams:
9592           self.op.hvparams[name] = value
9593
9594     if einfo.has_section(constants.INISECT_BEP):
9595       # use the parameters, without overriding
9596       for name, value in einfo.items(constants.INISECT_BEP):
9597         if name not in self.op.beparams:
9598           self.op.beparams[name] = value
9599         # Compatibility for the old "memory" be param
9600         if name == constants.BE_MEMORY:
9601           if constants.BE_MAXMEM not in self.op.beparams:
9602             self.op.beparams[constants.BE_MAXMEM] = value
9603           if constants.BE_MINMEM not in self.op.beparams:
9604             self.op.beparams[constants.BE_MINMEM] = value
9605     else:
9606       # try to read the parameters old style, from the main section
9607       for name in constants.BES_PARAMETERS:
9608         if (name not in self.op.beparams and
9609             einfo.has_option(constants.INISECT_INS, name)):
9610           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9611
9612     if einfo.has_section(constants.INISECT_OSP):
9613       # use the parameters, without overriding
9614       for name, value in einfo.items(constants.INISECT_OSP):
9615         if name not in self.op.osparams:
9616           self.op.osparams[name] = value
9617
9618   def _RevertToDefaults(self, cluster):
9619     """Revert the instance parameters to the default values.
9620
9621     """
9622     # hvparams
9623     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9624     for name in self.op.hvparams.keys():
9625       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9626         del self.op.hvparams[name]
9627     # beparams
9628     be_defs = cluster.SimpleFillBE({})
9629     for name in self.op.beparams.keys():
9630       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9631         del self.op.beparams[name]
9632     # nic params
9633     nic_defs = cluster.SimpleFillNIC({})
9634     for nic in self.op.nics:
9635       for name in constants.NICS_PARAMETERS:
9636         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9637           del nic[name]
9638     # osparams
9639     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9640     for name in self.op.osparams.keys():
9641       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9642         del self.op.osparams[name]
9643
9644   def _CalculateFileStorageDir(self):
9645     """Calculate final instance file storage dir.
9646
9647     """
9648     # file storage dir calculation/check
9649     self.instance_file_storage_dir = None
9650     if self.op.disk_template in constants.DTS_FILEBASED:
9651       # build the full file storage dir path
9652       joinargs = []
9653
9654       if self.op.disk_template == constants.DT_SHARED_FILE:
9655         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9656       else:
9657         get_fsd_fn = self.cfg.GetFileStorageDir
9658
9659       cfg_storagedir = get_fsd_fn()
9660       if not cfg_storagedir:
9661         raise errors.OpPrereqError("Cluster file storage dir not defined")
9662       joinargs.append(cfg_storagedir)
9663
9664       if self.op.file_storage_dir is not None:
9665         joinargs.append(self.op.file_storage_dir)
9666
9667       joinargs.append(self.op.instance_name)
9668
9669       # pylint: disable=W0142
9670       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9671
9672   def CheckPrereq(self): # pylint: disable=R0914
9673     """Check prerequisites.
9674
9675     """
9676     self._CalculateFileStorageDir()
9677
9678     if self.op.mode == constants.INSTANCE_IMPORT:
9679       export_info = self._ReadExportInfo()
9680       self._ReadExportParams(export_info)
9681
9682     if (not self.cfg.GetVGName() and
9683         self.op.disk_template not in constants.DTS_NOT_LVM):
9684       raise errors.OpPrereqError("Cluster does not support lvm-based"
9685                                  " instances", errors.ECODE_STATE)
9686
9687     if (self.op.hypervisor is None or
9688         self.op.hypervisor == constants.VALUE_AUTO):
9689       self.op.hypervisor = self.cfg.GetHypervisorType()
9690
9691     cluster = self.cfg.GetClusterInfo()
9692     enabled_hvs = cluster.enabled_hypervisors
9693     if self.op.hypervisor not in enabled_hvs:
9694       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9695                                  " cluster (%s)" % (self.op.hypervisor,
9696                                   ",".join(enabled_hvs)),
9697                                  errors.ECODE_STATE)
9698
9699     # Check tag validity
9700     for tag in self.op.tags:
9701       objects.TaggableObject.ValidateTag(tag)
9702
9703     # check hypervisor parameter syntax (locally)
9704     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9705     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9706                                       self.op.hvparams)
9707     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9708     hv_type.CheckParameterSyntax(filled_hvp)
9709     self.hv_full = filled_hvp
9710     # check that we don't specify global parameters on an instance
9711     _CheckGlobalHvParams(self.op.hvparams)
9712
9713     # fill and remember the beparams dict
9714     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9715     for param, value in self.op.beparams.iteritems():
9716       if value == constants.VALUE_AUTO:
9717         self.op.beparams[param] = default_beparams[param]
9718     objects.UpgradeBeParams(self.op.beparams)
9719     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9720     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9721
9722     # build os parameters
9723     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9724
9725     # now that hvp/bep are in final format, let's reset to defaults,
9726     # if told to do so
9727     if self.op.identify_defaults:
9728       self._RevertToDefaults(cluster)
9729
9730     # NIC buildup
9731     self.nics = []
9732     for idx, nic in enumerate(self.op.nics):
9733       nic_mode_req = nic.get(constants.INIC_MODE, None)
9734       nic_mode = nic_mode_req
9735       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9736         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9737
9738       # in routed mode, for the first nic, the default ip is 'auto'
9739       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9740         default_ip_mode = constants.VALUE_AUTO
9741       else:
9742         default_ip_mode = constants.VALUE_NONE
9743
9744       # ip validity checks
9745       ip = nic.get(constants.INIC_IP, default_ip_mode)
9746       if ip is None or ip.lower() == constants.VALUE_NONE:
9747         nic_ip = None
9748       elif ip.lower() == constants.VALUE_AUTO:
9749         if not self.op.name_check:
9750           raise errors.OpPrereqError("IP address set to auto but name checks"
9751                                      " have been skipped",
9752                                      errors.ECODE_INVAL)
9753         nic_ip = self.hostname1.ip
9754       else:
9755         if not netutils.IPAddress.IsValid(ip):
9756           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9757                                      errors.ECODE_INVAL)
9758         nic_ip = ip
9759
9760       # TODO: check the ip address for uniqueness
9761       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9762         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9763                                    errors.ECODE_INVAL)
9764
9765       # MAC address verification
9766       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9767       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9768         mac = utils.NormalizeAndValidateMac(mac)
9769
9770         try:
9771           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9772         except errors.ReservationError:
9773           raise errors.OpPrereqError("MAC address %s already in use"
9774                                      " in cluster" % mac,
9775                                      errors.ECODE_NOTUNIQUE)
9776
9777       #  Build nic parameters
9778       link = nic.get(constants.INIC_LINK, None)
9779       if link == constants.VALUE_AUTO:
9780         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9781       nicparams = {}
9782       if nic_mode_req:
9783         nicparams[constants.NIC_MODE] = nic_mode
9784       if link:
9785         nicparams[constants.NIC_LINK] = link
9786
9787       check_params = cluster.SimpleFillNIC(nicparams)
9788       objects.NIC.CheckParameterSyntax(check_params)
9789       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9790
9791     # disk checks/pre-build
9792     default_vg = self.cfg.GetVGName()
9793     self.disks = []
9794     for disk in self.op.disks:
9795       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9796       if mode not in constants.DISK_ACCESS_SET:
9797         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9798                                    mode, errors.ECODE_INVAL)
9799       size = disk.get(constants.IDISK_SIZE, None)
9800       if size is None:
9801         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9802       try:
9803         size = int(size)
9804       except (TypeError, ValueError):
9805         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9806                                    errors.ECODE_INVAL)
9807
9808       data_vg = disk.get(constants.IDISK_VG, default_vg)
9809       new_disk = {
9810         constants.IDISK_SIZE: size,
9811         constants.IDISK_MODE: mode,
9812         constants.IDISK_VG: data_vg,
9813         }
9814       if constants.IDISK_METAVG in disk:
9815         new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9816       if constants.IDISK_ADOPT in disk:
9817         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9818       self.disks.append(new_disk)
9819
9820     if self.op.mode == constants.INSTANCE_IMPORT:
9821       disk_images = []
9822       for idx in range(len(self.disks)):
9823         option = "disk%d_dump" % idx
9824         if export_info.has_option(constants.INISECT_INS, option):
9825           # FIXME: are the old os-es, disk sizes, etc. useful?
9826           export_name = export_info.get(constants.INISECT_INS, option)
9827           image = utils.PathJoin(self.op.src_path, export_name)
9828           disk_images.append(image)
9829         else:
9830           disk_images.append(False)
9831
9832       self.src_images = disk_images
9833
9834       old_name = export_info.get(constants.INISECT_INS, "name")
9835       if self.op.instance_name == old_name:
9836         for idx, nic in enumerate(self.nics):
9837           if nic.mac == constants.VALUE_AUTO:
9838             nic_mac_ini = "nic%d_mac" % idx
9839             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9840
9841     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9842
9843     # ip ping checks (we use the same ip that was resolved in ExpandNames)
9844     if self.op.ip_check:
9845       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9846         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9847                                    (self.check_ip, self.op.instance_name),
9848                                    errors.ECODE_NOTUNIQUE)
9849
9850     #### mac address generation
9851     # By generating here the mac address both the allocator and the hooks get
9852     # the real final mac address rather than the 'auto' or 'generate' value.
9853     # There is a race condition between the generation and the instance object
9854     # creation, which means that we know the mac is valid now, but we're not
9855     # sure it will be when we actually add the instance. If things go bad
9856     # adding the instance will abort because of a duplicate mac, and the
9857     # creation job will fail.
9858     for nic in self.nics:
9859       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9860         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9861
9862     #### allocator run
9863
9864     if self.op.iallocator is not None:
9865       self._RunAllocator()
9866
9867     # Release all unneeded node locks
9868     _ReleaseLocks(self, locking.LEVEL_NODE,
9869                   keep=filter(None, [self.op.pnode, self.op.snode,
9870                                      self.op.src_node]))
9871     _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9872                   keep=filter(None, [self.op.pnode, self.op.snode,
9873                                      self.op.src_node]))
9874
9875     #### node related checks
9876
9877     # check primary node
9878     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9879     assert self.pnode is not None, \
9880       "Cannot retrieve locked node %s" % self.op.pnode
9881     if pnode.offline:
9882       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9883                                  pnode.name, errors.ECODE_STATE)
9884     if pnode.drained:
9885       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9886                                  pnode.name, errors.ECODE_STATE)
9887     if not pnode.vm_capable:
9888       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9889                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9890
9891     self.secondaries = []
9892
9893     # mirror node verification
9894     if self.op.disk_template in constants.DTS_INT_MIRROR:
9895       if self.op.snode == pnode.name:
9896         raise errors.OpPrereqError("The secondary node cannot be the"
9897                                    " primary node", errors.ECODE_INVAL)
9898       _CheckNodeOnline(self, self.op.snode)
9899       _CheckNodeNotDrained(self, self.op.snode)
9900       _CheckNodeVmCapable(self, self.op.snode)
9901       self.secondaries.append(self.op.snode)
9902
9903       snode = self.cfg.GetNodeInfo(self.op.snode)
9904       if pnode.group != snode.group:
9905         self.LogWarning("The primary and secondary nodes are in two"
9906                         " different node groups; the disk parameters"
9907                         " from the first disk's node group will be"
9908                         " used")
9909
9910     nodenames = [pnode.name] + self.secondaries
9911
9912     # Verify instance specs
9913     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9914     ispec = {
9915       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9916       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9917       constants.ISPEC_DISK_COUNT: len(self.disks),
9918       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9919       constants.ISPEC_NIC_COUNT: len(self.nics),
9920       constants.ISPEC_SPINDLE_USE: spindle_use,
9921       }
9922
9923     group_info = self.cfg.GetNodeGroup(pnode.group)
9924     ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9925     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9926     if not self.op.ignore_ipolicy and res:
9927       raise errors.OpPrereqError(("Instance allocation to group %s violates"
9928                                   " policy: %s") % (pnode.group,
9929                                                     utils.CommaJoin(res)),
9930                                   errors.ECODE_INVAL)
9931
9932     # disk parameters (not customizable at instance or node level)
9933     # just use the primary node parameters, ignoring the secondary.
9934     self.diskparams = group_info.diskparams
9935
9936     if not self.adopt_disks:
9937       if self.op.disk_template == constants.DT_RBD:
9938         # _CheckRADOSFreeSpace() is just a placeholder.
9939         # Any function that checks prerequisites can be placed here.
9940         # Check if there is enough space on the RADOS cluster.
9941         _CheckRADOSFreeSpace()
9942       else:
9943         # Check lv size requirements, if not adopting
9944         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9945         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9946
9947     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9948       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9949                                 disk[constants.IDISK_ADOPT])
9950                      for disk in self.disks])
9951       if len(all_lvs) != len(self.disks):
9952         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9953                                    errors.ECODE_INVAL)
9954       for lv_name in all_lvs:
9955         try:
9956           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9957           # to ReserveLV uses the same syntax
9958           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9959         except errors.ReservationError:
9960           raise errors.OpPrereqError("LV named %s used by another instance" %
9961                                      lv_name, errors.ECODE_NOTUNIQUE)
9962
9963       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9964       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9965
9966       node_lvs = self.rpc.call_lv_list([pnode.name],
9967                                        vg_names.payload.keys())[pnode.name]
9968       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9969       node_lvs = node_lvs.payload
9970
9971       delta = all_lvs.difference(node_lvs.keys())
9972       if delta:
9973         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9974                                    utils.CommaJoin(delta),
9975                                    errors.ECODE_INVAL)
9976       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9977       if online_lvs:
9978         raise errors.OpPrereqError("Online logical volumes found, cannot"
9979                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9980                                    errors.ECODE_STATE)
9981       # update the size of disk based on what is found
9982       for dsk in self.disks:
9983         dsk[constants.IDISK_SIZE] = \
9984           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9985                                         dsk[constants.IDISK_ADOPT])][0]))
9986
9987     elif self.op.disk_template == constants.DT_BLOCK:
9988       # Normalize and de-duplicate device paths
9989       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9990                        for disk in self.disks])
9991       if len(all_disks) != len(self.disks):
9992         raise errors.OpPrereqError("Duplicate disk names given for adoption",
9993                                    errors.ECODE_INVAL)
9994       baddisks = [d for d in all_disks
9995                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9996       if baddisks:
9997         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9998                                    " cannot be adopted" %
9999                                    (", ".join(baddisks),
10000                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
10001                                    errors.ECODE_INVAL)
10002
10003       node_disks = self.rpc.call_bdev_sizes([pnode.name],
10004                                             list(all_disks))[pnode.name]
10005       node_disks.Raise("Cannot get block device information from node %s" %
10006                        pnode.name)
10007       node_disks = node_disks.payload
10008       delta = all_disks.difference(node_disks.keys())
10009       if delta:
10010         raise errors.OpPrereqError("Missing block device(s): %s" %
10011                                    utils.CommaJoin(delta),
10012                                    errors.ECODE_INVAL)
10013       for dsk in self.disks:
10014         dsk[constants.IDISK_SIZE] = \
10015           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10016
10017     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10018
10019     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10020     # check OS parameters (remotely)
10021     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10022
10023     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10024
10025     # memory check on primary node
10026     #TODO(dynmem): use MINMEM for checking
10027     if self.op.start:
10028       _CheckNodeFreeMemory(self, self.pnode.name,
10029                            "creating instance %s" % self.op.instance_name,
10030                            self.be_full[constants.BE_MAXMEM],
10031                            self.op.hypervisor)
10032
10033     self.dry_run_result = list(nodenames)
10034
10035   def Exec(self, feedback_fn):
10036     """Create and add the instance to the cluster.
10037
10038     """
10039     instance = self.op.instance_name
10040     pnode_name = self.pnode.name
10041
10042     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10043                 self.owned_locks(locking.LEVEL_NODE)), \
10044       "Node locks differ from node resource locks"
10045
10046     ht_kind = self.op.hypervisor
10047     if ht_kind in constants.HTS_REQ_PORT:
10048       network_port = self.cfg.AllocatePort()
10049     else:
10050       network_port = None
10051
10052     disks = _GenerateDiskTemplate(self,
10053                                   self.op.disk_template,
10054                                   instance, pnode_name,
10055                                   self.secondaries,
10056                                   self.disks,
10057                                   self.instance_file_storage_dir,
10058                                   self.op.file_driver,
10059                                   0,
10060                                   feedback_fn,
10061                                   self.diskparams)
10062
10063     iobj = objects.Instance(name=instance, os=self.op.os_type,
10064                             primary_node=pnode_name,
10065                             nics=self.nics, disks=disks,
10066                             disk_template=self.op.disk_template,
10067                             admin_state=constants.ADMINST_DOWN,
10068                             network_port=network_port,
10069                             beparams=self.op.beparams,
10070                             hvparams=self.op.hvparams,
10071                             hypervisor=self.op.hypervisor,
10072                             osparams=self.op.osparams,
10073                             )
10074
10075     if self.op.tags:
10076       for tag in self.op.tags:
10077         iobj.AddTag(tag)
10078
10079     if self.adopt_disks:
10080       if self.op.disk_template == constants.DT_PLAIN:
10081         # rename LVs to the newly-generated names; we need to construct
10082         # 'fake' LV disks with the old data, plus the new unique_id
10083         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10084         rename_to = []
10085         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10086           rename_to.append(t_dsk.logical_id)
10087           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10088           self.cfg.SetDiskID(t_dsk, pnode_name)
10089         result = self.rpc.call_blockdev_rename(pnode_name,
10090                                                zip(tmp_disks, rename_to))
10091         result.Raise("Failed to rename adoped LVs")
10092     else:
10093       feedback_fn("* creating instance disks...")
10094       try:
10095         _CreateDisks(self, iobj)
10096       except errors.OpExecError:
10097         self.LogWarning("Device creation failed, reverting...")
10098         try:
10099           _RemoveDisks(self, iobj)
10100         finally:
10101           self.cfg.ReleaseDRBDMinors(instance)
10102           raise
10103
10104     feedback_fn("adding instance %s to cluster config" % instance)
10105
10106     self.cfg.AddInstance(iobj, self.proc.GetECId())
10107
10108     # Declare that we don't want to remove the instance lock anymore, as we've
10109     # added the instance to the config
10110     del self.remove_locks[locking.LEVEL_INSTANCE]
10111
10112     if self.op.mode == constants.INSTANCE_IMPORT:
10113       # Release unused nodes
10114       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10115     else:
10116       # Release all nodes
10117       _ReleaseLocks(self, locking.LEVEL_NODE)
10118
10119     disk_abort = False
10120     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10121       feedback_fn("* wiping instance disks...")
10122       try:
10123         _WipeDisks(self, iobj)
10124       except errors.OpExecError, err:
10125         logging.exception("Wiping disks failed")
10126         self.LogWarning("Wiping instance disks failed (%s)", err)
10127         disk_abort = True
10128
10129     if disk_abort:
10130       # Something is already wrong with the disks, don't do anything else
10131       pass
10132     elif self.op.wait_for_sync:
10133       disk_abort = not _WaitForSync(self, iobj)
10134     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10135       # make sure the disks are not degraded (still sync-ing is ok)
10136       feedback_fn("* checking mirrors status")
10137       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10138     else:
10139       disk_abort = False
10140
10141     if disk_abort:
10142       _RemoveDisks(self, iobj)
10143       self.cfg.RemoveInstance(iobj.name)
10144       # Make sure the instance lock gets removed
10145       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10146       raise errors.OpExecError("There are some degraded disks for"
10147                                " this instance")
10148
10149     # Release all node resource locks
10150     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10151
10152     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10153       if self.op.mode == constants.INSTANCE_CREATE:
10154         if not self.op.no_install:
10155           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10156                         not self.op.wait_for_sync)
10157           if pause_sync:
10158             feedback_fn("* pausing disk sync to install instance OS")
10159             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10160                                                               iobj.disks, True)
10161             for idx, success in enumerate(result.payload):
10162               if not success:
10163                 logging.warn("pause-sync of instance %s for disk %d failed",
10164                              instance, idx)
10165
10166           feedback_fn("* running the instance OS create scripts...")
10167           # FIXME: pass debug option from opcode to backend
10168           os_add_result = \
10169             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10170                                           self.op.debug_level)
10171           if pause_sync:
10172             feedback_fn("* resuming disk sync")
10173             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10174                                                               iobj.disks, False)
10175             for idx, success in enumerate(result.payload):
10176               if not success:
10177                 logging.warn("resume-sync of instance %s for disk %d failed",
10178                              instance, idx)
10179
10180           os_add_result.Raise("Could not add os for instance %s"
10181                               " on node %s" % (instance, pnode_name))
10182
10183       elif self.op.mode == constants.INSTANCE_IMPORT:
10184         feedback_fn("* running the instance OS import scripts...")
10185
10186         transfers = []
10187
10188         for idx, image in enumerate(self.src_images):
10189           if not image:
10190             continue
10191
10192           # FIXME: pass debug option from opcode to backend
10193           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10194                                              constants.IEIO_FILE, (image, ),
10195                                              constants.IEIO_SCRIPT,
10196                                              (iobj.disks[idx], idx),
10197                                              None)
10198           transfers.append(dt)
10199
10200         import_result = \
10201           masterd.instance.TransferInstanceData(self, feedback_fn,
10202                                                 self.op.src_node, pnode_name,
10203                                                 self.pnode.secondary_ip,
10204                                                 iobj, transfers)
10205         if not compat.all(import_result):
10206           self.LogWarning("Some disks for instance %s on node %s were not"
10207                           " imported successfully" % (instance, pnode_name))
10208
10209       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10210         feedback_fn("* preparing remote import...")
10211         # The source cluster will stop the instance before attempting to make a
10212         # connection. In some cases stopping an instance can take a long time,
10213         # hence the shutdown timeout is added to the connection timeout.
10214         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10215                            self.op.source_shutdown_timeout)
10216         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10217
10218         assert iobj.primary_node == self.pnode.name
10219         disk_results = \
10220           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10221                                         self.source_x509_ca,
10222                                         self._cds, timeouts)
10223         if not compat.all(disk_results):
10224           # TODO: Should the instance still be started, even if some disks
10225           # failed to import (valid for local imports, too)?
10226           self.LogWarning("Some disks for instance %s on node %s were not"
10227                           " imported successfully" % (instance, pnode_name))
10228
10229         # Run rename script on newly imported instance
10230         assert iobj.name == instance
10231         feedback_fn("Running rename script for %s" % instance)
10232         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10233                                                    self.source_instance_name,
10234                                                    self.op.debug_level)
10235         if result.fail_msg:
10236           self.LogWarning("Failed to run rename script for %s on node"
10237                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10238
10239       else:
10240         # also checked in the prereq part
10241         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10242                                      % self.op.mode)
10243
10244     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10245
10246     if self.op.start:
10247       iobj.admin_state = constants.ADMINST_UP
10248       self.cfg.Update(iobj, feedback_fn)
10249       logging.info("Starting instance %s on node %s", instance, pnode_name)
10250       feedback_fn("* starting instance...")
10251       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10252                                             False)
10253       result.Raise("Could not start instance")
10254
10255     return list(iobj.all_nodes)
10256
10257
10258 def _CheckRADOSFreeSpace():
10259   """Compute disk size requirements inside the RADOS cluster.
10260
10261   """
10262   # For the RADOS cluster we assume there is always enough space.
10263   pass
10264
10265
class LUInstanceConsole(NoHooksLU):
  """Provide the information needed to reach an instance's console.

  This LU does not open the console itself; it only returns the console
  description computed by L{_GetInstanceConsole}, which the caller then
  uses to connect.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the instance lock, with sharing as given by L{_ShareAll}.

    """
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the locked instance in the configuration and makes sure its
    primary node is online.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Return the console information of a running instance.

    @raise errors.OpExecError: if the instance is not reported as running
        by its primary node

    """
    instance = self.instance
    pnode = instance.primary_node

    running = self.rpc.call_instance_list([pnode],
                                          [instance.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if instance.name in running.payload:
      logging.debug("Connecting to console of %s on %s", instance.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)

    # The instance is not running; derive the state to report from its
    # administrative state, anything unknown counts as offline
    state_by_admin = {
      constants.ADMINST_UP: constants.INSTST_ERRORDOWN,
      constants.ADMINST_DOWN: constants.INSTST_ADMINDOWN,
      }
    state = state_by_admin.get(instance.admin_state,
                               constants.INSTST_ADMINOFFLINE)
    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (instance.name, state))
10315
10316
def _GetInstanceConsole(cluster, instance):
  """Build the console description for an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: cluster object used to fill in the hypervisor and
      backend parameters of the instance
  @type instance: L{objects.Instance}
  @param instance: the instance whose console is wanted
  @rtype: dict
  @return: the console object converted via its C{ToDict} method

  """
  # The filled hvparams and beparams are handed over as separate
  # arguments, so that the instance object is never edited (which would
  # risk saving the defaults in the instance itself).
  console = hypervisor.GetHypervisor(instance.hypervisor).GetInstanceConsole(
    instance, cluster.FillHV(instance), cluster.FillBE(instance))

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
10336
10337
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  This logical unit only takes care of argument checking, lock declaration
  and the hooks environment; the actual replacement is carried out by a
  L{TLReplaceDisks} tasklet created in L{ExpandNames}.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the mode/remote node/iallocator combination.

    The check itself is delegated to L{TLReplaceDisks.CheckArguments}.

    """
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create the replacement tasklet.

    Node locks start out either with the explicitly given new secondary
    (to be appended to later) or empty (to be replaced later); node group
    locks are only declared when an iallocator is used.

    """
    self._ExpandAndLockInstance()

    # None of these levels must have been declared so far
    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    # Filled in by DeclareLocks, which reuses the node lock list
    self.needed_locks[locking.LEVEL_NODE_RES] = []

    # The sixth argument (delay_iallocator) is False, so the tasklet runs
    # all of its prerequisite checks from its own CheckPrereq
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release,
                                   self.op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given level.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      # Node group locks are only declared when an iallocator is used
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      # (this assigns the very same list object, not a copy)
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    @rtype: dict
    @return: the mode and the old/new secondary node, updated with the
        result of L{_BuildInstanceHookEnvByObject}

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same node list twice; it holds the master node, the
        instance's primary node and, if one was given, the new secondary

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    Re-verifies the optimistically acquired node group locks (if any) and
    then hands over to L{LogicalUnit.CheckPrereq}.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)
10455
10456
10457 class TLReplaceDisks(Tasklet):
10458   """Replaces disks for an instance.
10459
10460   Note: Locking is not within the scope of this class.
10461
10462   """
10463   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10464                disks, delay_iallocator, early_release, ignore_ipolicy):
10465     """Initializes this class.
10466
10467     """
10468     Tasklet.__init__(self, lu)
10469
10470     # Parameters
10471     self.instance_name = instance_name
10472     self.mode = mode
10473     self.iallocator_name = iallocator_name
10474     self.remote_node = remote_node
10475     self.disks = disks
10476     self.delay_iallocator = delay_iallocator
10477     self.early_release = early_release
10478     self.ignore_ipolicy = ignore_ipolicy
10479
10480     # Runtime data
10481     self.instance = None
10482     self.new_node = None
10483     self.target_node = None
10484     self.other_node = None
10485     self.remote_node_info = None
10486     self.node_secondary_ip = None
10487
10488   @staticmethod
10489   def CheckArguments(mode, remote_node, iallocator):
10490     """Helper function for users of this class.
10491
10492     """
10493     # check for valid parameter combination
10494     if mode == constants.REPLACE_DISK_CHG:
10495       if remote_node is None and iallocator is None:
10496         raise errors.OpPrereqError("When changing the secondary either an"
10497                                    " iallocator script must be used or the"
10498                                    " new node given", errors.ECODE_INVAL)
10499
10500       if remote_node is not None and iallocator is not None:
10501         raise errors.OpPrereqError("Give either the iallocator or the new"
10502                                    " secondary, not both", errors.ECODE_INVAL)
10503
10504     elif remote_node is not None or iallocator is not None:
10505       # Not replacing the secondary
10506       raise errors.OpPrereqError("The iallocator and new node options can"
10507                                  " only be used when changing the"
10508                                  " secondary node", errors.ECODE_INVAL)
10509
10510   @staticmethod
10511   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10512     """Compute a new secondary node using an IAllocator.
10513
10514     """
10515     ial = IAllocator(lu.cfg, lu.rpc,
10516                      mode=constants.IALLOCATOR_MODE_RELOC,
10517                      name=instance_name,
10518                      relocate_from=list(relocate_from))
10519
10520     ial.Run(iallocator_name)
10521
10522     if not ial.success:
10523       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10524                                  " %s" % (iallocator_name, ial.info),
10525                                  errors.ECODE_NORES)
10526
10527     if len(ial.result) != ial.required_nodes:
10528       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10529                                  " of nodes (%s), required %s" %
10530                                  (iallocator_name,
10531                                   len(ial.result), ial.required_nodes),
10532                                  errors.ECODE_FAULT)
10533
10534     remote_node_name = ial.result[0]
10535
10536     lu.LogInfo("Selected new secondary for instance '%s': %s",
10537                instance_name, remote_node_name)
10538
10539     return remote_node_name
10540
10541   def _FindFaultyDisks(self, node_name):
10542     """Wrapper for L{_FindFaultyInstanceDisks}.
10543
10544     """
10545     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10546                                     node_name, True)
10547
10548   def _CheckDisksActivated(self, instance):
10549     """Checks if the instance disks are activated.
10550
10551     @param instance: The instance to check disks
10552     @return: True if they are activated, False otherwise
10553
10554     """
10555     nodes = instance.all_nodes
10556
10557     for idx, dev in enumerate(instance.disks):
10558       for node in nodes:
10559         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10560         self.cfg.SetDiskID(dev, node)
10561
10562         result = self.rpc.call_blockdev_find(node, dev)
10563
10564         if result.offline:
10565           continue
10566         elif result.fail_msg or not result.payload:
10567           return False
10568
10569     return True
10570
10571   def CheckPrereq(self):
10572     """Check prerequisites.
10573
10574     This checks that the instance is in the cluster.
10575
10576     """
10577     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10578     assert instance is not None, \
10579       "Cannot retrieve locked instance %s" % self.instance_name
10580
10581     if instance.disk_template != constants.DT_DRBD8:
10582       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10583                                  " instances", errors.ECODE_INVAL)
10584
10585     if len(instance.secondary_nodes) != 1:
10586       raise errors.OpPrereqError("The instance has a strange layout,"
10587                                  " expected one secondary but found %d" %
10588                                  len(instance.secondary_nodes),
10589                                  errors.ECODE_FAULT)
10590
10591     if not self.delay_iallocator:
10592       self._CheckPrereq2()
10593
10594   def _CheckPrereq2(self):
10595     """Check prerequisites, second part.
10596
10597     This function should always be part of CheckPrereq. It was separated and is
10598     now called from Exec because during node evacuation iallocator was only
10599     called with an unmodified cluster model, not taking planned changes into
10600     account.
10601
10602     """
10603     instance = self.instance
10604     secondary_node = instance.secondary_nodes[0]
10605
10606     if self.iallocator_name is None:
10607       remote_node = self.remote_node
10608     else:
10609       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10610                                        instance.name, instance.secondary_nodes)
10611
10612     if remote_node is None:
10613       self.remote_node_info = None
10614     else:
10615       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10616              "Remote node '%s' is not locked" % remote_node
10617
10618       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10619       assert self.remote_node_info is not None, \
10620         "Cannot retrieve locked node %s" % remote_node
10621
10622     if remote_node == self.instance.primary_node:
10623       raise errors.OpPrereqError("The specified node is the primary node of"
10624                                  " the instance", errors.ECODE_INVAL)
10625
10626     if remote_node == secondary_node:
10627       raise errors.OpPrereqError("The specified node is already the"
10628                                  " secondary node of the instance",
10629                                  errors.ECODE_INVAL)
10630
10631     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10632                                     constants.REPLACE_DISK_CHG):
10633       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10634                                  errors.ECODE_INVAL)
10635
10636     if self.mode == constants.REPLACE_DISK_AUTO:
10637       if not self._CheckDisksActivated(instance):
10638         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10639                                    " first" % self.instance_name,
10640                                    errors.ECODE_STATE)
10641       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10642       faulty_secondary = self._FindFaultyDisks(secondary_node)
10643
10644       if faulty_primary and faulty_secondary:
10645         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10646                                    " one node and can not be repaired"
10647                                    " automatically" % self.instance_name,
10648                                    errors.ECODE_STATE)
10649
10650       if faulty_primary:
10651         self.disks = faulty_primary
10652         self.target_node = instance.primary_node
10653         self.other_node = secondary_node
10654         check_nodes = [self.target_node, self.other_node]
10655       elif faulty_secondary:
10656         self.disks = faulty_secondary
10657         self.target_node = secondary_node
10658         self.other_node = instance.primary_node
10659         check_nodes = [self.target_node, self.other_node]
10660       else:
10661         self.disks = []
10662         check_nodes = []
10663
10664     else:
10665       # Non-automatic modes
10666       if self.mode == constants.REPLACE_DISK_PRI:
10667         self.target_node = instance.primary_node
10668         self.other_node = secondary_node
10669         check_nodes = [self.target_node, self.other_node]
10670
10671       elif self.mode == constants.REPLACE_DISK_SEC:
10672         self.target_node = secondary_node
10673         self.other_node = instance.primary_node
10674         check_nodes = [self.target_node, self.other_node]
10675
10676       elif self.mode == constants.REPLACE_DISK_CHG:
10677         self.new_node = remote_node
10678         self.other_node = instance.primary_node
10679         self.target_node = secondary_node
10680         check_nodes = [self.new_node, self.other_node]
10681
10682         _CheckNodeNotDrained(self.lu, remote_node)
10683         _CheckNodeVmCapable(self.lu, remote_node)
10684
10685         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10686         assert old_node_info is not None
10687         if old_node_info.offline and not self.early_release:
10688           # doesn't make sense to delay the release
10689           self.early_release = True
10690           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10691                           " early-release mode", secondary_node)
10692
10693       else:
10694         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10695                                      self.mode)
10696
10697       # If not specified all disks should be replaced
10698       if not self.disks:
10699         self.disks = range(len(self.instance.disks))
10700
10701     # TODO: This is ugly, but right now we can't distinguish between internal
10702     # submitted opcode and external one. We should fix that.
10703     if self.remote_node_info:
10704       # We change the node, lets verify it still meets instance policy
10705       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10706       ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10707                                        new_group_info)
10708       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10709                               ignore=self.ignore_ipolicy)
10710
10711     # TODO: compute disk parameters
10712     primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10713     secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10714     if primary_node_info.group != secondary_node_info.group:
10715       self.lu.LogInfo("The instance primary and secondary nodes are in two"
10716                       " different node groups; the disk parameters of the"
10717                       " primary node's group will be applied.")
10718
10719     self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10720
10721     for node in check_nodes:
10722       _CheckNodeOnline(self.lu, node)
10723
10724     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10725                                                           self.other_node,
10726                                                           self.target_node]
10727                               if node_name is not None)
10728
10729     # Release unneeded node and node resource locks
10730     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10731     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10732
10733     # Release any owned node group
10734     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10735       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10736
10737     # Check whether disks are valid
10738     for disk_idx in self.disks:
10739       instance.FindDisk(disk_idx)
10740
10741     # Get secondary node IP addresses
10742     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10743                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10744
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    @param feedback_fn: function called with progress messages
    @return: the result of the selected handler, or None if there are no
        disks to replace

    """
    # With a delayed iallocator run the second part of the prerequisite
    # checks has not been executed yet
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
              self.lu.owned_locks(locking.LEVEL_NODE_RES))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    # The disk list is empty when automatic mode found no faulty disks
    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    # The handlers are expected to have given up all node locks by now
    assert not self.lu.owned_locks(locking.LEVEL_NODE)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
      nodes = frozenset(self.node_secondary_ip)
      # With early release no node resource lock may be left, otherwise
      # only locks on the touched nodes may remain
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result
10809
10810   def _CheckVolumeGroup(self, nodes):
10811     self.lu.LogInfo("Checking volume groups")
10812
10813     vgname = self.cfg.GetVGName()
10814
10815     # Make sure volume group exists on all involved nodes
10816     results = self.rpc.call_vg_list(nodes)
10817     if not results:
10818       raise errors.OpExecError("Can't list volume groups on the nodes")
10819
10820     for node in nodes:
10821       res = results[node]
10822       res.Raise("Error checking node %s" % node)
10823       if vgname not in res.payload:
10824         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10825                                  (vgname, node))
10826
10827   def _CheckDisksExistence(self, nodes):
10828     # Check disk existence
10829     for idx, dev in enumerate(self.instance.disks):
10830       if idx not in self.disks:
10831         continue
10832
10833       for node in nodes:
10834         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10835         self.cfg.SetDiskID(dev, node)
10836
10837         result = self.rpc.call_blockdev_find(node, dev)
10838
10839         msg = result.fail_msg
10840         if msg or not result.payload:
10841           if not msg:
10842             msg = "disk not found"
10843           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10844                                    (idx, node, msg))
10845
def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
  """Abort unless all selected disks are consistent on the given node.

  @param node_name: node on which the disks are checked
  @param on_primary: passed through to L{_CheckDiskConsistency}
  @param ldisk: passed through to L{_CheckDiskConsistency} as C{ldisk}
  @raise errors.OpExecError: if any selected disk fails the check

  """
  for idx, dev in enumerate(self.instance.disks):
    # Only disks selected for replacement are of interest
    if idx in self.disks:
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      is_consistent = _CheckDiskConsistency(self.lu, dev, node_name,
                                            on_primary, ldisk=ldisk)
      if not is_consistent:
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))
10859
def _CreateNewStorage(self, node_name):
  """Create replacement logical volumes for the selected disks on a node.

  This is only used for same-node replaces, not for changing the
  secondary node, hence we don't want to modify the existing disk.

  @param node_name: node on which the new logical volumes are created
  @rtype: dict
  @return: mapping from each handled disk's C{iv_name} to a tuple of
      (disk, copies of its current children, list of the new LVs)

  """
  iv_names = {}

  for idx, dev in enumerate(self.instance.disks):
    if idx not in self.disks:
      continue

    self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

    self.cfg.SetDiskID(dev, node_name)

    # One data and one meta volume per disk, under freshly generated names
    wanted_names = [".disk%d_%s" % (idx, kind) for kind in ["data", "meta"]]
    unique_names = _GenerateUniqueNames(self.lu, wanted_names)

    (_, data_params, meta_params) = _ComputeLDParams(constants.DT_DRBD8,
                                                     self.diskparams)

    # Each new LV lives in the same volume group as the child it replaces
    data_vg = dev.children[0].logical_id[0]
    new_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                            logical_id=(data_vg, unique_names[0]),
                            params=data_params)
    meta_vg = dev.children[1].logical_id[0]
    new_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                            logical_id=(meta_vg, unique_names[1]),
                            params=meta_params)

    created = [new_data, new_meta]
    previous = [child.Copy() for child in dev.children]
    iv_names[dev.iv_name] = (dev, previous, created)

    # we pass force_create=True to force the LVM creation
    for lv in created:
      _CreateBlockDev(self.lu, node_name, self.instance, lv, True,
                      _GetInstanceInfoText(self.instance), False)

  return iv_names
10899
def _CheckDevices(self, node_name, iv_names):
  """Check that every tracked DRBD device is found and not degraded.

  @param node_name: node on which the devices are looked up
  @param iv_names: dict whose values are 3-tuples starting with the disk
      object; the key is only used in error messages
  @raise errors.OpExecError: if a device is not found or is degraded

  """
  for name, (dev, _, _) in iv_names.iteritems():
    self.cfg.SetDiskID(dev, node_name)

    lookup = self.rpc.call_blockdev_find(node_name, dev)

    failure = lookup.fail_msg
    if failure or not lookup.payload:
      # A clean call with an empty payload still counts as a missing disk
      raise errors.OpExecError("Can't find DRBD device %s: %s" %
                               (name, failure or "disk not found"))

    if lookup.payload.is_degraded:
      raise errors.OpExecError("DRBD device %s is degraded!" % name)
10915
def _RemoveOldStorage(self, node_name, iv_names):
  """Remove the old logical volumes recorded in C{iv_names} from a node.

  Removal failures are only reported as warnings; they do not abort.

  @param node_name: node from which the volumes are removed
  @param iv_names: dict whose values are 3-tuples with the list of old
      volumes in second position; the key is only used for logging

  """
  for name, (_, stale_lvs, _) in iv_names.iteritems():
    self.lu.LogInfo("Remove logical volumes for %s" % name)

    for stale in stale_lvs:
      self.cfg.SetDiskID(stale, node_name)

      failure = self.rpc.call_blockdev_remove(node_name, stale).fail_msg
      if not failure:
        continue

      self.lu.LogWarning("Can't remove old LV: %s" % failure,
                         hint="remove unused LVs manually")
10927
def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
  """Replace a disk on the primary or secondary for DRBD 8.

  The algorithm for replace is quite complicated:

    1. for each disk to be replaced:

      1. create new LVs on the target node with unique names
      1. detach old LVs from the drbd device
      1. rename old LVs to name_replaced-<time_t>
      1. rename new LVs to old LVs
      1. attach the new LVs (with the old names now) to the drbd device

    1. wait for sync across all devices

    1. for each modified disk:

      1. remove old LVs (which have the name name_replaced-<time_t>)

  Failures are not very well handled.

  @param feedback_fn: not used by this method
  @raise errors.OpExecError: if attaching the new LVs to a drbd device
      fails; failures of the checks and RPC calls made along the way are
      raised by the respective helpers and result objects

  """
  steps_total = 6

  # Step: check device activation
  self.lu.LogStep(1, steps_total, "Check device existence")
  self._CheckDisksExistence([self.other_node, self.target_node])
  self._CheckVolumeGroup([self.target_node, self.other_node])

  # Step: check other node consistency
  self.lu.LogStep(2, steps_total, "Check peer consistency")
  self._CheckDisksConsistency(self.other_node,
                              self.other_node == self.instance.primary_node,
                              False)

  # Step: create new storage
  self.lu.LogStep(3, steps_total, "Allocate new storage")
  iv_names = self._CreateNewStorage(self.target_node)

  # Step: for each lv, detach+rename*2+attach
  self.lu.LogStep(4, steps_total, "Changing drbd configuration")
  for dev, old_lvs, new_lvs in iv_names.itervalues():
    self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

    result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                   old_lvs)
    result.Raise("Can't detach drbd from local storage on node"
                 " %s for device %s" % (self.target_node, dev.iv_name))

    # ok, we created the new LVs, so now we know we have the needed
    # storage; as such, we proceed on the target node to rename
    # old_lv to _old, and new_lv to old_lv; note that we rename LVs
    # using the assumption that logical_id == physical_id (which in
    # turn is the unique_id on that node)

    # FIXME(iustin): use a better name for the replaced LVs
    temp_suffix = int(time.time())
    ren_fn = lambda d, suff: (d.physical_id[0],
                              d.physical_id[1] + "_replaced-%s" % suff)

    # Build the rename list based on what LVs exist on the node
    rename_old_to_new = []
    for to_ren in old_lvs:
      result = self.rpc.call_blockdev_find(self.target_node, to_ren)
      if not result.fail_msg and result.payload:
        # device exists
        rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

    self.lu.LogInfo("Renaming the old LVs on the target node")
    result = self.rpc.call_blockdev_rename(self.target_node,
                                           rename_old_to_new)
    result.Raise("Can't rename old LVs on node %s" % self.target_node)

    # Now we rename the new LVs to the old LVs
    self.lu.LogInfo("Renaming the new LVs on the target node")
    rename_new_to_old = [(new, old.physical_id)
                         for old, new in zip(old_lvs, new_lvs)]
    result = self.rpc.call_blockdev_rename(self.target_node,
                                           rename_new_to_old)
    result.Raise("Can't rename new LVs on node %s" % self.target_node)

    # Intermediate steps of in memory modifications
    for old, new in zip(old_lvs, new_lvs):
      new.logical_id = old.logical_id
      self.cfg.SetDiskID(new, self.target_node)

    # We need to modify old_lvs so that removal later removes the
    # right LVs, not the newly added ones; note that old_lvs is a
    # copy here
    for disk in old_lvs:
      disk.logical_id = ren_fn(disk, temp_suffix)
      self.cfg.SetDiskID(disk, self.target_node)

    # Now that the new lvs have the old name, we can add them to the device
    self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
    result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                new_lvs)
    msg = result.fail_msg
    if msg:
      # Best-effort rollback: try to remove the LVs that could not be
      # attached, then abort with the original failure
      for new_lv in new_lvs:
        msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                             new_lv).fail_msg
        if msg2:
          self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                             hint=("cleanup manually the unused logical"
                                   " volumes"))
      raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

  # The last two steps (sync, removal of old storage) swap places when
  # early_release is set, hence the running counter
  cstep = itertools.count(5)

  if self.early_release:
    self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
    # TODO: Check if releasing locks early still makes sense
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
  else:
    # Release all resource locks except those used by the instance
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                  keep=self.node_secondary_ip.keys())

  # Release all node locks while waiting for sync
  _ReleaseLocks(self.lu, locking.LEVEL_NODE)

  # TODO: Can the instance lock be downgraded here? Take the optional disk
  # shutdown in the caller into consideration.

  # Wait for sync
  # This can fail as the old devices are degraded and _WaitForSync
  # does a combined result over all disks, so we don't check its return value
  self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
  _WaitForSync(self.lu, self.instance)

  # Check all devices manually
  self._CheckDevices(self.instance.primary_node, iv_names)

  # Step: remove old storage
  if not self.early_release:
    self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
11069
def _ExecDrbd8Secondary(self, feedback_fn):
  """Replace the secondary node for DRBD 8.

  The algorithm for replace is quite complicated:
    - for all disks of the instance:
      - create new LVs on the new node with same names
      - shutdown the drbd device on the old secondary
      - disconnect the drbd network on the primary
      - create the drbd device on the new secondary
      - network attach the drbd on the primary, using an artifice:
        the drbd code for Attach() will connect to the network if it
        finds a device which is connected to the good local disks but
        not network enabled
    - wait for sync across all devices
    - remove all disks from the old secondary

  Failures are not very well handled.

  @param feedback_fn: passed on to the configuration update
  @raise errors.OpExecError: if the primary's drbd devices can't be
      detached from the network; failures of the checks and device
      creation made along the way are raised by the respective helpers

  """
  steps_total = 6

  pnode = self.instance.primary_node

  # Step: check device activation
  self.lu.LogStep(1, steps_total, "Check device existence")
  self._CheckDisksExistence([self.instance.primary_node])
  self._CheckVolumeGroup([self.instance.primary_node])

  # Step: check other node consistency
  self.lu.LogStep(2, steps_total, "Check peer consistency")
  self._CheckDisksConsistency(self.instance.primary_node, True, True)

  # Step: create new storage
  self.lu.LogStep(3, steps_total, "Allocate new storage")
  for idx, dev in enumerate(self.instance.disks):
    self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                    (self.new_node, idx))
    # we pass force_create=True to force LVM creation
    for new_lv in dev.children:
      _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                      _GetInstanceInfoText(self.instance), False)

  # Step 4: drbd minors and drbd setups changes
  # after this, we must manually remove the drbd minors on both the
  # error and the success paths
  self.lu.LogStep(4, steps_total, "Changing drbd configuration")
  minors = self.cfg.AllocateDRBDMinor([self.new_node
                                       for dev in self.instance.disks],
                                      self.instance.name)
  logging.debug("Allocated minors %r", minors)

  iv_names = {}
  for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
    self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                    (self.new_node, idx))
    # create new devices on new_node; note that we create two IDs:
    # one without port, so the drbd will be activated without
    # networking information on the new node at this stage, and one
    # with network, for the latter activation in step 4
    (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
    if self.instance.primary_node == o_node1:
      p_minor = o_minor1
    else:
      assert self.instance.primary_node == o_node2, "Three-node instance?"
      p_minor = o_minor2

    new_alone_id = (self.instance.primary_node, self.new_node, None,
                    p_minor, new_minor, o_secret)
    new_net_id = (self.instance.primary_node, self.new_node, o_port,
                  p_minor, new_minor, o_secret)

    iv_names[idx] = (dev, dev.children, new_net_id)
    logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                  new_net_id)
    drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
    new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                            logical_id=new_alone_id,
                            children=dev.children,
                            size=dev.size,
                            params=drbd_params)
    try:
      _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                            _GetInstanceInfoText(self.instance), False)
    except errors.GenericError:
      # Give back the minors reserved above before propagating the error
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise

  # We have new devices, shutdown the drbd on the old secondary
  for idx, dev in enumerate(self.instance.disks):
    self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
    self.cfg.SetDiskID(dev, self.target_node)
    msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
    if msg:
      # Not fatal: the operation continues and the user is asked to clean up
      self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                         " node: %s" % (idx, msg),
                         hint=("Please cleanup this device manually as"
                               " soon as possible"))

  self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
  result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                             self.instance.disks)[pnode]

  msg = result.fail_msg
  if msg:
    # detaches didn't succeed (unlikely)
    self.cfg.ReleaseDRBDMinors(self.instance.name)
    raise errors.OpExecError("Can't detach the disks from the network on"
                             " old node: %s" % (msg,))

  # if we managed to detach at least one, we update all the disks of
  # the instance to point to the new secondary
  self.lu.LogInfo("Updating instance configuration")
  for dev, _, new_logical_id in iv_names.itervalues():
    dev.logical_id = new_logical_id
    self.cfg.SetDiskID(dev, self.instance.primary_node)

  self.cfg.Update(self.instance, feedback_fn)

  # Release all node locks (the configuration has been updated)
  _ReleaseLocks(self.lu, locking.LEVEL_NODE)

  # and now perform the drbd attach
  self.lu.LogInfo("Attaching primary drbds to new secondary"
                  " (standalone => connected)")
  result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                          self.new_node],
                                         self.node_secondary_ip,
                                         self.instance.disks,
                                         self.instance.name,
                                         False)
  for to_node, to_result in result.items():
    msg = to_result.fail_msg
    if msg:
      self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                         to_node, msg,
                         hint=("please do a gnt-instance info to see the"
                               " status of disks"))

  # The last two steps (sync, removal of old storage) swap places when
  # early_release is set, hence the running counter
  cstep = itertools.count(5)

  if self.early_release:
    self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
    # TODO: Check if releasing locks early still makes sense
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
  else:
    # Release all resource locks except those used by the instance
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                  keep=self.node_secondary_ip.keys())

  # TODO: Can the instance lock be downgraded here? Take the optional disk
  # shutdown in the caller into consideration.

  # Wait for sync
  # This can fail as the old devices are degraded and _WaitForSync
  # does a combined result over all disks, so we don't check its return value
  self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
  _WaitForSync(self.lu, self.instance)

  # Check all devices manually
  self._CheckDevices(self.instance.primary_node, iv_names)

  # Step: remove old storage
  if not self.early_release:
    self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
11236
11237
11238 class LURepairNodeStorage(NoHooksLU):
11239   """Repairs the volume group on a node.
11240
11241   """
11242   REQ_BGL = False
11243
11244   def CheckArguments(self):
11245     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11246
11247     storage_type = self.op.storage_type
11248
11249     if (constants.SO_FIX_CONSISTENCY not in
11250         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11251       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11252                                  " repaired" % storage_type,
11253                                  errors.ECODE_INVAL)
11254
11255   def ExpandNames(self):
11256     self.needed_locks = {
11257       locking.LEVEL_NODE: [self.op.node_name],
11258       }
11259
11260   def _CheckFaultyDisks(self, instance, node_name):
11261     """Ensure faulty disks abort the opcode or at least warn."""
11262     try:
11263       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11264                                   node_name, True):
11265         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11266                                    " node '%s'" % (instance.name, node_name),
11267                                    errors.ECODE_STATE)
11268     except errors.OpPrereqError, err:
11269       if self.op.ignore_consistency:
11270         self.proc.LogWarning(str(err.args[0]))
11271       else:
11272         raise
11273
11274   def CheckPrereq(self):
11275     """Check prerequisites.
11276
11277     """
11278     # Check whether any instance on this node has faulty disks
11279     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11280       if inst.admin_state != constants.ADMINST_UP:
11281         continue
11282       check_nodes = set(inst.all_nodes)
11283       check_nodes.discard(self.op.node_name)
11284       for inst_node_name in check_nodes:
11285         self._CheckFaultyDisks(inst, inst_node_name)
11286
11287   def Exec(self, feedback_fn):
11288     feedback_fn("Repairing storage unit '%s' on %s ..." %
11289                 (self.op.name, self.op.node_name))
11290
11291     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11292     result = self.rpc.call_storage_execute(self.op.node_name,
11293                                            self.op.storage_type, st_args,
11294                                            self.op.name,
11295                                            constants.SO_FIX_CONSISTENCY)
11296     result.Raise("Failed to repair storage unit '%s' on %s" %
11297                  (self.op.name, self.op.node_name))
11298
11299
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a node.

  The new location of the instances is either computed by an iallocator or,
  for secondary instances only, given explicitly as a remote node. The
  actual work is returned as a list of jobs.

  """
  REQ_BGL = False

  # Translation from the opcode's evacuation mode to the iallocator's one
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    """Check the iallocator/remote node arguments of the opcode.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand node names and declare the (shared) locks to acquire.

    @raise errors.OpPrereqError: if a remote node is given and it is the
        evacuated node itself, or the mode is not secondary-only

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    @rtype: set
    @return: the evacuated node plus either the given remote node or, when
        an iallocator is used, the nodes returned by the configuration's
        group membership lookup for the evacuated node

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    @return: the evacuated node's primary or secondary instances, depending
        on the opcode's mode
    @raise errors.OpPrereqError: if all instances are to be evacuated at
        once, which is not supported

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given locking level.

    @param level: locking level for which locks are being declared

    """
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    """Verify the optimistically acquired locks and the affected instances.

    @raise errors.OpPrereqError: if the needed nodes are no longer covered
        by the owned node locks, or the remote node is the primary node of
        an instance to be evacuated
    @raise errors.OpExecError: if the node groups or the instances changed
        since the locks were acquired

    """
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      # An instance's new secondary can't be the node it already runs on
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Compute the jobs evacuating the instances.

    @param feedback_fn: not used by this method
    @return: a L{ResultWithJobs} holding one job per instance when a remote
        node is given, or the jobs computed by the iallocator otherwise
    @raise errors.OpPrereqError: if the iallocator run is not successful

    """
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      # One single-opcode job per instance, changing its secondary node
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
11497
11498
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  @param early_release: value to store in the opcode's C{early_release}
  @param op: opcode to modify
  @return: the very same opcode object

  """
  try:
    setattr(op, "early_release", early_release)
  except AttributeError:
    # Only opcodes other than disk replacement may refuse the attribute
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op
11509
11510
def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  @param use_nodes: whether to return the comma-joined node names instead
      of the group
  @param group: value returned when C{use_nodes} is false
  @param nodes: node names joined when C{use_nodes} is true

  """
  if not use_nodes:
    return group

  return utils.CommaJoin(nodes)
11519
11520
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups
  @rtype: list
  @return: one list of loaded opcodes per job in the iallocator result
  @raise errors.OpExecError: if the iallocator reported failed instances

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    destinations = utils.CommaJoin("%s (to %s)" %
                                   (name,
                                    _NodeEvacDest(use_nodes, group, nodes))
                                   for (name, group, nodes) in moved)
    lu.LogInfo("Instances to be moved: %s", destinations)

  result = []
  for ops in jobs:
    # Load all opcodes of a job first, then flag each of them
    loaded = [opcodes.OpCode.LoadOpCode(state) for state in ops]
    result.append([_SetOpEarlyRelease(early_release, op) for op in loaded])

  return result
11554
11555
class LUInstanceGrowDisk(LogicalUnit):
  """Logical unit growing a single disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance and prepares the node-level lock declarations.

    """
    self._ExpandAndLockInstance()

    # The actual node names are filled in by DeclareLocks
    for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
      self.needed_locks[level] = []
      self.recalculate_locks[level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declares the node locks and copies them to the node resource level.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      return

    if level == locking.LEVEL_NODE_RES:
      # Node resource locks are a copy of the node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    Exports the requested disk, the amount and the absolute flag in
    addition to the instance's own hook variables. This runs on the master,
    the primary and all the secondaries.

    """
    env = dict(DISK=self.op.disk,
               AMOUNT=self.op.amount,
               ABSOLUTE=self.op.absolute)
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: The same list (master node followed by all instance nodes) for
      both elements of the tuple

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the instance exists, that all its nodes are online, that
    the disk template can be grown and that the request does not shrink the
    disk. Sets C{self.instance}, C{self.disk}, C{self.delta} and
    C{self.target}.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    all_nodes = list(inst.all_nodes)
    for node_name in all_nodes:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    if inst.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    if self.op.absolute:
      # The amount is the final size, the increment is derived from it
      self.target = self.op.amount
      self.delta = self.target - self.disk.size
      if self.delta < 0:
        raise errors.OpPrereqError("Requested size (%s) is smaller than "
                                   "current disk size (%s)" %
                                   (utils.FormatUnit(self.target, "h"),
                                    utils.FormatUnit(self.disk.size, "h")),
                                   errors.ECODE_STATE)
    else:
      # The amount is the increment, the final size is derived from it
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta
      if self.delta < 0:
        raise errors.OpPrereqError("Requested increment (%s) is negative" %
                                   utils.FormatUnit(self.delta, "h"),
                                   errors.ECODE_INVAL)

    skip_space_check = (constants.DT_FILE,
                        constants.DT_SHARED_FILE,
                        constants.DT_RBD)
    if inst.disk_template in skip_space_check:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      return

    _CheckNodesFreeDiskPerVG(self, all_nodes,
                             self.disk.ComputeGrowth(self.delta))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The grow request is first sent to all nodes in dry-run mode and only
    then for real; afterwards the new size is recorded in the configuration.

    """
    inst = self.instance
    grow_disk = self.disk

    assert set([inst.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    (disks_ok, _) = _AssembleInstanceDisks(self, inst, disks=[grow_disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, inst.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # The first pass runs all grow operations in dry-run mode; once we know
    # that (as far as we can test) they will succeed on all nodes, the
    # second pass runs them for real
    for dry_run in (True, False):
      for node_name in inst.all_nodes:
        self.cfg.SetDiskID(grow_disk, node_name)
        result = self.rpc.call_blockdev_grow(node_name, grow_disk,
                                             self.delta, dry_run)
        result.Raise("Grow request failed to node %s" % node_name)

        if not dry_run:
          # TODO: Rewrite code to work properly
          # DRBD goes into sync mode for a short amount of time after
          # executing the "resize" command. DRBD 8.x below version 8.0.13
          # contains a bug whereby calling "resize" in sync mode fails.
          # Sleeping for a short amount of time is a work-around.
          time.sleep(5)

    grow_disk.RecordGrow(self.delta)
    self.cfg.Update(inst, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      if not _WaitForSync(self, inst, disks=[grow_disk]):
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if inst.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, inst, disks=[grow_disk])
    elif inst.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([inst.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11709
11710
class LUInstanceQueryData(NoHooksLU):
  """Logical unit returning configuration and runtime data of instances.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expands the instance names and declares the needed locks.

    """
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not self.op.static and not self.op.use_locking:
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if not self.op.instances and self.op.use_locking:
      # Will use acquired locks
      self.wanted_names = None
    else:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)

    if not self.op.use_locking:
      return

    self.share_locks = _ShareAll()

    instance_locks = self.wanted_names
    if instance_locks is None:
      instance_locks = locking.ALL_SET
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks

    self.needed_locks[locking.LEVEL_NODEGROUP] = []
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declares node group and node locks if locking is in use.

    """
    if not self.op.use_locking:
      return

    if level == locking.LEVEL_NODEGROUP:
      # Lock all groups used by instances optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      group_uuids = set()
      for inst_name in self.owned_locks(locking.LEVEL_INSTANCE):
        group_uuids.update(self.cfg.GetInstanceNodeGroups(inst_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = frozenset(group_uuids)

    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves the wanted instance names into instance objects, stored in
    C{self.wanted_instances}. With locking, the owned group and node locks
    are verified against the instances.

    """
    (owned_instances, owned_groups, owned_nodes) = \
      [frozenset(self.owned_locks(lock_level))
       for lock_level in (locking.LEVEL_INSTANCE,
                          locking.LEVEL_NODEGROUP,
                          locking.LEVEL_NODE)]

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if not self.op.use_locking:
      assert not (owned_instances or owned_groups or owned_nodes)
    else:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Queries a node for the status of a block device.

    @param node: Node to ask; nothing is queried if this is empty
    @param instance_name: Instance name, only used in the error message
    @param dev: Block device to look up
    @return: C{None} for static queries, missing or offline nodes and devices
      without status, otherwise a tuple of device path, major, minor, sync
      percentage, estimated time, degradation flag and local disk status

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance_name)

    bdev_status = find_result.payload
    if bdev_status is None:
      return None

    return (bdev_status.dev_path, bdev_status.major, bdev_status.minor,
            bdev_status.sync_percent, bdev_status.estimated_time,
            bdev_status.is_degraded, bdev_status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Computes the status of a disk and, recursively, of its children.

    @param instance: Instance owning the disk
    @param snode: Secondary node to query; replaced by the peer node taken
      from the logical ID for DRBD devices
    @param dev: Disk object
    @rtype: dict
    @return: Static disk data plus the status on the primary and secondary
      node

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      drbd_id = dev.logical_id
      if drbd_id[0] == instance.primary_node:
        snode = drbd_id[1]
      else:
        snode = drbd_id[0]

    status_primary = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    status_secondary = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      child_status = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      child_status = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": status_primary,
      "sstatus": status_secondary,
      "children": child_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()

    all_node_names = itertools.chain(*(inst.all_nodes
                                       for inst in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(all_node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    result = {}

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if self.op.static or pnode.offline:
        # No runtime information is collected in this case
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        info_result = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        info_result.Raise("Error checking node %s" % instance.primary_node)
        hv_info = info_result.payload
        if hv_info and "state" in hv_info:
          remote_state = "up"
        elif instance.admin_state == constants.ADMINST_UP:
          remote_state = "down"
        else:
          remote_state = instance.admin_state

      disks = [self._ComputeDiskStatus(instance, None, disk)
               for disk in instance.disks]

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]
      snodes_group_names = [groups[uuid].name for uuid in snodes_group_uuids]

      instance_data = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": groups[pnode.group].name,
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": snodes_group_names,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = instance_data

    return result
11914
11915
def PrepareContainerMods(mods, private_fn):
  """Extends every container modification with a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable invoked once per modification (without
    arguments) to construct its private data field; with C{None} the field
    is set to C{None}
  @rtype: list
  @return: List of tuples; (operation, index, parameters, private data)

  """
  prepared = []

  for (op, idx, params) in mods:
    if private_fn is None:
      private = None
    else:
      private = private_fn()

    prepared.append((op, idx, params, private))

  return prepared
11933
11934
#: Type check for the changes returned by L{ApplyContainerMods}'s callbacks;
#: combines a length-two check with an item check (non-empty string followed
#: by any value) and wraps the result in L{ht.TMaybeListOf}
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2),
                          ht.TItems([ht.TNonEmptyString, ht.TAny])))
11942
11943
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies the modifications described by C{mods} to C{container}.

  An index of -1 denotes the end of the container: new items are appended,
  while modifications and removals act on the last item. Other negative
  indices are rejected.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify (changed in place)
  @type chgdesc: None or list
  @param chgdesc: List collecting the applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list; if C{None}, the parameters themselves are added as the item
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}
  @raise IndexError: For indices outside of the container
  @raise errors.ProgrammerError: For unknown operations

  """
  for (op, idx, params, private) in mods:
    # Work out which existing position the modification refers to
    if idx == -1:
      # Last item of the container
      absidx = len(container) - 1
    else:
      if idx < 0:
        raise IndexError("Not accepting negative indices other than -1")
      if idx > len(container):
        raise IndexError("Got %s index %s, but there are only %s" %
                         (kind, idx, len(container)))
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      at_end = (idx == -1)

      # Calculate where item will be added
      if at_end:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if at_end:
        container.append(item)
      else:
        assert idx >= 0
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # All other operations work on an existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if chgdesc is not None and changes is not None:
      chgdesc.extend(changes)
12031
12032
12033 def _UpdateIvNames(base_index, disks):
12034   """Updates the C{iv_name} attribute of disks.
12035
12036   @type disks: list of L{objects.Disk}
12037
12038   """
12039   for (idx, disk) in enumerate(disks):
12040     disk.iv_name = "disk/%s" % (base_index + idx, )
12041
12042
12043 class _InstNicModPrivate:
12044   """Data structure for network interface modifications.
12045
12046   Used by L{LUInstanceSetParams}.
12047
12048   """
12049   def __init__(self):
12050     self.params = None
12051     self.filled = None
12052
12053
12054 class LUInstanceSetParams(LogicalUnit):
12055   """Modifies an instances's parameters.
12056
12057   """
12058   HPATH = "instance-modify"
12059   HTYPE = constants.HTYPE_INSTANCE
12060   REQ_BGL = False
12061
12062   @staticmethod
12063   def _UpgradeDiskNicMods(kind, mods, verify_fn):
12064     assert ht.TList(mods)
12065     assert not mods or len(mods[0]) in (2, 3)
12066
12067     if mods and len(mods[0]) == 2:
12068       result = []
12069
12070       addremove = 0
12071       for op, params in mods:
12072         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12073           result.append((op, -1, params))
12074           addremove += 1
12075
12076           if addremove > 1:
12077             raise errors.OpPrereqError("Only one %s add or remove operation is"
12078                                        " supported at a time" % kind,
12079                                        errors.ECODE_INVAL)
12080         else:
12081           result.append((constants.DDM_MODIFY, op, params))
12082
12083       assert verify_fn(result)
12084     else:
12085       result = mods
12086
12087     return result
12088
12089   @staticmethod
12090   def _CheckMods(kind, mods, key_types, item_fn):
12091     """Ensures requested disk/NIC modifications are valid.
12092
12093     """
12094     for (op, _, params) in mods:
12095       assert ht.TDict(params)
12096
12097       utils.ForceDictType(params, key_types)
12098
12099       if op == constants.DDM_REMOVE:
12100         if params:
12101           raise errors.OpPrereqError("No settings should be passed when"
12102                                      " removing a %s" % kind,
12103                                      errors.ECODE_INVAL)
12104       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12105         item_fn(op, params)
12106       else:
12107         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12108
12109   @staticmethod
12110   def _VerifyDiskModification(op, params):
12111     """Verifies a disk modification.
12112
12113     """
12114     if op == constants.DDM_ADD:
12115       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12116       if mode not in constants.DISK_ACCESS_SET:
12117         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12118                                    errors.ECODE_INVAL)
12119
12120       size = params.get(constants.IDISK_SIZE, None)
12121       if size is None:
12122         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12123                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12124
12125       try:
12126         size = int(size)
12127       except (TypeError, ValueError), err:
12128         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12129                                    errors.ECODE_INVAL)
12130
12131       params[constants.IDISK_SIZE] = size
12132
12133     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12134       raise errors.OpPrereqError("Disk size change not possible, use"
12135                                  " grow-disk", errors.ECODE_INVAL)
12136
12137   @staticmethod
12138   def _VerifyNicModification(op, params):
12139     """Verifies a network interface modification.
12140
12141     """
12142     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12143       ip = params.get(constants.INIC_IP, None)
12144       if ip is None:
12145         pass
12146       elif ip.lower() == constants.VALUE_NONE:
12147         params[constants.INIC_IP] = None
12148       elif not netutils.IPAddress.IsValid(ip):
12149         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12150                                    errors.ECODE_INVAL)
12151
12152       bridge = params.get("bridge", None)
12153       link = params.get(constants.INIC_LINK, None)
12154       if bridge and link:
12155         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12156                                    " at the same time", errors.ECODE_INVAL)
12157       elif bridge and bridge.lower() == constants.VALUE_NONE:
12158         params["bridge"] = None
12159       elif link and link.lower() == constants.VALUE_NONE:
12160         params[constants.INIC_LINK] = None
12161
12162       if op == constants.DDM_ADD:
12163         macaddr = params.get(constants.INIC_MAC, None)
12164         if macaddr is None:
12165           params[constants.INIC_MAC] = constants.VALUE_AUTO
12166
12167       if constants.INIC_MAC in params:
12168         macaddr = params[constants.INIC_MAC]
12169         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12170           macaddr = utils.NormalizeAndValidateMac(macaddr)
12171
12172         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12173           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12174                                      " modifying an existing NIC",
12175                                      errors.ECODE_INVAL)
12176
12177   def CheckArguments(self):
12178     if not (self.op.nics or self.op.disks or self.op.disk_template or
12179             self.op.hvparams or self.op.beparams or self.op.os_name or
12180             self.op.offline is not None or self.op.runtime_mem):
12181       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12182
12183     if self.op.hvparams:
12184       _CheckGlobalHvParams(self.op.hvparams)
12185
12186     self.op.disks = \
12187       self._UpgradeDiskNicMods("disk", self.op.disks,
12188         opcodes.OpInstanceSetParams.TestDiskModifications)
12189     self.op.nics = \
12190       self._UpgradeDiskNicMods("NIC", self.op.nics,
12191         opcodes.OpInstanceSetParams.TestNicModifications)
12192
12193     # Check disk modifications
12194     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12195                     self._VerifyDiskModification)
12196
12197     if self.op.disks and self.op.disk_template is not None:
12198       raise errors.OpPrereqError("Disk template conversion and other disk"
12199                                  " changes not supported at the same time",
12200                                  errors.ECODE_INVAL)
12201
12202     if (self.op.disk_template and
12203         self.op.disk_template in constants.DTS_INT_MIRROR and
12204         self.op.remote_node is None):
12205       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12206                                  " one requires specifying a secondary node",
12207                                  errors.ECODE_INVAL)
12208
12209     # Check NIC modifications
12210     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12211                     self._VerifyNicModification)
12212
12213   def ExpandNames(self):
12214     self._ExpandAndLockInstance()
12215     # Can't even acquire node locks in shared mode as upcoming changes in
12216     # Ganeti 2.6 will start to modify the node object on disk conversion
12217     self.needed_locks[locking.LEVEL_NODE] = []
12218     self.needed_locks[locking.LEVEL_NODE_RES] = []
12219     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12220
12221   def DeclareLocks(self, level):
12222     # TODO: Acquire group lock in shared mode (disk parameters)
12223     if level == locking.LEVEL_NODE:
12224       self._LockInstancesNodes()
12225       if self.op.disk_template and self.op.remote_node:
12226         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12227         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12228     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12229       # Copy node locks
12230       self.needed_locks[locking.LEVEL_NODE_RES] = \
12231         self.needed_locks[locking.LEVEL_NODE][:]
12232
12233   def BuildHooksEnv(self):
12234     """Build hooks env.
12235
12236     This runs on the master, primary and secondaries.
12237
12238     """
12239     args = dict()
12240     if constants.BE_MINMEM in self.be_new:
12241       args["minmem"] = self.be_new[constants.BE_MINMEM]
12242     if constants.BE_MAXMEM in self.be_new:
12243       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12244     if constants.BE_VCPUS in self.be_new:
12245       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12246     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12247     # information at all.
12248
12249     if self._new_nics is not None:
12250       nics = []
12251
12252       for nic in self._new_nics:
12253         nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12254         mode = nicparams[constants.NIC_MODE]
12255         link = nicparams[constants.NIC_LINK]
12256         nics.append((nic.ip, nic.mac, mode, link))
12257
12258       args["nics"] = nics
12259
12260     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12261     if self.op.disk_template:
12262       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12263     if self.op.runtime_mem:
12264       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12265
12266     return env
12267
12268   def BuildHooksNodes(self):
12269     """Build hooks nodes.
12270
12271     """
12272     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12273     return (nl, nl)
12274
12275   def _PrepareNicModification(self, params, private, old_ip, old_params,
12276                               cluster, pnode):
12277     update_params_dict = dict([(key, params[key])
12278                                for key in constants.NICS_PARAMETERS
12279                                if key in params])
12280
12281     if "bridge" in params:
12282       update_params_dict[constants.NIC_LINK] = params["bridge"]
12283
12284     new_params = _GetUpdatedParams(old_params, update_params_dict)
12285     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12286
12287     new_filled_params = cluster.SimpleFillNIC(new_params)
12288     objects.NIC.CheckParameterSyntax(new_filled_params)
12289
12290     new_mode = new_filled_params[constants.NIC_MODE]
12291     if new_mode == constants.NIC_MODE_BRIDGED:
12292       bridge = new_filled_params[constants.NIC_LINK]
12293       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12294       if msg:
12295         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12296         if self.op.force:
12297           self.warn.append(msg)
12298         else:
12299           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12300
12301     elif new_mode == constants.NIC_MODE_ROUTED:
12302       ip = params.get(constants.INIC_IP, old_ip)
12303       if ip is None:
12304         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12305                                    " on a routed NIC", errors.ECODE_INVAL)
12306
12307     if constants.INIC_MAC in params:
12308       mac = params[constants.INIC_MAC]
12309       if mac is None:
12310         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12311                                    errors.ECODE_INVAL)
12312       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12313         # otherwise generate the MAC address
12314         params[constants.INIC_MAC] = \
12315           self.cfg.GenerateMAC(self.proc.GetECId())
12316       else:
12317         # or validate/reserve the current one
12318         try:
12319           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12320         except errors.ReservationError:
12321           raise errors.OpPrereqError("MAC address '%s' already in use"
12322                                      " in cluster" % mac,
12323                                      errors.ECODE_NOTUNIQUE)
12324
12325     private.params = new_params
12326     private.filled = new_filled_params
12327
12328     return (None, None)
12329
12330   def CheckPrereq(self):
12331     """Check prerequisites.
12332
12333     This only checks the instance list against the existing names.
12334
12335     """
12336     # checking the new params on the primary/secondary nodes
12337
12338     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12339     cluster = self.cluster = self.cfg.GetClusterInfo()
12340     assert self.instance is not None, \
12341       "Cannot retrieve locked instance %s" % self.op.instance_name
12342     pnode = instance.primary_node
12343     nodelist = list(instance.all_nodes)
12344     pnode_info = self.cfg.GetNodeInfo(pnode)
12345     self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12346
12347     # Prepare disk/NIC modifications
12348     self.diskmod = PrepareContainerMods(self.op.disks, None)
12349     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12350
12351     # OS change
12352     if self.op.os_name and not self.op.force:
12353       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12354                       self.op.force_variant)
12355       instance_os = self.op.os_name
12356     else:
12357       instance_os = instance.os
12358
12359     assert not (self.op.disk_template and self.op.disks), \
12360       "Can't modify disk template and apply disk changes at the same time"
12361
12362     if self.op.disk_template:
12363       if instance.disk_template == self.op.disk_template:
12364         raise errors.OpPrereqError("Instance already has disk template %s" %
12365                                    instance.disk_template, errors.ECODE_INVAL)
12366
12367       if (instance.disk_template,
12368           self.op.disk_template) not in self._DISK_CONVERSIONS:
12369         raise errors.OpPrereqError("Unsupported disk template conversion from"
12370                                    " %s to %s" % (instance.disk_template,
12371                                                   self.op.disk_template),
12372                                    errors.ECODE_INVAL)
12373       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12374                           msg="cannot change disk template")
12375       if self.op.disk_template in constants.DTS_INT_MIRROR:
12376         if self.op.remote_node == pnode:
12377           raise errors.OpPrereqError("Given new secondary node %s is the same"
12378                                      " as the primary node of the instance" %
12379                                      self.op.remote_node, errors.ECODE_STATE)
12380         _CheckNodeOnline(self, self.op.remote_node)
12381         _CheckNodeNotDrained(self, self.op.remote_node)
12382         # FIXME: here we assume that the old instance type is DT_PLAIN
12383         assert instance.disk_template == constants.DT_PLAIN
12384         disks = [{constants.IDISK_SIZE: d.size,
12385                   constants.IDISK_VG: d.logical_id[0]}
12386                  for d in instance.disks]
12387         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12388         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12389
12390         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12391         snode_group = self.cfg.GetNodeGroup(snode_info.group)
12392         ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12393         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12394                                 ignore=self.op.ignore_ipolicy)
12395         if pnode_info.group != snode_info.group:
12396           self.LogWarning("The primary and secondary nodes are in two"
12397                           " different node groups; the disk parameters"
12398                           " from the first disk's node group will be"
12399                           " used")
12400
12401     # hvparams processing
12402     if self.op.hvparams:
12403       hv_type = instance.hypervisor
12404       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12405       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12406       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12407
12408       # local check
12409       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12410       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12411       self.hv_proposed = self.hv_new = hv_new # the new actual values
12412       self.hv_inst = i_hvdict # the new dict (without defaults)
12413     else:
12414       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12415                                               instance.hvparams)
12416       self.hv_new = self.hv_inst = {}
12417
12418     # beparams processing
12419     if self.op.beparams:
12420       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12421                                    use_none=True)
12422       objects.UpgradeBeParams(i_bedict)
12423       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12424       be_new = cluster.SimpleFillBE(i_bedict)
12425       self.be_proposed = self.be_new = be_new # the new actual values
12426       self.be_inst = i_bedict # the new dict (without defaults)
12427     else:
12428       self.be_new = self.be_inst = {}
12429       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12430     be_old = cluster.FillBE(instance)
12431
12432     # CPU param validation -- checking every time a paramtere is
12433     # changed to cover all cases where either CPU mask or vcpus have
12434     # changed
12435     if (constants.BE_VCPUS in self.be_proposed and
12436         constants.HV_CPU_MASK in self.hv_proposed):
12437       cpu_list = \
12438         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12439       # Verify mask is consistent with number of vCPUs. Can skip this
12440       # test if only 1 entry in the CPU mask, which means same mask
12441       # is applied to all vCPUs.
12442       if (len(cpu_list) > 1 and
12443           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12444         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12445                                    " CPU mask [%s]" %
12446                                    (self.be_proposed[constants.BE_VCPUS],
12447                                     self.hv_proposed[constants.HV_CPU_MASK]),
12448                                    errors.ECODE_INVAL)
12449
12450       # Only perform this test if a new CPU mask is given
12451       if constants.HV_CPU_MASK in self.hv_new:
12452         # Calculate the largest CPU number requested
12453         max_requested_cpu = max(map(max, cpu_list))
12454         # Check that all of the instance's nodes have enough physical CPUs to
12455         # satisfy the requested CPU mask
12456         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12457                                 max_requested_cpu + 1, instance.hypervisor)
12458
12459     # osparams processing
12460     if self.op.osparams:
12461       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12462       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12463       self.os_inst = i_osdict # the new dict (without defaults)
12464     else:
12465       self.os_inst = {}
12466
12467     self.warn = []
12468
12469     #TODO(dynmem): do the appropriate check involving MINMEM
12470     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12471         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12472       mem_check_list = [pnode]
12473       if be_new[constants.BE_AUTO_BALANCE]:
12474         # either we changed auto_balance to yes or it was from before
12475         mem_check_list.extend(instance.secondary_nodes)
12476       instance_info = self.rpc.call_instance_info(pnode, instance.name,
12477                                                   instance.hypervisor)
12478       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12479                                          [instance.hypervisor])
12480       pninfo = nodeinfo[pnode]
12481       msg = pninfo.fail_msg
12482       if msg:
12483         # Assume the primary node is unreachable and go ahead
12484         self.warn.append("Can't get info from primary node %s: %s" %
12485                          (pnode, msg))
12486       else:
12487         (_, _, (pnhvinfo, )) = pninfo.payload
12488         if not isinstance(pnhvinfo.get("memory_free", None), int):
12489           self.warn.append("Node data from primary node %s doesn't contain"
12490                            " free memory information" % pnode)
12491         elif instance_info.fail_msg:
12492           self.warn.append("Can't get instance runtime information: %s" %
12493                           instance_info.fail_msg)
12494         else:
12495           if instance_info.payload:
12496             current_mem = int(instance_info.payload["memory"])
12497           else:
12498             # Assume instance not running
12499             # (there is a slight race condition here, but it's not very
12500             # probable, and we have no other way to check)
12501             # TODO: Describe race condition
12502             current_mem = 0
12503           #TODO(dynmem): do the appropriate check involving MINMEM
12504           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12505                       pnhvinfo["memory_free"])
12506           if miss_mem > 0:
12507             raise errors.OpPrereqError("This change will prevent the instance"
12508                                        " from starting, due to %d MB of memory"
12509                                        " missing on its primary node" %
12510                                        miss_mem,
12511                                        errors.ECODE_NORES)
12512
12513       if be_new[constants.BE_AUTO_BALANCE]:
12514         for node, nres in nodeinfo.items():
12515           if node not in instance.secondary_nodes:
12516             continue
12517           nres.Raise("Can't get info from secondary node %s" % node,
12518                      prereq=True, ecode=errors.ECODE_STATE)
12519           (_, _, (nhvinfo, )) = nres.payload
12520           if not isinstance(nhvinfo.get("memory_free", None), int):
12521             raise errors.OpPrereqError("Secondary node %s didn't return free"
12522                                        " memory information" % node,
12523                                        errors.ECODE_STATE)
12524           #TODO(dynmem): do the appropriate check involving MINMEM
12525           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12526             raise errors.OpPrereqError("This change will prevent the instance"
12527                                        " from failover to its secondary node"
12528                                        " %s, due to not enough memory" % node,
12529                                        errors.ECODE_STATE)
12530
12531     if self.op.runtime_mem:
12532       remote_info = self.rpc.call_instance_info(instance.primary_node,
12533                                                 instance.name,
12534                                                 instance.hypervisor)
12535       remote_info.Raise("Error checking node %s" % instance.primary_node)
12536       if not remote_info.payload: # not running already
12537         raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12538                                    errors.ECODE_STATE)
12539
12540       current_memory = remote_info.payload["memory"]
12541       if (not self.op.force and
12542            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12543             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12544         raise errors.OpPrereqError("Instance %s must have memory between %d"
12545                                    " and %d MB of memory unless --force is"
12546                                    " given" % (instance.name,
12547                                     self.be_proposed[constants.BE_MINMEM],
12548                                     self.be_proposed[constants.BE_MAXMEM]),
12549                                    errors.ECODE_INVAL)
12550
12551       if self.op.runtime_mem > current_memory:
12552         _CheckNodeFreeMemory(self, instance.primary_node,
12553                              "ballooning memory for instance %s" %
12554                              instance.name,
12555                              self.op.memory - current_memory,
12556                              instance.hypervisor)
12557
12558     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12559       raise errors.OpPrereqError("Disk operations not supported for"
12560                                  " diskless instances",
12561                                  errors.ECODE_INVAL)
12562
12563     def _PrepareNicCreate(_, params, private):
12564       return self._PrepareNicModification(params, private, None, {},
12565                                           cluster, pnode)
12566
12567     def _PrepareNicMod(_, nic, params, private):
12568       return self._PrepareNicModification(params, private, nic.ip,
12569                                           nic.nicparams, cluster, pnode)
12570
12571     # Verify NIC changes (operating on copy)
12572     nics = instance.nics[:]
12573     ApplyContainerMods("NIC", nics, None, self.nicmod,
12574                        _PrepareNicCreate, _PrepareNicMod, None)
12575     if len(nics) > constants.MAX_NICS:
12576       raise errors.OpPrereqError("Instance has too many network interfaces"
12577                                  " (%d), cannot add more" % constants.MAX_NICS,
12578                                  errors.ECODE_STATE)
12579
12580     # Verify disk changes (operating on a copy)
12581     disks = instance.disks[:]
12582     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12583     if len(disks) > constants.MAX_DISKS:
12584       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12585                                  " more" % constants.MAX_DISKS,
12586                                  errors.ECODE_STATE)
12587
12588     if self.op.offline is not None:
12589       if self.op.offline:
12590         msg = "can't change to offline"
12591       else:
12592         msg = "can't change to online"
12593       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12594
12595     # Pre-compute NIC changes (necessary to use result in hooks)
12596     self._nic_chgdesc = []
12597     if self.nicmod:
12598       # Operate on copies as this is still in prereq
12599       nics = [nic.Copy() for nic in instance.nics]
12600       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12601                          self._CreateNewNic, self._ApplyNicMods, None)
12602       self._new_nics = nics
12603     else:
12604       self._new_nics = None
12605
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    Generates the new disk objects, creates the additional volumes on the
    primary and secondary node, renames the existing volumes on the
    primary node, creates the DRBD devices, updates the instance
    configuration and finally waits for the disks to sync.

    @param feedback_fn: function used to report progress messages
    @raise errors.OpExecError: if the disks are reported as degraded
        after the sync wait

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    # the new secondary node is the one given in the opcode
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      # only the second child is created on the primary node (presumably the
      # metadata volume, as the first child takes over the renamed original
      # volume below), while the secondary node gets all children
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    # pair each existing disk with the logical ID of the first child of the
    # corresponding new disk
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        # the last argument is true only for the primary node
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller
12665
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    Promotes the first child of every DRBD disk to be the instance's disk,
    returns the DRBD ports to the pool, updates the configuration and then
    removes the no longer needed volumes on the secondary and primary
    nodes. Failures while removing volumes are only logged as warnings.

    @param feedback_fn: function used to report progress messages

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    # the first child of each old disk becomes the new disk
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        # best effort: a failed removal does not abort the conversion
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      # the second child is treated as the metadata volume
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        # best effort: a failed removal does not abort the conversion
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
12717
12718   def _CreateNewDisk(self, idx, params, _):
12719     """Creates a new disk.
12720
12721     """
12722     instance = self.instance
12723
12724     # add a new disk
12725     if instance.disk_template in constants.DTS_FILEBASED:
12726       (file_driver, file_path) = instance.disks[0].logical_id
12727       file_path = os.path.dirname(file_path)
12728     else:
12729       file_driver = file_path = None
12730
12731     disk = \
12732       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12733                             instance.primary_node, instance.secondary_nodes,
12734                             [params], file_path, file_driver, idx,
12735                             self.Log, self.diskparams)[0]
12736
12737     info = _GetInstanceInfoText(instance)
12738
12739     logging.info("Creating volume %s for instance %s",
12740                  disk.iv_name, instance.name)
12741     # Note: this needs to be kept in sync with _CreateDisks
12742     #HARDCODE
12743     for node in instance.all_nodes:
12744       f_create = (node == instance.primary_node)
12745       try:
12746         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12747       except errors.OpExecError, err:
12748         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12749                         disk.iv_name, disk, node, err)
12750
12751     return (disk, [
12752       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12753       ])
12754
12755   @staticmethod
12756   def _ModifyDisk(idx, disk, params, _):
12757     """Modifies a disk.
12758
12759     """
12760     disk.mode = params[constants.IDISK_MODE]
12761
12762     return [
12763       ("disk.mode/%d" % idx, disk.mode),
12764       ]
12765
12766   def _RemoveDisk(self, idx, root, _):
12767     """Removes a disk.
12768
12769     """
12770     for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12771       self.cfg.SetDiskID(disk, node)
12772       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12773       if msg:
12774         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12775                         " continuing anyway", idx, node, msg)
12776
12777     # if this is a DRBD disk, return its port to the pool
12778     if root.dev_type in constants.LDS_DRBD:
12779       self.cfg.AddTcpUdpPort(root.logical_id[2])
12780
12781   @staticmethod
12782   def _CreateNewNic(idx, params, private):
12783     """Creates data structure for a new network interface.
12784
12785     """
12786     mac = params[constants.INIC_MAC]
12787     ip = params.get(constants.INIC_IP, None)
12788     nicparams = private.params
12789
12790     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12791       ("nic.%d" % idx,
12792        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12793        (mac, ip, private.filled[constants.NIC_MODE],
12794        private.filled[constants.NIC_LINK])),
12795       ])
12796
12797   @staticmethod
12798   def _ApplyNicMods(idx, nic, params, private):
12799     """Modifies a network interface.
12800
12801     """
12802     changes = []
12803
12804     for key in [constants.INIC_MAC, constants.INIC_IP]:
12805       if key in params:
12806         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12807         setattr(nic, key, params[key])
12808
12809     if private.params:
12810       nic.nicparams = private.params
12811
12812       for (key, val) in params.items():
12813         changes.append(("nic.%s/%d" % (key, idx), val))
12814
12815     return changes
12816
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    Applies, in order: runtime memory ballooning, disk changes, disk
    template conversion, NIC changes, hypervisor/backend parameter changes,
    OS and OS parameter changes and the offline state, then writes the
    instance to the configuration.

    @param feedback_fn: function used to report messages to the user
    @return: list of (name, value) tuples describing the applied changes
    @raise errors.OpExecError: if the instance disks cannot be shut down
        before a disk template conversion

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    # node resource locks must be owned exactly when a disk template
    # conversion was requested
    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      # sanity check (only when assertions are enabled): all involved nodes
      # must be locked at both node lock levels
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      # look up the conversion function by (old template, new template)
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # release the DRBD minors on any failure, then re-raise unchanged
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    # write all accumulated changes of the instance object to the
    # configuration
    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result
12927
  # Supported disk template conversions: maps (current template, requested
  # template) to the function implementing the conversion. The values are
  # plain functions, hence callers pass the LU instance explicitly as the
  # first argument.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
12932
12933
12934 class LUInstanceChangeGroup(LogicalUnit):
12935   HPATH = "instance-change-group"
12936   HTYPE = constants.HTYPE_INSTANCE
12937   REQ_BGL = False
12938
12939   def ExpandNames(self):
12940     self.share_locks = _ShareAll()
12941     self.needed_locks = {
12942       locking.LEVEL_NODEGROUP: [],
12943       locking.LEVEL_NODE: [],
12944       }
12945
12946     self._ExpandAndLockInstance()
12947
12948     if self.op.target_groups:
12949       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12950                                   self.op.target_groups)
12951     else:
12952       self.req_target_uuids = None
12953
12954     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12955
12956   def DeclareLocks(self, level):
12957     if level == locking.LEVEL_NODEGROUP:
12958       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12959
12960       if self.req_target_uuids:
12961         lock_groups = set(self.req_target_uuids)
12962
12963         # Lock all groups used by instance optimistically; this requires going
12964         # via the node before it's locked, requiring verification later on
12965         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12966         lock_groups.update(instance_groups)
12967       else:
12968         # No target groups, need to lock all of them
12969         lock_groups = locking.ALL_SET
12970
12971       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12972
12973     elif level == locking.LEVEL_NODE:
12974       if self.req_target_uuids:
12975         # Lock all nodes used by instances
12976         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12977         self._LockInstancesNodes()
12978
12979         # Lock all nodes in all potential target groups
12980         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12981                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12982         member_nodes = [node_name
12983                         for group in lock_groups
12984                         for node_name in self.cfg.GetNodeGroup(group).members]
12985         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12986       else:
12987         # Lock all nodes as all groups are potential targets
12988         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12989
12990   def CheckPrereq(self):
12991     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12992     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12993     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12994
12995     assert (self.req_target_uuids is None or
12996             owned_groups.issuperset(self.req_target_uuids))
12997     assert owned_instances == set([self.op.instance_name])
12998
12999     # Get instance information
13000     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13001
13002     # Check if node groups for locked instance are still correct
13003     assert owned_nodes.issuperset(self.instance.all_nodes), \
13004       ("Instance %s's nodes changed while we kept the lock" %
13005        self.op.instance_name)
13006
13007     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13008                                            owned_groups)
13009
13010     if self.req_target_uuids:
13011       # User requested specific target groups
13012       self.target_uuids = frozenset(self.req_target_uuids)
13013     else:
13014       # All groups except those used by the instance are potential targets
13015       self.target_uuids = owned_groups - inst_groups
13016
13017     conflicting_groups = self.target_uuids & inst_groups
13018     if conflicting_groups:
13019       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13020                                  " used by the instance '%s'" %
13021                                  (utils.CommaJoin(conflicting_groups),
13022                                   self.op.instance_name),
13023                                  errors.ECODE_INVAL)
13024
13025     if not self.target_uuids:
13026       raise errors.OpPrereqError("There are no possible target groups",
13027                                  errors.ECODE_INVAL)
13028
13029   def BuildHooksEnv(self):
13030     """Build hooks env.
13031
13032     """
13033     assert self.target_uuids
13034
13035     env = {
13036       "TARGET_GROUPS": " ".join(self.target_uuids),
13037       }
13038
13039     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13040
13041     return env
13042
13043   def BuildHooksNodes(self):
13044     """Build hooks nodes.
13045
13046     """
13047     mn = self.cfg.GetMasterNode()
13048     return ([mn], [mn])
13049
13050   def Exec(self, feedback_fn):
13051     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13052
13053     assert instances == [self.op.instance_name], "Instance not locked"
13054
13055     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13056                      instances=instances, target_groups=list(self.target_uuids))
13057
13058     ial.Run(self.op.iallocator)
13059
13060     if not ial.success:
13061       raise errors.OpPrereqError("Can't compute solution for changing group of"
13062                                  " instance '%s' using iallocator '%s': %s" %
13063                                  (self.op.instance_name, self.op.iallocator,
13064                                   ial.info),
13065                                  errors.ECODE_NORES)
13066
13067     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13068
13069     self.LogInfo("Iallocator returned %s job(s) for changing group of"
13070                  " instance '%s'", len(jobs), self.op.instance_name)
13071
13072     return ResultWithJobs(jobs)
13073
13074
class LUBackupQuery(NoHooksLU):
  """Query the exports list.

  All work is delegated to an L{_ExportQuery} object; L{Exec} only converts
  its (node, export) pairs into a dictionary.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object restricted to the requested nodes.

    """
    node_filter = qlang.MakeSimpleFilter("node", self.op.nodes)
    self.expq = _ExportQuery(node_filter, ["node", "export"],
                             self.op.use_locking)

  def ExpandNames(self):
    """Forward name expansion to the query object.

    """
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forward lock declaration to the query object.

    """
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Collect the exports per node.

    @rtype: dict
    @return: dictionary keyed by node; the value is the list of export names
      or C{False} when the query returned C{None} as export name for that
      node

    """
    exports = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is not None:
        exports.setdefault(node, []).append(expname)
      else:
        # No export name for this node, flag it instead of listing anything
        exports[node] = False

    return exports
13101
13102
class _ExportQuery(_QueryBase):
  """Query implementation for exports, built on L{_QueryBase}.

  """
  FIELDS = query.EXPORT_FIELDS

  #: The node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    """Compute the wanted nodes and, if requested, the locks to acquire.

    @param lu: the logical unit on whose behalf the query runs

    """
    lu.needed_locks = {}

    # self.wanted and self.do_locking interact with _QueryBase._GetNames
    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = self.use_locking

    if not self.do_locking:
      return

    lu.share_locks = _ShareAll()
    lu.needed_locks = {locking.LEVEL_NODE: self.wanted}

  def DeclareLocks(self, lu, level):
    """Nothing to declare at this stage.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    @param lu: the logical unit on whose behalf the query runs
    @rtype: list
    @return: list of C{(node, export name)} tuples; for a node whose RPC
      result carries a failure message the export name is C{None}

    """
    # Locking is not used
    # TODO
    assert (not compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) and
            not self.do_locking and not self.use_locking)

    nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    rows = []

    for (node, nres) in lu.rpc.call_export_list(nodes).items():
      if not nres.fail_msg:
        rows.extend((node, expname) for expname in nres.payload)
        continue

      # The node could not be queried, record it without an export name
      rows.append((node, None))

    return rows
13151
13152
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare its lock.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance object, requires its primary node to be online and
    fetches the cluster domain secret.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @param feedback_fn: function used to report progress
    @return: C{None} unless the mode is C{constants.EXPORT_MODE_REMOTE}; in
      remote mode a dictionary with the keys C{handshake}, C{x509_key_name}
      and C{x509_ca}

    """
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    pnode = self.instance.primary_node
    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % pnode)
    result = self.rpc.call_x509_cert_create(pnode,
                                            constants.RIE_CERT_VALIDITY)
    result.Raise("Can't create X509 key and certificate on %s" % result.node)

    (name, cert_pem) = result.payload

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # Build the pieces in the same order as they appear in the result
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (name, key_name_hmac, salt),
      "x509_ca": signed_ca,
      }
13202
13203
13204 class LUBackupExport(LogicalUnit):
13205   """Export an instance to an image in the cluster.
13206
13207   """
13208   HPATH = "instance-export"
13209   HTYPE = constants.HTYPE_INSTANCE
13210   REQ_BGL = False
13211
13212   def CheckArguments(self):
13213     """Check the arguments.
13214
13215     """
13216     self.x509_key_name = self.op.x509_key_name
13217     self.dest_x509_ca_pem = self.op.destination_x509_ca
13218
13219     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13220       if not self.x509_key_name:
13221         raise errors.OpPrereqError("Missing X509 key name for encryption",
13222                                    errors.ECODE_INVAL)
13223
13224       if not self.dest_x509_ca_pem:
13225         raise errors.OpPrereqError("Missing destination X509 CA",
13226                                    errors.ECODE_INVAL)
13227
13228   def ExpandNames(self):
13229     self._ExpandAndLockInstance()
13230
13231     # Lock all nodes for local exports
13232     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13233       # FIXME: lock only instance primary and destination node
13234       #
13235       # Sad but true, for now we have do lock all nodes, as we don't know where
13236       # the previous export might be, and in this LU we search for it and
13237       # remove it from its current node. In the future we could fix this by:
13238       #  - making a tasklet to search (share-lock all), then create the
13239       #    new one, then one to remove, after
13240       #  - removing the removal operation altogether
13241       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13242
13243   def DeclareLocks(self, level):
13244     """Last minute lock declaration."""
13245     # All nodes are locked anyway, so nothing to do here.
13246
13247   def BuildHooksEnv(self):
13248     """Build hooks env.
13249
13250     This will run on the master, primary node and target node.
13251
13252     """
13253     env = {
13254       "EXPORT_MODE": self.op.mode,
13255       "EXPORT_NODE": self.op.target_node,
13256       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13257       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13258       # TODO: Generic function for boolean env variables
13259       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13260       }
13261
13262     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13263
13264     return env
13265
13266   def BuildHooksNodes(self):
13267     """Build hooks nodes.
13268
13269     """
13270     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13271
13272     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13273       nl.append(self.op.target_node)
13274
13275     return (nl, nl)
13276
13277   def CheckPrereq(self):
13278     """Check prerequisites.
13279
13280     This checks that the instance and node names are valid.
13281
13282     """
13283     instance_name = self.op.instance_name
13284
13285     self.instance = self.cfg.GetInstanceInfo(instance_name)
13286     assert self.instance is not None, \
13287           "Cannot retrieve locked instance %s" % self.op.instance_name
13288     _CheckNodeOnline(self, self.instance.primary_node)
13289
13290     if (self.op.remove_instance and
13291         self.instance.admin_state == constants.ADMINST_UP and
13292         not self.op.shutdown):
13293       raise errors.OpPrereqError("Can not remove instance without shutting it"
13294                                  " down before")
13295
13296     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13297       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13298       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13299       assert self.dst_node is not None
13300
13301       _CheckNodeOnline(self, self.dst_node.name)
13302       _CheckNodeNotDrained(self, self.dst_node.name)
13303
13304       self._cds = None
13305       self.dest_disk_info = None
13306       self.dest_x509_ca = None
13307
13308     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13309       self.dst_node = None
13310
13311       if len(self.op.target_node) != len(self.instance.disks):
13312         raise errors.OpPrereqError(("Received destination information for %s"
13313                                     " disks, but instance %s has %s disks") %
13314                                    (len(self.op.target_node), instance_name,
13315                                     len(self.instance.disks)),
13316                                    errors.ECODE_INVAL)
13317
13318       cds = _GetClusterDomainSecret()
13319
13320       # Check X509 key name
13321       try:
13322         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13323       except (TypeError, ValueError), err:
13324         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13325
13326       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13327         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13328                                    errors.ECODE_INVAL)
13329
13330       # Load and verify CA
13331       try:
13332         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13333       except OpenSSL.crypto.Error, err:
13334         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13335                                    (err, ), errors.ECODE_INVAL)
13336
13337       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13338       if errcode is not None:
13339         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13340                                    (msg, ), errors.ECODE_INVAL)
13341
13342       self.dest_x509_ca = cert
13343
13344       # Verify target information
13345       disk_info = []
13346       for idx, disk_data in enumerate(self.op.target_node):
13347         try:
13348           (host, port, magic) = \
13349             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13350         except errors.GenericError, err:
13351           raise errors.OpPrereqError("Target info for disk %s: %s" %
13352                                      (idx, err), errors.ECODE_INVAL)
13353
13354         disk_info.append((host, port, magic))
13355
13356       assert len(disk_info) == len(self.op.target_node)
13357       self.dest_disk_info = disk_info
13358
13359     else:
13360       raise errors.ProgrammerError("Unhandled export mode %r" %
13361                                    self.op.mode)
13362
13363     # instance disk type verification
13364     # TODO: Implement export support for file-based disks
13365     for disk in self.instance.disks:
13366       if disk.dev_type == constants.LD_FILE:
13367         raise errors.OpPrereqError("Export not supported for instances with"
13368                                    " file-based disks", errors.ECODE_INVAL)
13369
13370   def _CleanupExports(self, feedback_fn):
13371     """Removes exports of current instance from all other nodes.
13372
13373     If an instance in a cluster with nodes A..D was exported to node C, its
13374     exports will be removed from the nodes A, B and D.
13375
13376     """
13377     assert self.op.mode != constants.EXPORT_MODE_REMOTE
13378
13379     nodelist = self.cfg.GetNodeList()
13380     nodelist.remove(self.dst_node.name)
13381
13382     # on one-node clusters nodelist will be empty after the removal
13383     # if we proceed the backup would be removed because OpBackupQuery
13384     # substitutes an empty list with the full cluster node list.
13385     iname = self.instance.name
13386     if nodelist:
13387       feedback_fn("Removing old exports for instance %s" % iname)
13388       exportlist = self.rpc.call_export_list(nodelist)
13389       for node in exportlist:
13390         if exportlist[node].fail_msg:
13391           continue
13392         if iname in exportlist[node].payload:
13393           msg = self.rpc.call_export_remove(node, iname).fail_msg
13394           if msg:
13395             self.LogWarning("Could not remove older export for instance %s"
13396                             " on node %s: %s", iname, node, msg)
13397
13398   def Exec(self, feedback_fn):
13399     """Export an instance to an image in the cluster.
13400
13401     """
13402     assert self.op.mode in constants.EXPORT_MODES
13403
13404     instance = self.instance
13405     src_node = instance.primary_node
13406
13407     if self.op.shutdown:
13408       # shutdown the instance, but not the disks
13409       feedback_fn("Shutting down instance %s" % instance.name)
13410       result = self.rpc.call_instance_shutdown(src_node, instance,
13411                                                self.op.shutdown_timeout)
13412       # TODO: Maybe ignore failures if ignore_remove_failures is set
13413       result.Raise("Could not shutdown instance %s on"
13414                    " node %s" % (instance.name, src_node))
13415
13416     # set the disks ID correctly since call_instance_start needs the
13417     # correct drbd minor to create the symlinks
13418     for disk in instance.disks:
13419       self.cfg.SetDiskID(disk, src_node)
13420
13421     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13422
13423     if activate_disks:
13424       # Activate the instance disks if we'exporting a stopped instance
13425       feedback_fn("Activating disks for %s" % instance.name)
13426       _StartInstanceDisks(self, instance, None)
13427
13428     try:
13429       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13430                                                      instance)
13431
13432       helper.CreateSnapshots()
13433       try:
13434         if (self.op.shutdown and
13435             instance.admin_state == constants.ADMINST_UP and
13436             not self.op.remove_instance):
13437           assert not activate_disks
13438           feedback_fn("Starting instance %s" % instance.name)
13439           result = self.rpc.call_instance_start(src_node,
13440                                                 (instance, None, None), False)
13441           msg = result.fail_msg
13442           if msg:
13443             feedback_fn("Failed to start instance: %s" % msg)
13444             _ShutdownInstanceDisks(self, instance)
13445             raise errors.OpExecError("Could not start instance: %s" % msg)
13446
13447         if self.op.mode == constants.EXPORT_MODE_LOCAL:
13448           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13449         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13450           connect_timeout = constants.RIE_CONNECT_TIMEOUT
13451           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13452
13453           (key_name, _, _) = self.x509_key_name
13454
13455           dest_ca_pem = \
13456             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13457                                             self.dest_x509_ca)
13458
13459           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13460                                                      key_name, dest_ca_pem,
13461                                                      timeouts)
13462       finally:
13463         helper.Cleanup()
13464
13465       # Check for backwards compatibility
13466       assert len(dresults) == len(instance.disks)
13467       assert compat.all(isinstance(i, bool) for i in dresults), \
13468              "Not all results are boolean: %r" % dresults
13469
13470     finally:
13471       if activate_disks:
13472         feedback_fn("Deactivating disks for %s" % instance.name)
13473         _ShutdownInstanceDisks(self, instance)
13474
13475     if not (compat.all(dresults) and fin_resu):
13476       failures = []
13477       if not fin_resu:
13478         failures.append("export finalization")
13479       if not compat.all(dresults):
13480         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13481                                if not dsk)
13482         failures.append("disk export: disk(s) %s" % fdsk)
13483
13484       raise errors.OpExecError("Export failed, errors in %s" %
13485                                utils.CommaJoin(failures))
13486
13487     # At this point, the export was successful, we can cleanup/finish
13488
13489     # Remove instance if requested
13490     if self.op.remove_instance:
13491       feedback_fn("Removing instance %s" % instance.name)
13492       _RemoveInstance(self, feedback_fn, instance,
13493                       self.op.ignore_remove_failures)
13494
13495     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13496       self._CleanupExports(feedback_fn)
13497
13498     return fin_resu, dresults
13499
13500
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks needed: all nodes, but not the instance.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove any export.

    Nodes that cannot be queried are skipped with a warning; a failed removal
    is logged as an error and does not abort the operation.

    @param feedback_fn: function used to tell the user when no export was
      found for a name that could not be expanded

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)

    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = not expanded_name
    if fqdn_warn:
      instance_name = self.op.instance_name
    else:
      instance_name = expanded_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)

    found = False
    for (node, nres) in exportlist.items():
      query_msg = nres.fail_msg
      if query_msg:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_msg)
        continue

      if instance_name not in nres.payload:
        continue

      found = True
      remove_msg = self.rpc.call_export_remove(node, instance_name).fail_msg
      if remove_msg:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, remove_msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
13546
13547
13548 class LUGroupAdd(LogicalUnit):
13549   """Logical unit for creating node groups.
13550
13551   """
13552   HPATH = "group-add"
13553   HTYPE = constants.HTYPE_GROUP
13554   REQ_BGL = False
13555
13556   def ExpandNames(self):
13557     # We need the new group's UUID here so that we can create and acquire the
13558     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13559     # that it should not check whether the UUID exists in the configuration.
13560     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13561     self.needed_locks = {}
13562     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13563
13564   def CheckPrereq(self):
13565     """Check prerequisites.
13566
13567     This checks that the given group name is not an existing node group
13568     already.
13569
13570     """
13571     try:
13572       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13573     except errors.OpPrereqError:
13574       pass
13575     else:
13576       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13577                                  " node group (UUID: %s)" %
13578                                  (self.op.group_name, existing_uuid),
13579                                  errors.ECODE_EXISTS)
13580
13581     if self.op.ndparams:
13582       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13583
13584     if self.op.hv_state:
13585       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13586     else:
13587       self.new_hv_state = None
13588
13589     if self.op.disk_state:
13590       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13591     else:
13592       self.new_disk_state = None
13593
13594     if self.op.diskparams:
13595       for templ in constants.DISK_TEMPLATES:
13596         if templ not in self.op.diskparams:
13597           self.op.diskparams[templ] = {}
13598         utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13599     else:
13600       self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13601
13602     if self.op.ipolicy:
13603       cluster = self.cfg.GetClusterInfo()
13604       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13605       try:
13606         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13607       except errors.ConfigurationError, err:
13608         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13609                                    errors.ECODE_INVAL)
13610
13611   def BuildHooksEnv(self):
13612     """Build hooks env.
13613
13614     """
13615     return {
13616       "GROUP_NAME": self.op.group_name,
13617       }
13618
13619   def BuildHooksNodes(self):
13620     """Build hooks nodes.
13621
13622     """
13623     mn = self.cfg.GetMasterNode()
13624     return ([mn], [mn])
13625
13626   def Exec(self, feedback_fn):
13627     """Add the node group to the cluster.
13628
13629     """
13630     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13631                                   uuid=self.group_uuid,
13632                                   alloc_policy=self.op.alloc_policy,
13633                                   ndparams=self.op.ndparams,
13634                                   diskparams=self.op.diskparams,
13635                                   ipolicy=self.op.ipolicy,
13636                                   hv_state_static=self.new_hv_state,
13637                                   disk_state_static=self.new_disk_state)
13638
13639     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13640     del self.remove_locks[locking.LEVEL_NODEGROUP]
13641
13642
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group and node names and declare the initial locks.

    """
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # All affected nodes and groups have to be locked. The list of nodes and
    # the *destination* group are readily available; the "source" groups can
    # only be gathered later, once node information is fetched.
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.nodes,
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      }

  def DeclareLocks(self, level):
    """Add the current groups of the affected nodes to the group locks.

    """
    if level != locking.LEVEL_NODEGROUP:
      return

    group_locks = self.needed_locks[locking.LEVEL_NODEGROUP]
    assert len(group_locks) == 1

    # Try to get all affected nodes' groups without having the group or node
    # lock yet. Needs verification later in the code flow.
    group_locks.update(self.cfg.GetNodeGroupsFromNodes(self.op.nodes))

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the group locks held still match the groups of the nodes,
    and that the change does not split instances unless forced.

    @raise errors.OpExecError: if nodes changed groups since the locks were
      acquired, if the target group can't be retrieved, or if instances would
      get split and C{force} was not given

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    current_groups = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    locked_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if locked_groups != current_groups:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(current_groups),
                                utils.CommaJoin(locked_groups)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(node, self.group_uuid) for node in self.op.nodes]
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            instance_data)

    if not new_splits:
      return

    fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               fmt_new_splits)

    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    @param feedback_fn: feedback function (not used by this method)

    """
    target = self.group_uuid
    self.cfg.AssignGroupNodes([(name, target) for name in self.op.nodes])

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Only keep the assignments which actually move a node to another group
    changed_nodes = {}
    for (node, group) in changes:
      if node_data[node].group != group:
        changed_nodes[node] = group

    split_before = set()
    split_after = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      groups_before = set()
      groups_after = set()

      for node in [inst.primary_node] + list(inst.secondary_nodes):
        current = node_data[node].group
        groups_before.add(current)
        groups_after.add(changed_nodes.get(node, current))

      # An instance is split when its nodes live in more than one group
      if len(groups_before) > 1:
        split_before.add(inst.name)

      if len(groups_after) > 1:
        split_after.add(inst.name)

    return (list(split_after - split_before),
            list(split_before & split_after))
13774
13775
class _GroupQuery(_QueryBase):
  """Query helper for node groups.

  Resolves the requested group names/UUIDs and gathers the per-group node
  and instance name lists needed to answer the query.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Resolves the requested groups into a list of UUIDs.

    No locks are requested. Without explicit names, all groups are wanted,
    ordered by their nicely-sorted names.

    @param lu: logical unit on whose behalf the query is run
    @raise errors.OpPrereqError: if a requested entry is neither a known
      group UUID nor a known group name

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    uuid_by_name = dict((grp.name, grp.uuid)
                        for grp in self._all_groups.values())

    if self.names:
      # Every requested entry may be either a UUID or a name; UUIDs are
      # checked first
      known_uuids = frozenset(self._all_groups.keys())
      unknown = []
      self.wanted = []

      for entry in self.names:
        if entry in known_uuids:
          self.wanted.append(entry)
          continue

        if entry in uuid_by_name:
          self.wanted.append(uuid_by_name[entry])
          continue

        unknown.append(entry)

      if unknown:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(unknown),
                                   errors.ECODE_NOENT)
    else:
      sorted_names = utils.NiceSort(uuid_by_name.keys())
      self.wanted = [uuid_by_name[group_name] for group_name in sorted_names]

  def DeclareLocks(self, lu, level):
    """Declares no locks at any level.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    @param lu: logical unit on whose behalf the query is run
    @return: a L{query.GroupQueryData} built from the cluster, the wanted
      groups and the optional group->nodes and group->instances mappings

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    nodes_by_group = None
    instances_by_group = None

    # The group->[nodes] mapping only needs the node list. The
    # group->[instances] mapping has to go through instance->node, so nodes
    # are processed even when only instance information was requested.
    if want_nodes or want_instances:
      node_objects = lu.cfg.GetAllNodesInfo()
      nodes_by_group = dict((uuid, []) for uuid in self.wanted)
      group_of_node = {}

      for node_obj in node_objects.values():
        if node_obj.group not in nodes_by_group:
          continue
        nodes_by_group[node_obj.group].append(node_obj.name)
        group_of_node[node_obj.name] = node_obj.group

      if want_instances:
        instance_objects = lu.cfg.GetAllInstancesInfo()
        instances_by_group = dict((uuid, []) for uuid in self.wanted)

        # An instance is attributed to the group of its primary node
        for inst_obj in instance_objects.values():
          pnode = inst_obj.primary_node
          if pnode in group_of_node:
            instances_by_group[group_of_node[pnode]].append(inst_obj.name)

        if not want_nodes:
          # Node data was only an intermediate step, do not pass it on
          nodes_by_group = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]

    return query.GroupQueryData(self._cluster, wanted_groups,
                                nodes_by_group, instances_by_group)
13853
13854
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  All phases are delegated to a L{_GroupQuery} helper created in
  L{CheckArguments}.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the query helper from the opcode's names and output fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Lets the query helper expand the requested names.

    """
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Lets the query helper declare locks for the given level.

    """
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns the helper's old-style result.

    """
    return self.gq.OldStyleQuery(self)
13873
13874
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    """Ensures that at least one modification was requested.

    @raise errors.OpPrereqError: if all modifiable opcode fields are None

    """
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.hv_state,
      self.op.disk_state,
      self.op.ipolicy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Resolves the group name and requests group and instance locks.

    The instance lock list starts empty and is filled in by
    L{DeclareLocks}; instance locks are acquired in shared mode.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    """Declares the instance locks for the group's instances.

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically acquired instance locks, loads the group and
    computes the new values (C{self.new_*}) for every requested change.
    Instances violating a changed instance policy only cause a warning.

    @raise errors.OpExecError: if the group can not be retrieved

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    cluster = self.cfg.GetClusterInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      # Enforce the types on the merged dictionary, which is the one stored
      # in Exec; this matches the handling of the disk parameters below
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.diskparams:
      self.new_diskparams = dict()
      for templ in constants.DISK_TEMPLATES:
        # Templates not mentioned in the opcode get an empty update, i.e.
        # they are merged without any requested changes
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
                                             self.op.diskparams[templ])
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
        self.new_diskparams[templ] = new_templ_params

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.group.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.group.disk_state_static)

    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
                                            self.op.ipolicy,
                                            group_policy=True)

      # Compare the locked instances against the old and the new policy
      new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
      inst_filter = lambda inst: inst.name in owned_instances
      instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
      violations = \
          _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
                                                               self.group),
                                        new_ipolicy, instances)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(violations))

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    @return: list of (parameter name, new value as string) pairs; only
      changes to ndparams and diskparams are reported

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    if self.op.hv_state:
      self.group.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      self.group.disk_state_static = self.new_disk_state

    if self.op.ipolicy:
      self.group.ipolicy = self.new_ipolicy

    self.cfg.Update(self.group, feedback_fn)
    return result
14019
14020
class LUGroupRemove(LogicalUnit):
  """Logical unit for removing a node group.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the group name and requests the lock on that group.

    """
    # The lookup raises errors.OpPrereqError on its own
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {locking.LEVEL_NODEGROUP: [self.group_uuid]}

  def CheckPrereq(self):
    """Check prerequisites.

    The group must not contain any nodes and must not be the only group of
    the cluster.

    @raise errors.OpPrereqError: if either condition is violated

    """
    # Collect the names of all nodes still assigned to the group
    members = []
    for node_obj in self.cfg.GetAllNodesInfo().values():
      if node_obj.group == self.group_uuid:
        members.append(node_obj.name)

    if members:
      member_list = utils.CommaJoin(utils.NiceSort(members))
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name, member_list),
                                 errors.ECODE_STATE)

    # Removing the only group would leave the cluster without any group
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"GROUP_NAME": self.op.group_name}
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node only, both before and after.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    @raise errors.OpExecError: if the configuration no longer knows the group

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    # Mark the lock of the removed group for removal as well
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14086
14087
class LUGroupRename(LogicalUnit):
  """Logical unit for renaming a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the group name and requests the lock on that group.

    """
    # The lookup raises errors.OpPrereqError on its own
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {locking.LEVEL_NODEGROUP: [self.group_uuid]}

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    @raise errors.OpPrereqError: if a group with the new name already exists

    """
    try:
      clashing_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # A failed lookup means the new name is still free
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (self.op.new_name, clashing_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node followed by all other nodes belonging to
    the renamed group.

    """
    master = self.cfg.GetMasterNode()

    # Drop the master from the candidates, it is always listed first
    other_nodes = self.cfg.GetAllNodesInfo()
    other_nodes.pop(master, None)

    hook_nodes = [master]
    for node_obj in other_nodes.values():
      if node_obj.group == self.group_uuid:
        hook_nodes.append(node_obj.name)

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @return: the new group name
    @raise errors.OpExecError: if the group can not be retrieved

    """
    group_obj = self.cfg.GetNodeGroup(self.group_uuid)

    if group_obj is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group_obj.name = self.op.new_name
    self.cfg.Update(group_obj, feedback_fn)

    return self.op.new_name
14155
14156
class LUGroupEvacuate(LogicalUnit):
  """Logical unit for evacuating all instances of a node group.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves source and target groups and prepares the lock lists.

    @raise errors.OpPrereqError: if the group to be evacuated is also listed
      as a target group

    """
    # The lookups raise errors.OpPrereqError on their own
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    requested = self.op.target_groups
    if requested:
      self.req_target_uuids = [self.cfg.LookupNodeGroup(group_name)
                               for group_name in requested]
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      target_list = utils.CommaJoin(self.req_target_uuids)
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid, target_list),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    # All lock lists start empty and are filled in by DeclareLocks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    """Fills in the locks needed at the given level.

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if not self.req_target_uuids:
        # No target groups, need to lock all of them
        wanted_groups = locking.ALL_SET
      else:
        wanted_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        for inst_name in self.owned_locks(locking.LEVEL_INSTANCE):
          wanted_groups.update(self.cfg.GetInstanceNodeGroups(inst_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      held_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in held_groups

      extra_nodes = []
      for held_uuid in held_groups:
        extra_nodes.extend(self.cfg.GetNodeGroup(held_uuid).members)

      self.needed_locks[locking.LEVEL_NODE].extend(extra_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically acquired locks and determines the target
    groups (C{self.target_uuids}).

    @raise errors.OpPrereqError: if no target groups were requested and no
      group other than the evacuated one is available

    """
    held_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    held_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    held_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert held_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in held_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, held_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(held_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              held_groups, held_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
      return

    # All groups except the one to be evacuated are potential targets
    self.target_uuids = [candidate for candidate in held_groups
                         if candidate != self.group_uuid]

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node and on all members of the evacuated group.

    """
    master = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    group_members = self.cfg.GetNodeGroup(self.group_uuid).members
    hook_nodes = [master] + group_members

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Asks the iallocator for an evacuation plan and returns its jobs.

    @return: a L{ResultWithJobs} wrapping the jobs derived from the
      iallocator result
    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    locked_instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=locked_instances,
                     target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    evac_jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release,
                                    False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(evac_jobs), self.op.group_name)

    return ResultWithJobs(evac_jobs)
14303
14304
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  Abstract parent of the other tags LUs; it resolves the tagged object and
  the lock protecting it.

  """
  def ExpandNames(self):
    """Expands the object name and requests the matching lock.

    For nodes and instances the opcode's name is replaced by its expanded
    form; for node groups the UUID is stored in C{self.group_uuid}. Other
    kinds do not get a lock.

    """
    self.group_uuid = None
    self.needed_locks = {}

    kind = self.op.kind
    lock_level = None
    lock_name = None

    if kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_NODE
      lock_name = self.op.name
    elif kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_INSTANCE
      lock_name = self.op.name
    elif kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      lock_level = locking.LEVEL_NODEGROUP
      lock_name = self.group_uuid

    # Opcodes without a "use_locking" attribute always lock
    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Stores the tagged configuration object in C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is not one of cluster,
      node, instance or node group

    """
    kind = self.op.kind
    cfg = self.cfg

    if kind == constants.TAG_CLUSTER:
      self.target = cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      self.target = cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      self.target = cfg.GetInstanceInfo(self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      self.target = cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)
14352
14353
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expands names as in L{TagsLU} and switches all locks to shared mode.

    """
    TagsLU.ExpandNames(self)

    # Reading tags does not modify anything, shared locks are sufficient
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @return: list with the tags of the target object

    """
    tags = self.target.GetTags()
    return list(tags)
14371
14372
14373 class LUTagsSearch(NoHooksLU):
14374   """Searches the tags for a given pattern.
14375
14376   """
14377   REQ_BGL = False
14378
14379   def ExpandNames(self):
14380     self.needed_locks = {}
14381
14382   def CheckPrereq(self):
14383     """Check prerequisites.
14384
14385     This checks the pattern passed for validity by compiling it.
14386
14387     """
14388     try:
14389       self.re = re.compile(self.op.pattern)
14390     except re.error, err:
14391       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14392                                  (self.op.pattern, err), errors.ECODE_INVAL)
14393
14394   def Exec(self, feedback_fn):
14395     """Returns the tag list.
14396
14397     """
14398     cfg = self.cfg
14399     tgts = [("/cluster", cfg.GetClusterInfo())]
14400     ilist = cfg.GetAllInstancesInfo().values()
14401     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14402     nlist = cfg.GetAllNodesInfo().values()
14403     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14404     tgts.extend(("/nodegroup/%s" % n.name, n)
14405                 for n in cfg.GetAllNodeGroupsInfo().values())
14406     results = []
14407     for path, target in tgts:
14408       for tag in target.GetTags():
14409         if self.re.search(tag):
14410           results.append((path, tag))
14411     return results
14412
14413
14414 class LUTagsSet(TagsLU):
14415   """Sets a tag on a given object.
14416
14417   """
14418   REQ_BGL = False
14419
14420   def CheckPrereq(self):
14421     """Check prerequisites.
14422
14423     This checks the type and length of the tag name and value.
14424
14425     """
14426     TagsLU.CheckPrereq(self)
14427     for tag in self.op.tags:
14428       objects.TaggableObject.ValidateTag(tag)
14429
14430   def Exec(self, feedback_fn):
14431     """Sets the tag.
14432
14433     """
14434     try:
14435       for tag in self.op.tags:
14436         self.target.AddTag(tag)
14437     except errors.TagError, err:
14438       raise errors.OpExecError("Error while setting tag: %s" % str(err))
14439     self.cfg.Update(self.target, feedback_fn)
14440
14441
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Validates the tags and checks that the target currently has all of them.

    @raise errors.OpPrereqError: if any of the tags is not set on the target

    """
    TagsLU.CheckPrereq(self)

    validate = objects.TaggableObject.ValidateTag
    for old_tag in self.op.tags:
      validate(old_tag)

    requested = frozenset(self.op.tags)
    present = self.target.GetTags()

    # Tags requested for removal which the object does not have
    absent = requested - present
    if absent:
      quoted = ["'%s'" % name for name in sorted(absent)]
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(quoted), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    Removes all requested tags and writes the target to the configuration.

    """
    target = self.target
    for old_tag in self.op.tags:
      target.RemoveTag(old_tag)

    self.cfg.Update(target, feedback_fn)
14474
14475
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any, and requests locks on those nodes.

    """
    self.needed_locks = {}

    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    @raise errors.OpExecError: if the delay on the master fails

    """
    duration = self.op.duration

    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      replies = self.rpc.call_test_delay(self.op.on_nodes, duration)
      for (node_name, reply) in replies.items():
        reply.Raise("Failure during rpc call to node %s" % node_name)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero runs the delay exactly once, without logging
    the iterations.

    """
    repeat = self.op.repeat

    if repeat == 0:
      self._TestDelay()
      return

    last_iteration = repeat - 1
    for iteration in range(repeat):
      self.LogInfo("Test delay iteration %d/%d" %
                   (iteration, last_iteration))
      self._TestDelay()
14522
14523
14524 class LUTestJqueue(NoHooksLU):
14525   """Utility LU to test some aspects of the job queue.
14526
14527   """
14528   REQ_BGL = False
14529
14530   # Must be lower than default timeout for WaitForJobChange to see whether it
14531   # notices changed jobs
14532   _CLIENT_CONNECT_TIMEOUT = 20.0
14533   _CLIENT_CONFIRM_TIMEOUT = 60.0
14534
14535   @classmethod
14536   def _NotifyUsingSocket(cls, cb, errcls):
14537     """Opens a Unix socket and waits for another program to connect.
14538
14539     @type cb: callable
14540     @param cb: Callback to send socket name to client
14541     @type errcls: class
14542     @param errcls: Exception class to use for errors
14543
14544     """
14545     # Using a temporary directory as there's no easy way to create temporary
14546     # sockets without writing a custom loop around tempfile.mktemp and
14547     # socket.bind
14548     tmpdir = tempfile.mkdtemp()
14549     try:
14550       tmpsock = utils.PathJoin(tmpdir, "sock")
14551
14552       logging.debug("Creating temporary socket at %s", tmpsock)
14553       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14554       try:
14555         sock.bind(tmpsock)
14556         sock.listen(1)
14557
14558         # Send details to client
14559         cb(tmpsock)
14560
14561         # Wait for client to connect before continuing
14562         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14563         try:
14564           (conn, _) = sock.accept()
14565         except socket.error, err:
14566           raise errcls("Client didn't connect in time (%s)" % err)
14567       finally:
14568         sock.close()
14569     finally:
14570       # Remove as soon as client is connected
14571       shutil.rmtree(tmpdir)
14572
14573     # Wait for client to close
14574     try:
14575       try:
14576         # pylint: disable=E1101
14577         # Instance of '_socketobject' has no ... member
14578         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14579         conn.recv(1)
14580       except socket.error, err:
14581         raise errcls("Client failed to confirm notification (%s)" % err)
14582     finally:
14583       conn.close()
14584
14585   def _SendNotification(self, test, arg, sockname):
14586     """Sends a notification to the client.
14587
14588     @type test: string
14589     @param test: Test name
14590     @param arg: Test argument (depends on test)
14591     @type sockname: string
14592     @param sockname: Socket path
14593
14594     """
14595     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14596
14597   def _Notify(self, prereq, test, arg):
14598     """Notifies the client of a test.
14599
14600     @type prereq: bool
14601     @param prereq: Whether this is a prereq-phase test
14602     @type test: string
14603     @param test: Test name
14604     @param arg: Test argument (depends on test)
14605
14606     """
14607     if prereq:
14608       errcls = errors.OpPrereqError
14609     else:
14610       errcls = errors.OpExecError
14611
14612     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14613                                                   test, arg),
14614                                    errcls)
14615
14616   def CheckArguments(self):
14617     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14618     self.expandnames_calls = 0
14619
14620   def ExpandNames(self):
14621     checkargs_calls = getattr(self, "checkargs_calls", 0)
14622     if checkargs_calls < 1:
14623       raise errors.ProgrammerError("CheckArguments was not called")
14624
14625     self.expandnames_calls += 1
14626
14627     if self.op.notify_waitlock:
14628       self._Notify(True, constants.JQT_EXPANDNAMES, None)
14629
14630     self.LogInfo("Expanding names")
14631
14632     # Get lock on master node (just to get a lock, not for a particular reason)
14633     self.needed_locks = {
14634       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14635       }
14636
14637   def Exec(self, feedback_fn):
14638     if self.expandnames_calls < 1:
14639       raise errors.ProgrammerError("ExpandNames was not called")
14640
14641     if self.op.notify_exec:
14642       self._Notify(False, constants.JQT_EXEC, None)
14643
14644     self.LogInfo("Executing")
14645
14646     if self.op.log_messages:
14647       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14648       for idx, msg in enumerate(self.op.log_messages):
14649         self.LogInfo("Sending log message %s", idx + 1)
14650         feedback_fn(constants.JQT_MSGPREFIX + msg)
14651         # Report how many test messages have been sent
14652         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14653
14654     if self.op.fail:
14655       raise errors.OpExecError("Opcode failure was requested")
14656
14657     return True
14658
14659
14660 class IAllocator(object):
14661   """IAllocator framework.
14662
14663   An IAllocator instance has three sets of attributes:
14664     - cfg that is needed to query the cluster
14665     - input data (all members of the _KEYS class attribute are required)
14666     - four buffer attributes (in|out_data|text), that represent the
14667       input (to the external script) in text and data structure format,
14668       and the output from it, again in two formats
14669     - the result variables from the script (success, info, nodes) for
14670       easy usage
14671
14672   """
14673   # pylint: disable=R0902
14674   # lots of instance attributes
14675
14676   def __init__(self, cfg, rpc_runner, mode, **kwargs):
14677     self.cfg = cfg
14678     self.rpc = rpc_runner
14679     # init buffer variables
14680     self.in_text = self.out_text = self.in_data = self.out_data = None
14681     # init all input fields so that pylint is happy
14682     self.mode = mode
14683     self.memory = self.disks = self.disk_template = self.spindle_use = None
14684     self.os = self.tags = self.nics = self.vcpus = None
14685     self.hypervisor = None
14686     self.relocate_from = None
14687     self.name = None
14688     self.instances = None
14689     self.evac_mode = None
14690     self.target_groups = []
14691     # computed fields
14692     self.required_nodes = None
14693     # init result fields
14694     self.success = self.info = self.result = None
14695
14696     try:
14697       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14698     except KeyError:
14699       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14700                                    " IAllocator" % self.mode)
14701
14702     keyset = [n for (n, _) in keydata]
14703
14704     for key in kwargs:
14705       if key not in keyset:
14706         raise errors.ProgrammerError("Invalid input parameter '%s' to"
14707                                      " IAllocator" % key)
14708       setattr(self, key, kwargs[key])
14709
14710     for key in keyset:
14711       if key not in kwargs:
14712         raise errors.ProgrammerError("Missing input parameter '%s' to"
14713                                      " IAllocator" % key)
14714     self._BuildInputData(compat.partial(fn, self), keydata)
14715
14716   def _ComputeClusterData(self):
14717     """Compute the generic allocator input data.
14718
14719     This is the data that is independent of the actual operation.
14720
14721     """
14722     cfg = self.cfg
14723     cluster_info = cfg.GetClusterInfo()
14724     # cluster data
14725     data = {
14726       "version": constants.IALLOCATOR_VERSION,
14727       "cluster_name": cfg.GetClusterName(),
14728       "cluster_tags": list(cluster_info.GetTags()),
14729       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14730       "ipolicy": cluster_info.ipolicy,
14731       }
14732     ninfo = cfg.GetAllNodesInfo()
14733     iinfo = cfg.GetAllInstancesInfo().values()
14734     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14735
14736     # node data
14737     node_list = [n.name for n in ninfo.values() if n.vm_capable]
14738
14739     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14740       hypervisor_name = self.hypervisor
14741     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14742       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14743     else:
14744       hypervisor_name = cluster_info.primary_hypervisor
14745
14746     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14747                                         [hypervisor_name])
14748     node_iinfo = \
14749       self.rpc.call_all_instances_info(node_list,
14750                                        cluster_info.enabled_hypervisors)
14751
14752     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14753
14754     config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14755     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14756                                                  i_list, config_ndata)
14757     assert len(data["nodes"]) == len(ninfo), \
14758         "Incomplete node data computed"
14759
14760     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14761
14762     self.in_data = data
14763
14764   @staticmethod
14765   def _ComputeNodeGroupData(cfg):
14766     """Compute node groups data.
14767
14768     """
14769     cluster = cfg.GetClusterInfo()
14770     ng = dict((guuid, {
14771       "name": gdata.name,
14772       "alloc_policy": gdata.alloc_policy,
14773       "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14774       })
14775       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14776
14777     return ng
14778
14779   @staticmethod
14780   def _ComputeBasicNodeData(cfg, node_cfg):
14781     """Compute global node data.
14782
14783     @rtype: dict
14784     @returns: a dict of name: (node dict, node config)
14785
14786     """
14787     # fill in static (config-based) values
14788     node_results = dict((ninfo.name, {
14789       "tags": list(ninfo.GetTags()),
14790       "primary_ip": ninfo.primary_ip,
14791       "secondary_ip": ninfo.secondary_ip,
14792       "offline": ninfo.offline,
14793       "drained": ninfo.drained,
14794       "master_candidate": ninfo.master_candidate,
14795       "group": ninfo.group,
14796       "master_capable": ninfo.master_capable,
14797       "vm_capable": ninfo.vm_capable,
14798       "ndparams": cfg.GetNdParams(ninfo),
14799       })
14800       for ninfo in node_cfg.values())
14801
14802     return node_results
14803
  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Add the runtime (RPC-based) values to the static node data.

    Nodes that are offline or drained, and nodes without an entry in
    C{node_data}, keep only their static values.

    @param node_cfg: dict mapping node names to node objects
    @param node_data: dict mapping node names to the node info RPC results
    @param node_iinfo: dict mapping node names to the RPC results listing
        the instances on the node; each payload is indexed by instance name
    @param i_list: list of (instance object, filled backend parameters)
        pairs
    @param node_results: the basic node structures as filled from the config
    @rtype: dict
    @return: a new dict of node name to node data; the dict passed in as
        C{node_results} itself is not modified
    @raise errors.OpExecError: if a node did not return a required
        attribute or returned a non-integer value for it

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a (shallow) copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        # both RPC calls must have succeeded for an online, undrained node
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        # all of these must be present and be integers
        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            # an instance not reported by the node counts as using no memory
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            # whatever the instance uses less than its maximum memory is
            # taken off the node's free memory
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # assemble the dynamic values for this node
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        # merge in the static values; on a key clash the static value wins
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
14864
14865   @staticmethod
14866   def _ComputeInstanceData(cluster_info, i_list):
14867     """Compute global instance data.
14868
14869     """
14870     instance_data = {}
14871     for iinfo, beinfo in i_list:
14872       nic_data = []
14873       for nic in iinfo.nics:
14874         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14875         nic_dict = {
14876           "mac": nic.mac,
14877           "ip": nic.ip,
14878           "mode": filled_params[constants.NIC_MODE],
14879           "link": filled_params[constants.NIC_LINK],
14880           }
14881         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14882           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14883         nic_data.append(nic_dict)
14884       pir = {
14885         "tags": list(iinfo.GetTags()),
14886         "admin_state": iinfo.admin_state,
14887         "vcpus": beinfo[constants.BE_VCPUS],
14888         "memory": beinfo[constants.BE_MAXMEM],
14889         "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14890         "os": iinfo.os,
14891         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14892         "nics": nic_data,
14893         "disks": [{constants.IDISK_SIZE: dsk.size,
14894                    constants.IDISK_MODE: dsk.mode}
14895                   for dsk in iinfo.disks],
14896         "disk_template": iinfo.disk_template,
14897         "hypervisor": iinfo.hypervisor,
14898         }
14899       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14900                                                  pir["disks"])
14901       instance_data[iinfo.name] = pir
14902
14903     return instance_data
14904
14905   def _AddNewInstance(self):
14906     """Add new instance data to allocator structure.
14907
14908     This in combination with _AllocatorGetClusterData will create the
14909     correct structure needed as input for the allocator.
14910
14911     The checks for the completeness of the opcode must have already been
14912     done.
14913
14914     """
14915     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14916
14917     if self.disk_template in constants.DTS_INT_MIRROR:
14918       self.required_nodes = 2
14919     else:
14920       self.required_nodes = 1
14921
14922     request = {
14923       "name": self.name,
14924       "disk_template": self.disk_template,
14925       "tags": self.tags,
14926       "os": self.os,
14927       "vcpus": self.vcpus,
14928       "memory": self.memory,
14929       "spindle_use": self.spindle_use,
14930       "disks": self.disks,
14931       "disk_space_total": disk_space,
14932       "nics": self.nics,
14933       "required_nodes": self.required_nodes,
14934       "hypervisor": self.hypervisor,
14935       }
14936
14937     return request
14938
14939   def _AddRelocateInstance(self):
14940     """Add relocate instance data to allocator structure.
14941
14942     This in combination with _IAllocatorGetClusterData will create the
14943     correct structure needed as input for the allocator.
14944
14945     The checks for the completeness of the opcode must have already been
14946     done.
14947
14948     """
14949     instance = self.cfg.GetInstanceInfo(self.name)
14950     if instance is None:
14951       raise errors.ProgrammerError("Unknown instance '%s' passed to"
14952                                    " IAllocator" % self.name)
14953
14954     if instance.disk_template not in constants.DTS_MIRRORED:
14955       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14956                                  errors.ECODE_INVAL)
14957
14958     if instance.disk_template in constants.DTS_INT_MIRROR and \
14959         len(instance.secondary_nodes) != 1:
14960       raise errors.OpPrereqError("Instance has not exactly one secondary node",
14961                                  errors.ECODE_STATE)
14962
14963     self.required_nodes = 1
14964     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14965     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14966
14967     request = {
14968       "name": self.name,
14969       "disk_space_total": disk_space,
14970       "required_nodes": self.required_nodes,
14971       "relocate_from": self.relocate_from,
14972       }
14973     return request
14974
14975   def _AddNodeEvacuate(self):
14976     """Get data for node-evacuate requests.
14977
14978     """
14979     return {
14980       "instances": self.instances,
14981       "evac_mode": self.evac_mode,
14982       }
14983
14984   def _AddChangeGroup(self):
14985     """Get data for node-evacuate requests.
14986
14987     """
14988     return {
14989       "instances": self.instances,
14990       "target_groups": self.target_groups,
14991       }
14992
14993   def _BuildInputData(self, fn, keydata):
14994     """Build input data structures.
14995
14996     """
14997     self._ComputeClusterData()
14998
14999     request = fn()
15000     request["type"] = self.mode
15001     for keyname, keytype in keydata:
15002       if keyname not in request:
15003         raise errors.ProgrammerError("Request parameter %s is missing" %
15004                                      keyname)
15005       val = request[keyname]
15006       if not keytype(val):
15007         raise errors.ProgrammerError("Request parameter %s doesn't pass"
15008                                      " validation, value %s, expected"
15009                                      " type %s" % (keyname, val, keytype))
15010     self.in_data["request"] = request
15011
15012     self.in_text = serializer.Dump(self.in_data)
15013
  # Validator for a list of strings
  _STRING_LIST = ht.TListOf(ht.TString)
  # Validator for a list of lists of dicts whose "OP_ID" value must be one
  # of the failover, migrate or replace-disks opcode IDs
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  # Validator for a list of 3-item entries: two non-empty strings followed
  # by a list of non-empty strings
  # NOTE(review): presumably (instance, group, nodes) -- confirm
  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  # Validator for a list of 2-item entries: a non-empty string followed by
  # an optional string
  # NOTE(review): presumably (instance, failure reason) -- confirm
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  # Validator for the whole result of the node-evacuate and change-group
  # modes: a 3-item sequence of (moved, failed, jobs)
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  # Per-mode data, unpacked in __init__ as a 3-tuple of:
  #   - the function building the mode's request; it is stored as a plain
  #     function and gets bound to the instance via compat.partial
  #   - the list of (input key, validator) pairs; the keys are the keyword
  #     arguments accepted and required by __init__, and the validators are
  #     applied to the built request
  #   - the validator applied to the "result" value returned by the script
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("spindle_use", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
     constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
     constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
15067
15068   def Run(self, name, validate=True, call_fn=None):
15069     """Run an instance allocator and return the results.
15070
15071     """
15072     if call_fn is None:
15073       call_fn = self.rpc.call_iallocator_runner
15074
15075     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15076     result.Raise("Failure while running the iallocator script")
15077
15078     self.out_text = result.payload
15079     if validate:
15080       self._ValidateResult()
15081
15082   def _ValidateResult(self):
15083     """Process the allocator results.
15084
15085     This will process and if successful save the result in
15086     self.out_data and the other parameters.
15087
15088     """
15089     try:
15090       rdict = serializer.Load(self.out_text)
15091     except Exception, err:
15092       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15093
15094     if not isinstance(rdict, dict):
15095       raise errors.OpExecError("Can't parse iallocator results: not a dict")
15096
15097     # TODO: remove backwards compatiblity in later versions
15098     if "nodes" in rdict and "result" not in rdict:
15099       rdict["result"] = rdict["nodes"]
15100       del rdict["nodes"]
15101
15102     for key in "success", "info", "result":
15103       if key not in rdict:
15104         raise errors.OpExecError("Can't parse iallocator results:"
15105                                  " missing key '%s'" % key)
15106       setattr(self, key, rdict[key])
15107
15108     if not self._result_check(self.result):
15109       raise errors.OpExecError("Iallocator returned invalid result,"
15110                                " expected %s, got %s" %
15111                                (self._result_check, self.result),
15112                                errors.ECODE_INVAL)
15113
15114     if self.mode == constants.IALLOCATOR_MODE_RELOC:
15115       assert self.relocate_from is not None
15116       assert self.required_nodes == 1
15117
15118       node2group = dict((name, ndata["group"])
15119                         for (name, ndata) in self.in_data["nodes"].items())
15120
15121       fn = compat.partial(self._NodesToGroups, node2group,
15122                           self.in_data["nodegroups"])
15123
15124       instance = self.cfg.GetInstanceInfo(self.name)
15125       request_groups = fn(self.relocate_from + [instance.primary_node])
15126       result_groups = fn(rdict["result"] + [instance.primary_node])
15127
15128       if self.success and not set(result_groups).issubset(request_groups):
15129         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15130                                  " differ from original groups (%s)" %
15131                                  (utils.CommaJoin(result_groups),
15132                                   utils.CommaJoin(request_groups)))
15133
15134     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15135       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15136
15137     self.out_data = rdict
15138
15139   @staticmethod
15140   def _NodesToGroups(node2group, groups, nodes):
15141     """Returns a list of unique group names for a list of nodes.
15142
15143     @type node2group: dict
15144     @param node2group: Map from node name to group UUID
15145     @type groups: dict
15146     @param groups: Group information
15147     @type nodes: list
15148     @param nodes: Node names
15149
15150     """
15151     result = set()
15152
15153     for node in nodes:
15154       try:
15155         group_uuid = node2group[node]
15156       except KeyError:
15157         # Ignore unknown node
15158         pass
15159       else:
15160         try:
15161           group = groups[group_uuid]
15162         except KeyError:
15163           # Can't find group, let's use UUID
15164           group_name = group_uuid
15165         else:
15166           group_name = group["name"]
15167
15168         result.add(group_name)
15169
15170     return sorted(result)
15171
15172
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests: it either returns the input text that
  would be passed to an allocator script, or runs the script and returns
  its unvalidated output.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if a parameter required by the mode is
        missing or invalid, or if the mode or direction is unknown

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # an allocation test must be for an instance that doesn't exist yet
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # every disk needs an integer size and a valid access mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # the instance is relocated away from its current secondary nodes
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function, not used by this LU
    @return: the allocator input text for the "in" direction, otherwise the
        raw, unvalidated output of the allocator script
    @raise errors.ProgrammerError: if the mode is not handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       # IAllocator requires "spindle_use" in allocation
                       # mode and rejects the request without it
                       # NOTE(review): falls back to 1 when the opcode has
                       # no such parameter -- confirm the intended default
                       spindle_use=getattr(self.op, "spindle_use", 1),
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      # the mode has to be interpolated into the message, not passed as a
      # second exception argument
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # the raw script output is returned as is, without validation
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
15276
15277
#: Query type implementations, indexed by query resource name
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

# Import-time check that the table covers exactly the resources listed in
# constants.QR_VIA_OP
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15289
15290
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @raise errors.OpPrereqError: if no implementation is known for C{name}

  """
  if name in _QUERY_IMPL:
    return _QUERY_IMPL[name]

  raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                             errors.ECODE_INVAL)