code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import logging
  36 import copy
  37 import OpenSSL
  38 import socket
  39 import tempfile
  40 import shutil
  41 import itertools
  42 import operator
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60 from ganeti import ht
  61 from ganeti import rpc
  62 from ganeti import runtime
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141     # Dicts used to declare locking needs to mcpu
 142     self.needed_locks = None
 143     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 144     self.add_locks = {}
 145     self.remove_locks = {}
 146     # Used to force good behavior when calling helper functions
 147     self.recalculate_locks = {}
 148     # logging
 149     self.Log = processor.Log # pylint: disable=C0103
 150     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 151     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 152     self.LogStep = processor.LogStep # pylint: disable=C0103
 153     # support for dry-run
 154     self.dry_run_result = None
 155     # support for generic debug attribute
 156     if (not hasattr(self.op, "debug_level") or
 157         not isinstance(self.op.debug_level, int)):
 158       self.op.debug_level = 0
 159
 160     # Tasklets
 161     self.tasklets = None
 162
 163     # Validate opcode parameters and set defaults
 164     self.op.Validate(True)
 165
 166     self.CheckArguments()
 167
 168   def CheckArguments(self):
 169     """Check syntactic validity for the opcode arguments.
 170
 171     This method is for doing a simple syntactic check and ensure
 172     validity of opcode parameters, without any cluster-related
 173     checks. While the same can be accomplished in ExpandNames and/or
 174     CheckPrereq, doing these separate is better because:
 175
 176       - ExpandNames is left as as purely a lock-related function
 177       - CheckPrereq is run after we have acquired locks (and possible
 178         waited for them)
 179
 180     The function is allowed to change the self.op attribute so that
 181     later methods can no longer worry about missing parameters.
 182
 183     """
 184     pass
 185
 186   def ExpandNames(self):
 187     """Expand names for this LU.
 188
 189     This method is called before starting to execute the opcode, and it should
 190     update all the parameters of the opcode to their canonical form (e.g. a
 191     short node name must be fully expanded after this method has successfully
 192     completed). This way locking, hooks, logging, etc. can work correctly.
 193
 194     LUs which implement this method must also populate the self.needed_locks
 195     member, as a dict with lock levels as keys, and a list of needed lock names
 196     as values. Rules:
 197
 198       - use an empty dict if you don't need any lock
 199       - if you don't need any lock at a particular level omit that
 200         level (note that in this case C{DeclareLocks} won't be called
 201         at all for that level)
 202       - if you need locks at a level, but you can't calculate it in
 203         this function, initialise that level with an empty list and do
 204         further processing in L{LogicalUnit.DeclareLocks} (see that
 205         function's docstring)
 206       - don't put anything for the BGL level
 207       - if you want all locks at a level use L{locking.ALL_SET} as a value
 208
 209     If you need to share locks (rather than acquire them exclusively) at one
 210     level you can modify self.share_locks, setting a true value (usually 1) for
 211     that level. By default locks are not shared.
 212
 213     This function can also define a list of tasklets, which then will be
 214     executed in order instead of the usual LU-level CheckPrereq and Exec
 215     functions, if those are not defined by the LU.
 216
 217     Examples::
 218
 219       # Acquire all nodes and one instance
 220       self.needed_locks = {
 221         locking.LEVEL_NODE: locking.ALL_SET,
 222         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 223       }
 224       # Acquire just two nodes
 225       self.needed_locks = {
 226         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 227       }
 228       # Acquire no locks
 229       self.needed_locks = {} # No, you can't leave it to the default value None
 230
 231     """
 232     # The implementation of this method is mandatory only if the new LU is
 233     # concurrent, so that old LUs don't need to be changed all at the same
 234     # time.
 235     if self.REQ_BGL:
 236       self.needed_locks = {} # Exclusive LUs don't need locks.
 237     else:
 238       raise NotImplementedError
 239
 240   def DeclareLocks(self, level):
 241     """Declare LU locking needs for a level
 242
 243     While most LUs can just declare their locking needs at ExpandNames time,
 244     sometimes there's the need to calculate some locks after having acquired
 245     the ones before. This function is called just before acquiring locks at a
 246     particular level, but after acquiring the ones at lower levels, and permits
 247     such calculations. It can be used to modify self.needed_locks, and by
 248     default it does nothing.
 249
 250     This function is only called if you have something already set in
 251     self.needed_locks for the level.
 252
 253     @param level: Locking level which is going to be locked
 254     @type level: member of L{ganeti.locking.LEVELS}
 255
 256     """
 257
 258   def CheckPrereq(self):
 259     """Check prerequisites for this LU.
 260
 261     This method should check that the prerequisites for the execution
 262     of this LU are fulfilled. It can do internode communication, but
 263     it should be idempotent - no cluster or system changes are
 264     allowed.
 265
 266     The method should raise errors.OpPrereqError in case something is
 267     not fulfilled. Its return value is ignored.
 268
 269     This method should also update all the parameters of the opcode to
 270     their canonical form if it hasn't been done by ExpandNames before.
 271
 272     """
 273     if self.tasklets is not None:
 274       for (idx, tl) in enumerate(self.tasklets):
 275         logging.debug("Checking prerequisites for tasklet %s/%s",
 276                       idx + 1, len(self.tasklets))
 277         tl.CheckPrereq()
 278     else:
 279       pass
 280
 281   def Exec(self, feedback_fn):
 282     """Execute the LU.
 283
 284     This method should implement the actual work. It should raise
 285     errors.OpExecError for failures that are somewhat dealt with in
 286     code, or expected.
 287
 288     """
 289     if self.tasklets is not None:
 290       for (idx, tl) in enumerate(self.tasklets):
 291         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 292         tl.Exec(feedback_fn)
 293     else:
 294       raise NotImplementedError
 295
 296   def BuildHooksEnv(self):
 297     """Build hooks environment for this LU.
 298
 299     @rtype: dict
 300     @return: Dictionary containing the environment that will be used for
 301       running the hooks for this LU. The keys of the dict must not be prefixed
 302       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 303       will extend the environment with additional variables. If no environment
 304       should be defined, an empty dictionary should be returned (not C{None}).
 305     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 306       will not be called.
 307
 308     """
 309     raise NotImplementedError
 310
 311   def BuildHooksNodes(self):
 312     """Build list of nodes to run LU's hooks.
 313
 314     @rtype: tuple; (list, list)
 315     @return: Tuple containing a list of node names on which the hook
 316       should run before the execution and a list of node names on which the
 317       hook should run after the execution. No nodes should be returned as an
 318       empty list (and not None).
 319     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 320       will not be called.
 321
 322     """
 323     raise NotImplementedError
 324
 325   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 326     """Notify the LU about the results of its hooks.
 327
 328     This method is called every time a hooks phase is executed, and notifies
 329     the Logical Unit about the hooks' result. The LU can then use it to alter
 330     its result based on the hooks.  By default the method does nothing and the
 331     previous result is passed back unchanged but any LU can define it if it
 332     wants to use the local cluster hook-scripts somehow.
 333
 334     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 335         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 336     @param hook_results: the results of the multi-node hooks rpc call
 337     @param feedback_fn: function used send feedback back to the caller
 338     @param lu_result: the previous Exec result this LU had, or None
 339         in the PRE phase
 340     @return: the new Exec result, based on the previous result
 341         and hook results
 342
 343     """
 344     # API must be kept, thus we ignore the unused argument and could
 345     # be a function warnings
 346     # pylint: disable=W0613,R0201
 347     return lu_result
 348
 349   def _ExpandAndLockInstance(self):
 350     """Helper function to expand and lock an instance.
 351
 352     Many LUs that work on an instance take its name in self.op.instance_name
 353     and need to expand it and then declare the expanded name for locking. This
 354     function does it, and then updates self.op.instance_name to the expanded
 355     name. It also initializes needed_locks as a dict, if this hasn't been done
 356     before.
 357
 358     """
 359     if self.needed_locks is None:
 360       self.needed_locks = {}
 361     else:
 362       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 363         "_ExpandAndLockInstance called with instance-level locks set"
 364     self.op.instance_name = _ExpandInstanceName(self.cfg,
 365                                                 self.op.instance_name)
 366     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 367
 368   def _LockInstancesNodes(self, primary_only=False,
 369                           level=locking.LEVEL_NODE):
 370     """Helper function to declare instances' nodes for locking.
 371
 372     This function should be called after locking one or more instances to lock
 373     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 374     with all primary or secondary nodes for instances already locked and
 375     present in self.needed_locks[locking.LEVEL_INSTANCE].
 376
 377     It should be called from DeclareLocks, and for safety only works if
 378     self.recalculate_locks[locking.LEVEL_NODE] is set.
 379
 380     In the future it may grow parameters to just lock some instance's nodes, or
 381     to just lock primaries or secondary nodes, if needed.
 382
 383     If should be called in DeclareLocks in a way similar to::
 384
 385       if level == locking.LEVEL_NODE:
 386         self._LockInstancesNodes()
 387
 388     @type primary_only: boolean
 389     @param primary_only: only lock primary nodes of locked instances
 390     @param level: Which lock level to use for locking nodes
 391
 392     """
 393     assert level in self.recalculate_locks, \
 394       "_LockInstancesNodes helper function called with no nodes to recalculate"
 395
 396     # TODO: check if we're really been called with the instance locks held
 397
 398     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 399     # future we might want to have different behaviors depending on the value
 400     # of self.recalculate_locks[locking.LEVEL_NODE]
 401     wanted_nodes = []
 402     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 403     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 404       wanted_nodes.append(instance.primary_node)
 405       if not primary_only:
 406         wanted_nodes.extend(instance.secondary_nodes)
 407
 408     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 409       self.needed_locks[level] = wanted_nodes
 410     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 411       self.needed_locks[level].extend(wanted_nodes)
 412     else:
 413       raise errors.ProgrammerError("Unknown recalculation mode")
 414
 415     del self.recalculate_locks[level]
 416
 417
 418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 419   """Simple LU which runs no hooks.
 420
 421   This LU is intended as a parent for other LogicalUnits which will
 422   run no hooks, in order to reduce duplicate code.
 423
 424   """
 425   HPATH = None
 426   HTYPE = None
 427
 428   def BuildHooksEnv(self):
 429     """Empty BuildHooksEnv for NoHooksLu.
 430
 431     This just raises an error.
 432
 433     """
 434     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 435
 436   def BuildHooksNodes(self):
 437     """Empty BuildHooksNodes for NoHooksLU.
 438
 439     """
 440     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 441
 442
 443 class Tasklet:
 444   """Tasklet base class.
 445
 446   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 447   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 448   tasklets know nothing about locks.
 449
 450   Subclasses must follow these rules:
 451     - Implement CheckPrereq
 452     - Implement Exec
 453
 454   """
 455   def __init__(self, lu):
 456     self.lu = lu
 457
 458     # Shortcuts
 459     self.cfg = lu.cfg
 460     self.rpc = lu.rpc
 461
 462   def CheckPrereq(self):
 463     """Check prerequisites for this tasklets.
 464
 465     This method should check whether the prerequisites for the execution of
 466     this tasklet are fulfilled. It can do internode communication, but it
 467     should be idempotent - no cluster or system changes are allowed.
 468
 469     The method should raise errors.OpPrereqError in case something is not
 470     fulfilled. Its return value is ignored.
 471
 472     This method should also update all parameters to their canonical form if it
 473     hasn't been done before.
 474
 475     """
 476     pass
 477
 478   def Exec(self, feedback_fn):
 479     """Execute the tasklet.
 480
 481     This method should implement the actual work. It should raise
 482     errors.OpExecError for failures that are somewhat dealt with in code, or
 483     expected.
 484
 485     """
 486     raise NotImplementedError
 487
 488
 489 class _QueryBase:
 490   """Base for query utility classes.
 491
 492   """
 493   #: Attribute holding field definitions
 494   FIELDS = None
 495
 496   #: Field to sort by
 497   SORT_FIELD = "name"
 498
 499   def __init__(self, qfilter, fields, use_locking):
 500     """Initializes this class.
 501
 502     """
 503     self.use_locking = use_locking
 504
 505     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 506                              namefield=self.SORT_FIELD)
 507     self.requested_data = self.query.RequestedData()
 508     self.names = self.query.RequestedNames()
 509
 510     # Sort only if no names were requested
 511     self.sort_by_name = not self.names
 512
 513     self.do_locking = None
 514     self.wanted = None
 515
 516   def _GetNames(self, lu, all_names, lock_level):
 517     """Helper function to determine names asked for in the query.
 518
 519     """
 520     if self.do_locking:
 521       names = lu.owned_locks(lock_level)
 522     else:
 523       names = all_names
 524
 525     if self.wanted == locking.ALL_SET:
 526       assert not self.names
 527       # caller didn't specify names, so ordering is not important
 528       return utils.NiceSort(names)
 529
 530     # caller specified names and we must keep the same order
 531     assert self.names
 532     assert not self.do_locking or lu.glm.is_owned(lock_level)
 533
 534     missing = set(self.wanted).difference(names)
 535     if missing:
 536       raise errors.OpExecError("Some items were removed before retrieving"
 537                                " their data: %s" % missing)
 538
 539     # Return expanded names
 540     return self.wanted
 541
 542   def ExpandNames(self, lu):
 543     """Expand names for this query.
 544
 545     See L{LogicalUnit.ExpandNames}.
 546
 547     """
 548     raise NotImplementedError()
 549
 550   def DeclareLocks(self, lu, level):
 551     """Declare locks for this query.
 552
 553     See L{LogicalUnit.DeclareLocks}.
 554
 555     """
 556     raise NotImplementedError()
 557
 558   def _GetQueryData(self, lu):
 559     """Collects all data for this query.
 560
 561     @return: Query data object
 562
 563     """
 564     raise NotImplementedError()
 565
 566   def NewStyleQuery(self, lu):
 567     """Collect data and execute query.
 568
 569     """
 570     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 571                                   sort_by_name=self.sort_by_name)
 572
 573   def OldStyleQuery(self, lu):
 574     """Collect data and execute query.
 575
 576     """
 577     return self.query.OldStyleQuery(self._GetQueryData(lu),
 578                                     sort_by_name=self.sort_by_name)
 579
 580
 581 def _ShareAll():
 582   """Returns a dict declaring all lock levels shared.
 583
 584   """
 585   return dict.fromkeys(locking.LEVELS, 1)
 586
 587
 588 def _MakeLegacyNodeInfo(data):
 589   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 590
 591   Converts the data into a single dictionary. This is fine for most use cases,
 592   but some require information from more than one volume group or hypervisor.
 593
 594   """
 595   (bootid, (vg_info, ), (hv_info, )) = data
 596
 597   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 598     "bootid": bootid,
 599     })
 600
 601
 602 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
 603                               cur_group_uuid):
 604   """Checks if node groups for locked instances are still correct.
 605
 606   @type cfg: L{config.ConfigWriter}
 607   @param cfg: Cluster configuration
 608   @type instances: dict; string as key, L{objects.Instance} as value
 609   @param instances: Dictionary, instance name as key, instance object as value
 610   @type owned_groups: iterable of string
 611   @param owned_groups: List of owned groups
 612   @type owned_nodes: iterable of string
 613   @param owned_nodes: List of owned nodes
 614   @type cur_group_uuid: string or None
 615   @param cur_group_uuid: Optional group UUID to check against instance's groups
 616
 617   """
 618   for (name, inst) in instances.items():
 619     assert owned_nodes.issuperset(inst.all_nodes), \
 620       "Instance %s's nodes changed while we kept the lock" % name
 621
 622     inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
 623
 624     assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
 625       "Instance %s has no node in group %s" % (name, cur_group_uuid)
 626
 627
 628 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 629   """Checks if the owned node groups are still correct for an instance.
 630
 631   @type cfg: L{config.ConfigWriter}
 632   @param cfg: The cluster configuration
 633   @type instance_name: string
 634   @param instance_name: Instance name
 635   @type owned_groups: set or frozenset
 636   @param owned_groups: List of currently owned node groups
 637
 638   """
 639   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 640
 641   if not owned_groups.issuperset(inst_groups):
 642     raise errors.OpPrereqError("Instance %s's node groups changed since"
 643                                " locks were acquired, current groups are"
 644                                " are '%s', owning groups '%s'; retry the"
 645                                " operation" %
 646                                (instance_name,
 647                                 utils.CommaJoin(inst_groups),
 648                                 utils.CommaJoin(owned_groups)),
 649                                errors.ECODE_STATE)
 650
 651   return inst_groups
 652
 653
 654 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 655   """Checks if the instances in a node group are still correct.
 656
 657   @type cfg: L{config.ConfigWriter}
 658   @param cfg: The cluster configuration
 659   @type group_uuid: string
 660   @param group_uuid: Node group UUID
 661   @type owned_instances: set or frozenset
 662   @param owned_instances: List of currently owned instances
 663
 664   """
 665   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 666   if owned_instances != wanted_instances:
 667     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 668                                " locks were acquired, wanted '%s', have '%s';"
 669                                " retry the operation" %
 670                                (group_uuid,
 671                                 utils.CommaJoin(wanted_instances),
 672                                 utils.CommaJoin(owned_instances)),
 673                                errors.ECODE_STATE)
 674
 675   return wanted_instances
 676
 677
 678 def _SupportsOob(cfg, node):
 679   """Tells if node supports OOB.
 680
 681   @type cfg: L{config.ConfigWriter}
 682   @param cfg: The cluster configuration
 683   @type node: L{objects.Node}
 684   @param node: The node
 685   @return: The OOB script if supported or an empty string otherwise
 686
 687   """
 688   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 689
 690
 691 def _GetWantedNodes(lu, nodes):
 692   """Returns list of checked and expanded node names.
 693
 694   @type lu: L{LogicalUnit}
 695   @param lu: the logical unit on whose behalf we execute
 696   @type nodes: list
 697   @param nodes: list of node names or None for all nodes
 698   @rtype: list
 699   @return: the list of nodes, sorted
 700   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 701
 702   """
 703   if nodes:
 704     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 705
 706   return utils.NiceSort(lu.cfg.GetNodeList())
 707
 708
 709 def _GetWantedInstances(lu, instances):
 710   """Returns list of checked and expanded instance names.
 711
 712   @type lu: L{LogicalUnit}
 713   @param lu: the logical unit on whose behalf we execute
 714   @type instances: list
 715   @param instances: list of instance names or None for all instances
 716   @rtype: list
 717   @return: the list of instances, sorted
 718   @raise errors.OpPrereqError: if the instances parameter is wrong type
 719   @raise errors.OpPrereqError: if any of the passed instances is not found
 720
 721   """
 722   if instances:
 723     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 724   else:
 725     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 726   return wanted
 727
 728
 729 def _GetUpdatedParams(old_params, update_dict,
 730                       use_default=True, use_none=False):
 731   """Return the new version of a parameter dictionary.
 732
 733   @type old_params: dict
 734   @param old_params: old parameters
 735   @type update_dict: dict
 736   @param update_dict: dict containing new parameter values, or
 737       constants.VALUE_DEFAULT to reset the parameter to its default
 738       value
 739   @param use_default: boolean
 740   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 741       values as 'to be deleted' values
 742   @param use_none: boolean
 743   @type use_none: whether to recognise C{None} values as 'to be
 744       deleted' values
 745   @rtype: dict
 746   @return: the new parameter dictionary
 747
 748   """
 749   params_copy = copy.deepcopy(old_params)
 750   for key, val in update_dict.iteritems():
 751     if ((use_default and val == constants.VALUE_DEFAULT) or
 752         (use_none and val is None)):
 753       try:
 754         del params_copy[key]
 755       except KeyError:
 756         pass
 757     else:
 758       params_copy[key] = val
 759   return params_copy
 760
 761
 762 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 763   """Return the new version of a instance policy.
 764
 765   @param group_policy: whether this policy applies to a group and thus
 766     we should support removal of policy entries
 767
 768   """
 769   use_none = use_default = group_policy
 770   ipolicy = copy.deepcopy(old_ipolicy)
 771   for key, value in new_ipolicy.items():
 772     if key not in constants.IPOLICY_ALL_KEYS:
 773       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 774                                  errors.ECODE_INVAL)
 775     if key in constants.IPOLICY_ISPECS:
 776       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 777       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 778                                        use_none=use_none,
 779                                        use_default=use_default)
 780     else:
 781       if not value or value == [constants.VALUE_DEFAULT]:
 782         if group_policy:
 783           del ipolicy[key]
 784         else:
 785           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 786                                      " on the cluster'" % key,
 787                                      errors.ECODE_INVAL)
 788       else:
 789         if key in constants.IPOLICY_PARAMETERS:
 790           # FIXME: we assume all such values are float
 791           try:
 792             ipolicy[key] = float(value)
 793           except (TypeError, ValueError), err:
 794             raise errors.OpPrereqError("Invalid value for attribute"
 795                                        " '%s': '%s', error: %s" %
 796                                        (key, value, err), errors.ECODE_INVAL)
 797         else:
 798           # FIXME: we assume all others are lists; this should be redone
 799           # in a nicer way
 800           ipolicy[key] = list(value)
 801   try:
 802     objects.InstancePolicy.CheckParameterSyntax(ipolicy)
 803   except errors.ConfigurationError, err:
 804     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 805                                errors.ECODE_INVAL)
 806   return ipolicy
 807
 808
 809 def _UpdateAndVerifySubDict(base, updates, type_check):
 810   """Updates and verifies a dict with sub dicts of the same type.
 811
 812   @param base: The dict with the old data
 813   @param updates: The dict with the new data
 814   @param type_check: Dict suitable to ForceDictType to verify correct types
 815   @returns: A new dict with updated and verified values
 816
 817   """
 818   def fn(old, value):
 819     new = _GetUpdatedParams(old, value)
 820     utils.ForceDictType(new, type_check)
 821     return new
 822
 823   ret = copy.deepcopy(base)
 824   ret.update(dict((key, fn(base.get(key, {}), value))
 825                   for key, value in updates.items()))
 826   return ret
 827
 828
 829 def _MergeAndVerifyHvState(op_input, obj_input):
 830   """Combines the hv state from an opcode with the one of the object
 831
 832   @param op_input: The input dict from the opcode
 833   @param obj_input: The input dict from the objects
 834   @return: The verified and updated dict
 835
 836   """
 837   if op_input:
 838     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 839     if invalid_hvs:
 840       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 841                                  " %s" % utils.CommaJoin(invalid_hvs),
 842                                  errors.ECODE_INVAL)
 843     if obj_input is None:
 844       obj_input = {}
 845     type_check = constants.HVSTS_PARAMETER_TYPES
 846     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 847
 848   return None
 849
 850
 851 def _MergeAndVerifyDiskState(op_input, obj_input):
 852   """Combines the disk state from an opcode with the one of the object
 853
 854   @param op_input: The input dict from the opcode
 855   @param obj_input: The input dict from the objects
 856   @return: The verified and updated dict
 857   """
 858   if op_input:
 859     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 860     if invalid_dst:
 861       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 862                                  utils.CommaJoin(invalid_dst),
 863                                  errors.ECODE_INVAL)
 864     type_check = constants.DSS_PARAMETER_TYPES
 865     if obj_input is None:
 866       obj_input = {}
 867     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 868                                               type_check))
 869                 for key, value in op_input.items())
 870
 871   return None
 872
 873
 874 def _ReleaseLocks(lu, level, names=None, keep=None):
 875   """Releases locks owned by an LU.
 876
 877   @type lu: L{LogicalUnit}
 878   @param level: Lock level
 879   @type names: list or None
 880   @param names: Names of locks to release
 881   @type keep: list or None
 882   @param keep: Names of locks to retain
 883
 884   """
 885   assert not (keep is not None and names is not None), \
 886          "Only one of the 'names' and the 'keep' parameters can be given"
 887
 888   if names is not None:
 889     should_release = names.__contains__
 890   elif keep:
 891     should_release = lambda name: name not in keep
 892   else:
 893     should_release = None
 894
 895   owned = lu.owned_locks(level)
 896   if not owned:
 897     # Not owning any lock at this level, do nothing
 898     pass
 899
 900   elif should_release:
 901     retain = []
 902     release = []
 903
 904     # Determine which locks to release
 905     for name in owned:
 906       if should_release(name):
 907         release.append(name)
 908       else:
 909         retain.append(name)
 910
 911     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 912
 913     # Release just some locks
 914     lu.glm.release(level, names=release)
 915
 916     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 917   else:
 918     # Release everything
 919     lu.glm.release(level)
 920
 921     assert not lu.glm.is_owned(level), "No locks should be owned"
 922
 923
 924 def _MapInstanceDisksToNodes(instances):
 925   """Creates a map from (node, volume) to instance name.
 926
 927   @type instances: list of L{objects.Instance}
 928   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 929
 930   """
 931   return dict(((node, vol), inst.name)
 932               for inst in instances
 933               for (node, vols) in inst.MapLVsByNode().items()
 934               for vol in vols)
 935
 936
 937 def _RunPostHook(lu, node_name):
 938   """Runs the post-hook for an opcode on a single node.
 939
 940   """
 941   hm = lu.proc.BuildHooksManager(lu)
 942   try:
 943     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 944   except:
 945     # pylint: disable=W0702
 946     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 947
 948
 949 def _CheckOutputFields(static, dynamic, selected):
 950   """Checks whether all selected fields are valid.
 951
 952   @type static: L{utils.FieldSet}
 953   @param static: static fields set
 954   @type dynamic: L{utils.FieldSet}
 955   @param dynamic: dynamic fields set
 956
 957   """
 958   f = utils.FieldSet()
 959   f.Extend(static)
 960   f.Extend(dynamic)
 961
 962   delta = f.NonMatching(selected)
 963   if delta:
 964     raise errors.OpPrereqError("Unknown output fields selected: %s"
 965                                % ",".join(delta), errors.ECODE_INVAL)
 966
 967
 968 def _CheckGlobalHvParams(params):
 969   """Validates that given hypervisor params are not global ones.
 970
 971   This will ensure that instances don't get customised versions of
 972   global params.
 973
 974   """
 975   used_globals = constants.HVC_GLOBALS.intersection(params)
 976   if used_globals:
 977     msg = ("The following hypervisor parameters are global and cannot"
 978            " be customized at instance level, please modify them at"
 979            " cluster level: %s" % utils.CommaJoin(used_globals))
 980     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 981
 982
 983 def _CheckNodeOnline(lu, node, msg=None):
 984   """Ensure that a given node is online.
 985
 986   @param lu: the LU on behalf of which we make the check
 987   @param node: the node to check
 988   @param msg: if passed, should be a message to replace the default one
 989   @raise errors.OpPrereqError: if the node is offline
 990
 991   """
 992   if msg is None:
 993     msg = "Can't use offline node"
 994   if lu.cfg.GetNodeInfo(node).offline:
 995     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 996
 997
 998 def _CheckNodeNotDrained(lu, node):
 999   """Ensure that a given node is not drained.
1000
1001   @param lu: the LU on behalf of which we make the check
1002   @param node: the node to check
1003   @raise errors.OpPrereqError: if the node is drained
1004
1005   """
1006   if lu.cfg.GetNodeInfo(node).drained:
1007     raise errors.OpPrereqError("Can't use drained node %s" % node,
1008                                errors.ECODE_STATE)
1009
1010
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  The node's C{vm_capable} flag is read from the LU's configuration.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if node_info.vm_capable:
    return

  raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                             errors.ECODE_STATE)
1022
1023
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  The OS is looked up on the node via RPC; unless C{force_variant} is
  set, the returned OS object is also checked with L{_CheckOSVariant}.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  os_result = lu.rpc.call_os_get(node, os_name)
  failure_msg = ("OS '%s' not in supported OS list for node %s" %
                 (os_name, node))
  os_result.Raise(failure_msg, prereq=True, ecode=errors.ECODE_INVAL)

  if force_variant:
    return

  _CheckOSVariant(os_result.payload, os_name)
1040
1041
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  The node is asked via RPC whether it owns the address.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  ip_check = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  ip_check.Raise("Failure checking secondary ip on node %s" % node,
                 prereq=prereq, ecode=errors.ECODE_ENVIRON)

  if ip_check.payload:
    return

  msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
         " please fix and re-run this command" % secondary_ip)
  if not prereq:
    raise errors.OpExecError(msg)
  raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1067
1068
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  @return: the result of reading C{constants.CLUSTER_DOMAIN_SECRET_FILE}
      with C{utils.ReadOneLineFile} in strict mode

  """
  secret_file = constants.CLUSTER_DOMAIN_SECRET_FILE
  return utils.ReadOneLineFile(secret_file, strict=True)
1075
1076
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  If C{constants.ADMINST_UP} is not among the required states, the
  primary node is additionally asked for its list of instances and the
  check fails when the instance shows up in it.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param req_states: the admin states the instance is allowed to be in
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)

  admin_state = instance.admin_state
  if admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP in req_states:
    # A running instance is acceptable, no need to ask the node
    return

  pnode = instance.primary_node
  node_reply = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  node_reply.Raise("Can't contact node %s for instance information" % pnode,
                   prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in node_reply.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, msg), errors.ECODE_STATE)
1102
1103
def _ComputeMinMaxSpec(name, ipolicy, value):
  """Computes if value is in the desired range.

  A bound that the policy does not define for C{name} defaults to the
  value itself. C{None} and C{constants.VALUE_AUTO} are never reported.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in (None, constants.VALUE_AUTO):
    return None

  upper = ipolicy[constants.ISPECS_MAX].get(name, value)
  lower = ipolicy[constants.ISPECS_MIN].get(name, value)

  out_of_range = (lower > value) or (value > upper)
  if not out_of_range:
    return None

  return ("%s value %s is not in range [%s, %s]" %
          (name, value, lower, upper))
1122
1123
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  Each spec value, and every single disk size, is handed to
  C{_compute_fn}; whatever non-empty results come back are collected.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list of no violations are found

  """
  assert disk_count == len(disk_sizes)

  checks = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    (constants.ISPEC_SPINDLE_USE, spindle_use),
    ]
  # Every disk is checked on its own against the disk size limits
  checks.extend([(constants.ISPEC_DISK_SIZE, size) for size in disk_sizes])

  violations = []
  for (name, value) in checks:
    problem = _compute_fn(name, ipolicy, value)
    if problem:
      violations.append(problem)

  return violations
1160
1161
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  Memory, CPU count and spindle use are read from the instance's own
  C{beparams} (None when not set there); disk and NIC figures come from
  the instance's C{disks} and C{nics} lists.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  beparams = instance.beparams
  disk_sizes = [disk.size for disk in instance.disks]

  return _compute_fn(ipolicy,
                     beparams.get(constants.BE_MAXMEM, None),
                     beparams.get(constants.BE_VCPUS, None),
                     len(instance.disks),
                     len(instance.nics),
                     disk_sizes,
                     beparams.get(constants.BE_SPINDLE_USE, None))
1183
1184
def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
    _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  Entries missing from the spec default to None, except for the disk and
  NIC counts (0) and the disk sizes (empty list).

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  spec_get = instance_spec.get

  return _compute_fn(ipolicy,
                     spec_get(constants.ISPEC_MEM_SIZE, None),
                     spec_get(constants.ISPEC_CPU_COUNT, None),
                     spec_get(constants.ISPEC_DISK_COUNT, 0),
                     spec_get(constants.ISPEC_NIC_COUNT, 0),
                     spec_get(constants.ISPEC_DISK_SIZE, []),
                     spec_get(constants.ISPEC_SPINDLE_USE, None))
1206
1207
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  Nothing is checked when the instance stays within its current group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: an empty list if both groups are equal, otherwise the result
      of C{_compute_fn}
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group != target_group:
    return _compute_fn(ipolicy, instance)

  return []
1225
1226
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  The group of the instance's primary node and the group of the target
  node are handed to C{_compute_fn} together with the policy.

  @param lu: the LU on behalf of which we make the check
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @raise errors.OpPrereqError: if there are violations and C{ignore} is
      not set
  @see: L{_ComputeIPolicySpecViolation}

  """
  pnode_info = lu.cfg.GetNodeInfo(instance.primary_node)
  violations = _compute_fn(ipolicy, instance, pnode_info.group, node.group)

  if not violations:
    return

  msg = ("Instance does not meet target node group's (%s) instance"
         " policy: %s") % (node.group, utils.CommaJoin(violations))

  if not ignore:
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

  # Violations are tolerated, but the user is still told about them
  lu.LogWarning(msg)
1249
1250
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  Only newly introduced violations are reported: instances that already
  violate the current policy are left out of the result.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A frozenset with the names of the instances which violate the
      new ipolicy but did not violate the old one

  """
  # The order of the operands matters: "new minus old" yields the instances
  # that start violating, "old minus new" would yield those that stop
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
1262
1263
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion; a result of None means
      that the item is unknown
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  resolved = fn(name)
  if resolved is not None:
    return resolved

  raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                             errors.ECODE_NOENT)
1279
1280
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes.

  @param cfg: the configuration whose C{ExpandNodeName} does the expansion
  @param name: requested node name
  @return: the resolved (full) node name

  """
  expand_fn = cfg.ExpandNodeName
  return _ExpandItemName(expand_fn, name, "Node")
1284
1285
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance.

  @param cfg: the configuration whose C{ExpandInstanceName} does the
      expansion
  @param name: requested instance name
  @return: the resolved (full) instance name

  """
  expand_fn = cfg.ExpandInstanceName
  return _ExpandItemName(expand_fn, name, "Instance")
1289
1290
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  # Missing (None) or empty collections are all handled as "no entries"
  nic_list = nics or []
  disk_list = disks or []
  tag_list = tags or []

  env = {}
  env["OP_TARGET"] = name
  env["INSTANCE_NAME"] = name
  env["INSTANCE_PRIMARY"] = primary_node
  env["INSTANCE_SECONDARIES"] = " ".join(secondary_nodes)
  env["INSTANCE_OS_TYPE"] = os_type
  env["INSTANCE_STATUS"] = status
  env["INSTANCE_MINMEM"] = minmem
  env["INSTANCE_MAXMEM"] = maxmem
  # TODO(2.7) remove deprecated "memory" value
  env["INSTANCE_MEMORY"] = maxmem
  env["INSTANCE_VCPUS"] = vcpus
  env["INSTANCE_DISK_TEMPLATE"] = disk_template
  env["INSTANCE_HYPERVISOR"] = hypervisor_name

  # One group of variables per NIC, plus the total count
  for (idx, (ip, mac, mode, link)) in enumerate(nic_list):
    if ip is None:
      ip = ""
    env["INSTANCE_NIC%d_IP" % idx] = ip
    env["INSTANCE_NIC%d_MAC" % idx] = mac
    env["INSTANCE_NIC%d_MODE" % idx] = mode
    env["INSTANCE_NIC%d_LINK" % idx] = link
    if mode == constants.NIC_MODE_BRIDGED:
      # For bridged NICs the link is exported as the bridge as well
      env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  env["INSTANCE_NIC_COUNT"] = len(nic_list)

  # One group of variables per disk, plus the total count
  for (idx, (size, mode)) in enumerate(disk_list):
    env["INSTANCE_DISK%d_SIZE" % idx] = size
    env["INSTANCE_DISK%d_MODE" % idx] = mode
  env["INSTANCE_DISK_COUNT"] = len(disk_list)

  env["INSTANCE_TAGS"] = " ".join(tag_list)

  # Backend parameters first, hypervisor parameters second
  for (key, value) in bep.items():
    env["INSTANCE_BE_%s" % key] = value
  for (key, value) in hvp.items():
    env["INSTANCE_HV_%s" % key] = value

  return env
1384
1385
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  Mode and link of each NIC are taken from its parameters after they
  have been filled via the cluster's C{SimpleFillNIC}.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  @return: list of (ip, mac, mode, link) tuples, one per NIC

  """
  cluster = lu.cfg.GetClusterInfo()

  def _ToTuple(nic):
    """Converts a single NIC object into its hooks tuple."""
    filled = cluster.SimpleFillNIC(nic.nicparams)
    return (nic.ip, nic.mac,
            filled[constants.NIC_MODE], filled[constants.NIC_LINK])

  return [_ToTuple(nic) for nic in nics]
1408
1409
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  The arguments for L{_BuildInstanceHookEnv} are gathered from the
  instance and from its backend and hypervisor parameters as filled by
  the cluster's C{FillBE} and C{FillHV}.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  filled_be = cluster.FillBE(instance)
  filled_hv = cluster.FillHV(instance)

  hook_args = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_state,
    maxmem=filled_be[constants.BE_MAXMEM],
    minmem=filled_be[constants.BE_MINMEM],
    vcpus=filled_be[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=filled_be,
    hvp=filled_hv,
    hypervisor_name=instance.hypervisor,
    tags=instance.tags,
    )

  # Values supplied by the caller win over the ones taken from the object
  if override:
    hook_args.update(override)

  return _BuildInstanceHookEnv(**hook_args) # pylint: disable=W0142
1448
1449
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  Nodes returned by the configuration's C{MaintainCandidatePool} are
  announced and re-added to the LU's context. Afterwards a note is logged
  if there are more master candidates than desired.

  @param lu: the LU on whose behalf the pool is adjusted
  @param exceptions: passed through to C{MaintainCandidatePool} and
      C{GetMasterCandidateStats}

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in promoted))
    for node in promoted:
      lu.context.ReaddNode(node)

  (current, desired, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  if current > desired:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (current, desired))
1464
1465
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  @param lu: the LU providing access to the configuration
  @param exceptions: passed through to C{GetMasterCandidateStats}
  @return: True if the current number of master candidates is below the
      target computed for the cluster with the new node included

  """
  pool_size = lu.cfg.GetClusterInfo().candidate_pool_size
  (current, target, _) = lu.cfg.GetMasterCandidateStats(exceptions)

  # the new node will increase mc_max with one, but the target can never
  # exceed the configured candidate pool size
  return current < min(target + 1, pool_size)
1475
1476
def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  @param cluster: object whose C{SimpleFillIPolicy} fills the policy
  @param group: the group whose C{ipolicy} is filled
  @return: the result of filling the group's policy through the cluster

  """
  group_policy = group.ipolicy
  return cluster.SimpleFillIPolicy(group_policy)
1482
1483
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  An instance counts as violating when
  L{_ComputeIPolicyInstanceViolation} returns a non-empty result for it.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  violating = set()

  for inst in instances:
    if _ComputeIPolicyInstanceViolation(ipolicy, inst):
      violating.add(inst.name)

  return frozenset(violating)
1495
1496
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  Only NICs whose filled parameters select bridged mode are considered;
  their links are checked on the target node via RPC. No RPC is made
  when there is no such NIC.

  @param lu: the LU on behalf of which we make the check
  @param target_nics: the NICs whose bridges are checked
  @param target_node: the node on which the bridges have to exist

  """
  cluster = lu.cfg.GetClusterInfo()

  bridges = []
  for nic in target_nics:
    filled = cluster.SimpleFillNIC(nic.nicparams)
    if filled[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(filled[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1509
1510
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's
      primary node

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node

  _CheckNicsBridgesExist(lu, instance.nics, target_node)
1518
1519
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  An OS without supported variants must be named without a variant; an OS
  with supported variants must be named with one of them.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the name does not conform

  """
  variant = objects.OS.GetVariant(name)
  supported = os_obj.supported_variants

  if supported:
    if not variant:
      raise errors.OpPrereqError("OS name must include a variant",
                                 errors.ECODE_INVAL)
    if variant not in supported:
      raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
  elif variant:
    raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                               " passed)" % (os_obj.name, variant),
                               errors.ECODE_INVAL)
1542
1543
def _GetNodeInstancesInner(cfg, fn):
  """Returns the instances of the configuration accepted by a filter.

  @param cfg: the configuration; its C{GetAllInstancesInfo} supplies the
      instances
  @param fn: function called with each instance; instances for which it
      returns a true value are kept
  @rtype: list

  """
  selected = []
  for inst in cfg.GetAllInstancesInfo().values():
    if fn(inst):
      selected.append(inst)
  return selected
1546
1547
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: the configuration to search
  @param node_name: the node that has to be among an instance's
      C{all_nodes}

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1554
1555
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: the configuration to search
  @param node_name: the node that has to be an instance's C{primary_node}

  """
  def _IsPrimaryHere(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimaryHere)
1562
1563
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: the configuration to search
  @param node_name: the node that has to be among an instance's
      C{secondary_nodes}

  """
  def _IsSecondaryHere(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondaryHere)
1570
1571
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: the configuration providing the file storage directories
  @param storage_type: the storage type to get the arguments for
  @return: an empty list, except for C{constants.ST_FILE}, where the only
      argument is the list of the two file storage directories

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
  return [storage_dirs]
1582
1583
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Finds the disks of an instance that a node reports as faulty.

  The disk IDs are set for the given node, then the node is asked for the
  mirror status of all the instance's disks.

  @param cfg: the configuration, used to set the disk IDs
  @param rpc_runner: the object used to make the RPC call
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed on to the RPC result's C{Raise}
  @return: list of indices into the status list returned by the node,
      one for each entry whose local disk status is
      C{constants.LDS_FAULTY}

  """
  disks = instance.disks
  for disk in disks:
    cfg.SetDiskID(disk, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # Empty status entries are skipped
  return [idx for (idx, status) in enumerate(result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
1599
1600
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @param lu: the LU whose opcode is checked and possibly updated
  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  @raise errors.OpPrereqError: if both slots are set, or if neither is set
      and the cluster has no default iallocator

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      # Carries an error code like every other prerequisite error here
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
1631
1632
def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  The iallocator from the opcode wins; the cluster-wide default is only
  consulted when the opcode does not name one.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name
  @raise errors.OpPrereqError: if neither the opcode nor the cluster
      names an iallocator

  """
  chosen = iallocator or cfg.GetDefaultIAllocator()

  if chosen:
    return chosen

  raise errors.OpPrereqError("No iallocator was specified, neither in the"
                             " opcode nor as a cluster-wide default",
                             errors.ECODE_INVAL)
1654
1655
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The unit does no work of its own in L{Exec}.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dict with the cluster name as C{OP_TARGET}

    """
    cluster_name = self.cfg.GetClusterName()
    return {
      "OP_TARGET": cluster_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of an empty list and a list holding only the master node

    """
    master = self.cfg.GetMasterNode()
    return ([], [master])

  def Exec(self, feedback_fn):
    """Nothing to do.

    @param feedback_fn: unused
    @return: always True

    """
    return True
1682
1683
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dict with the cluster name as C{OP_TARGET}

    """
    cluster_name = self.cfg.GetClusterName()
    return {
      "OP_TARGET": cluster_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two empty lists

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty, i.e. that the master is the
    only node left and that no instances exist.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    only_master_left = (len(nodelist) == 1 and nodelist[0] == master)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master node and then asks it to deactivate
    the master IP; a failure to do so is only logged as a warning.

    @param feedback_fn: unused
    @return: the name of the master node

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    master_name = master_params.name

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_name)

    use_external_script = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_name,
                                                     master_params,
                                                     use_external_script)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_name
1743
1744
1745 def _VerifyCertificate(filename):
1746   """Verifies a certificate for L{LUClusterVerifyConfig}.
1747
1748   @type filename: string
1749   @param filename: Path to PEM file
1750
1751   """
1752   try:
1753     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1754                                            utils.ReadFile(filename))
1755   except Exception, err: # pylint: disable=W0703
1756     return (LUClusterVerifyConfig.ETYPE_ERROR,
1757             "Failed to load X509 certificate %s: %s" % (filename, err))
1758
1759   (errcode, msg) = \
1760     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1761                                 constants.SSL_CERT_EXPIRATION_ERROR)
1762
1763   if msg:
1764     fnamemsg = "While verifying %s: %s" % (filename, msg)
1765   else:
1766     fnamemsg = None
1767
1768   if errcode is None:
1769     return (None, fnamemsg)
1770   elif errcode == utils.CERT_WARNING:
1771     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1772   elif errcode == utils.CERT_ERROR:
1773     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1774
1775   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1776
1777
def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  Three sources are combined, in this order: the defaults of every enabled
  hypervisor, the per-OS overrides (only non-empty ones) and the given
  instances (only those carrying their own hypervisor parameters).

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  # Cluster-level defaults, one entry per enabled hypervisor
  collected = [("cluster", hv_name, cluster.GetHVDefaults(hv_name))
               for hv_name in cluster.enabled_hypervisors]

  # Per-OS overrides; empty override dictionaries are skipped
  for (os_name, per_hv) in cluster.os_hvp.items():
    origin = "os %s" % os_name
    collected.extend((origin, hv_name,
                      cluster.GetHVDefaults(hv_name, os_name=os_name))
                     for (hv_name, overrides) in per_hv.items()
                     if overrides)

  # TODO: collapse identical parameter values in a single one
  collected.extend(("instance %s" % inst.name, inst.hypervisor,
                    cluster.FillHV(inst))
                   for inst in instances
                   if inst.hvparams)

  return collected
1808
1809
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    @param ecode: triple whose first element is the item type and whose
        second element is the error text/code; the third is ignored
    @param item: name of the affected item, or a false value if none
    @param msg: the message; used as a format string only if C{args} is
        not empty
    @keyword code: severity to report, defaults to L{ETYPE_ERROR}

    """
    (itype, etxt, _) = ecode
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)

    # first complete the msg; without arguments it is taken verbatim, so
    # stray "%" characters are harmless
    text = msg
    if args:
      text = text % args

    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      line = "%s:%s:%s:%s:%s" % (severity, etxt, itype, item, text)
    else:
      suffix = ""
      if item:
        suffix = " " + item
      line = "%s: %s%s: %s" % (severity, itype, suffix, text)

    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % line) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    The message is also logged when the opcode's debug_simulate_errors is
    set. Unless the resulting severity is a warning, the outcome is
    folded into self.bad.

    """
    triggered = (bool(cond)
                 or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if triggered:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    if severity == self.ETYPE_ERROR:
      self.bad = self.bad or triggered
1867
1868
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  This LU performs no verification itself: L{Exec} only builds a list of
  jobs (one opcode each) and returns it wrapped in L{ResultWithJobs}.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Builds the verification jobs.

    If the opcode names a group, a single L{opcodes.OpClusterVerifyGroup}
    job for that group is created. Otherwise one
    L{opcodes.OpClusterVerifyConfig} job is created first, followed by one
    L{opcodes.OpClusterVerifyGroup} job for every node group in the
    configuration.

    @param feedback_fn: not used by this method
    @return: L{ResultWithJobs} wrapping the list of jobs

    """
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      # A single group verification has nothing to depend on
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      # NOTE(review): len(jobs) is read every time depends_fn() is called,
      # i.e. while jobs.extend() below is still consuming the generator; the
      # negative value is presumably a relative job reference whose distance
      # back to the config verification job grows with each group job added.
      # Confirm before restructuring this code.
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                            ignore_errors=self.op.ignore_errors,
                                            depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # An AttributeError here is tolerated for every opcode except the
        # group verification one
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1911
1912
1913 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1914   """Verifies the cluster config.
1915
1916   """
1917   REQ_BGL = False
1918
1919   def _VerifyHVP(self, hvp_data):
1920     """Verifies locally the syntax of the hypervisor parameters.
1921
1922     """
1923     for item, hv_name, hv_params in hvp_data:
1924       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1925              (item, hv_name))
1926       try:
1927         hv_class = hypervisor.GetHypervisor(hv_name)
1928         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1929         hv_class.CheckParameterSyntax(hv_params)
1930       except errors.GenericError, err:
1931         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1932
1933   def ExpandNames(self):
1934     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1935     self.share_locks = _ShareAll()
1936
1937   def CheckPrereq(self):
1938     """Check prerequisites.
1939
1940     """
1941     # Retrieve all information
1942     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1943     self.all_node_info = self.cfg.GetAllNodesInfo()
1944     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1945
1946   def Exec(self, feedback_fn):
1947     """Verify integrity of cluster, performing various test on nodes.
1948
1949     """
1950     self.bad = False
1951     self._feedback_fn = feedback_fn
1952
1953     feedback_fn("* Verifying cluster config")
1954
1955     for msg in self.cfg.VerifyConfig():
1956       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1957
1958     feedback_fn("* Verifying cluster certificate files")
1959
1960     for cert_filename in constants.ALL_CERT_FILES:
1961       (errcode, msg) = _VerifyCertificate(cert_filename)
1962       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1963
1964     feedback_fn("* Verifying hypervisor parameters")
1965
1966     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1967                                                 self.all_inst_info.values()))
1968
1969     feedback_fn("* Verifying all nodes belong to an existing group")
1970
1971     # We do this verification here because, should this bogus circumstance
1972     # occur, it would never be caught by VerifyGroup, which only acts on
1973     # nodes/instances reachable from existing node groups.
1974
1975     dangling_nodes = set(node.name for node in self.all_node_info.values()
1976                          if node.group not in self.all_group_info)
1977
1978     dangling_instances = {}
1979     no_node_instances = []
1980
1981     for inst in self.all_inst_info.values():
1982       if inst.primary_node in dangling_nodes:
1983         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1984       elif inst.primary_node not in self.all_node_info:
1985         no_node_instances.append(inst.name)
1986
1987     pretty_dangling = [
1988         "%s (%s)" %
1989         (node.name,
1990          utils.CommaJoin(dangling_instances.get(node.name,
1991                                                 ["no instances"])))
1992         for node in dangling_nodes]
1993
1994     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1995                   None,
1996                   "the following nodes (and their instances) belong to a non"
1997                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1998
1999     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2000                   None,
2001                   "the following instances have a non-existing primary-node:"
2002                   " %s", utils.CommaJoin(no_node_instances))
2003
2004     return not self.bad
2005
2006
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  Errors are reported through the L{_VerifyErrors} mix-in.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  # With re.M, "^" matches the (empty) start of every line of a string;
  # presumably used to indent multi-line hook output -- confirm at the place
  # where it is used
  _HOOKS_INDENT_RE = re.compile("^", re.M)
2016
2017   class NodeImage(object):
2018     """A class representing the logical and physical status of a node.
2019
2020     @type name: string
2021     @ivar name: the node name to which this object refers
2022     @ivar volumes: a structure as returned from
2023         L{ganeti.backend.GetVolumeList} (runtime)
2024     @ivar instances: a list of running instances (runtime)
2025     @ivar pinst: list of configured primary instances (config)
2026     @ivar sinst: list of configured secondary instances (config)
2027     @ivar sbp: dictionary of {primary-node: list of instances} for all
2028         instances for which this node is secondary (config)
2029     @ivar mfree: free memory, as reported by hypervisor (runtime)
2030     @ivar dfree: free disk, as reported by the node (runtime)
2031     @ivar offline: the offline status (config)
2032     @type rpc_fail: boolean
2033     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2034         not whether the individual keys were correct) (runtime)
2035     @type lvm_fail: boolean
2036     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2037     @type hyp_fail: boolean
2038     @ivar hyp_fail: whether the RPC call didn't return the instance list
2039     @type ghost: boolean
2040     @ivar ghost: whether this is a known node or not (config)
2041     @type os_fail: boolean
2042     @ivar os_fail: whether the RPC call didn't return valid OS data
2043     @type oslist: list
2044     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2045     @type vm_capable: boolean
2046     @ivar vm_capable: whether the node can host instances
2047
2048     """
2049     def __init__(self, offline=False, name=None, vm_capable=True):
2050       self.name = name
2051       self.volumes = {}
2052       self.instances = []
2053       self.pinst = []
2054       self.sinst = []
2055       self.sbp = {}
2056       self.mfree = 0
2057       self.dfree = 0
2058       self.offline = offline
2059       self.vm_capable = vm_capable
2060       self.rpc_fail = False
2061       self.lvm_fail = False
2062       self.hyp_fail = False
2063       self.ghost = False
2064       self.os_fail = False
2065       self.oslist = {}
2066
2067   def ExpandNames(self):
2068     # This raises errors.OpPrereqError on its own:
2069     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2070
2071     # Get instances in node group; this is unsafe and needs verification later
2072     inst_names = \
2073       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2074
2075     self.needed_locks = {
2076       locking.LEVEL_INSTANCE: inst_names,
2077       locking.LEVEL_NODEGROUP: [self.group_uuid],
2078       locking.LEVEL_NODE: [],
2079       }
2080
2081     self.share_locks = _ShareAll()
2082
2083   def DeclareLocks(self, level):
2084     if level == locking.LEVEL_NODE:
2085       # Get members of node group; this is unsafe and needs verification later
2086       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2087
2088       all_inst_info = self.cfg.GetAllInstancesInfo()
2089
2090       # In Exec(), we warn about mirrored instances that have primary and
2091       # secondary living in separate node groups. To fully verify that
2092       # volumes for these instances are healthy, we will need to do an
2093       # extra call to their secondaries. We ensure here those nodes will
2094       # be locked.
2095       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2096         # Important: access only the instances whose lock is owned
2097         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2098           nodes.update(all_inst_info[inst].secondary_nodes)
2099
2100       self.needed_locks[locking.LEVEL_NODE] = nodes
2101
2102   def CheckPrereq(self):
2103     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2104     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2105
2106     group_nodes = set(self.group_info.members)
2107     group_instances = \
2108       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2109
2110     unlocked_nodes = \
2111         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2112
2113     unlocked_instances = \
2114         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2115
2116     if unlocked_nodes:
2117       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2118                                  utils.CommaJoin(unlocked_nodes),
2119                                  errors.ECODE_STATE)
2120
2121     if unlocked_instances:
2122       raise errors.OpPrereqError("Missing lock for instances: %s" %
2123                                  utils.CommaJoin(unlocked_instances),
2124                                  errors.ECODE_STATE)
2125
2126     self.all_node_info = self.cfg.GetAllNodesInfo()
2127     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2128
2129     self.my_node_names = utils.NiceSort(group_nodes)
2130     self.my_inst_names = utils.NiceSort(group_instances)
2131
2132     self.my_node_info = dict((name, self.all_node_info[name])
2133                              for name in self.my_node_names)
2134
2135     self.my_inst_info = dict((name, self.all_inst_info[name])
2136                              for name in self.my_inst_names)
2137
2138     # We detect here the nodes that will need the extra RPC calls for verifying
2139     # split LV volumes; they should be locked.
2140     extra_lv_nodes = set()
2141
2142     for inst in self.my_inst_info.values():
2143       if inst.disk_template in constants.DTS_INT_MIRROR:
2144         for nname in inst.all_nodes:
2145           if self.all_node_info[nname].group != self.group_uuid:
2146             extra_lv_nodes.add(nname)
2147
2148     unlocked_lv_nodes = \
2149         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2150
2151     if unlocked_lv_nodes:
2152       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2153                                  utils.CommaJoin(unlocked_lv_nodes),
2154                                  errors.ECODE_STATE)
2155     self.extra_lv_nodes = list(extra_lv_nodes)
2156
2157   def _VerifyNode(self, ninfo, nresult):
2158     """Perform some basic validation on data returned from a node.
2159
2160       - check the result data structure is well formed and has all the
2161         mandatory fields
2162       - check ganeti version
2163
2164     @type ninfo: L{objects.Node}
2165     @param ninfo: the node to check
2166     @param nresult: the results from the node
2167     @rtype: boolean
2168     @return: whether overall this call was successful (and we can expect
2169          reasonable values in the respose)
2170
2171     """
2172     node = ninfo.name
2173     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2174
2175     # main result, nresult should be a non-empty dict
2176     test = not nresult or not isinstance(nresult, dict)
2177     _ErrorIf(test, constants.CV_ENODERPC, node,
2178                   "unable to verify node: no data returned")
2179     if test:
2180       return False
2181
2182     # compares ganeti version
2183     local_version = constants.PROTOCOL_VERSION
2184     remote_version = nresult.get("version", None)
2185     test = not (remote_version and
2186                 isinstance(remote_version, (list, tuple)) and
2187                 len(remote_version) == 2)
2188     _ErrorIf(test, constants.CV_ENODERPC, node,
2189              "connection to node returned invalid data")
2190     if test:
2191       return False
2192
2193     test = local_version != remote_version[0]
2194     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2195              "incompatible protocol versions: master %s,"
2196              " node %s", local_version, remote_version[0])
2197     if test:
2198       return False
2199
2200     # node seems compatible, we can actually try to look into its results
2201
2202     # full package version
2203     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2204                   constants.CV_ENODEVERSION, node,
2205                   "software version mismatch: master %s, node %s",
2206                   constants.RELEASE_VERSION, remote_version[1],
2207                   code=self.ETYPE_WARNING)
2208
2209     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2210     if ninfo.vm_capable and isinstance(hyp_result, dict):
2211       for hv_name, hv_result in hyp_result.iteritems():
2212         test = hv_result is not None
2213         _ErrorIf(test, constants.CV_ENODEHV, node,
2214                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2215
2216     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2217     if ninfo.vm_capable and isinstance(hvp_result, list):
2218       for item, hv_name, hv_result in hvp_result:
2219         _ErrorIf(True, constants.CV_ENODEHV, node,
2220                  "hypervisor %s parameter verify failure (source %s): %s",
2221                  hv_name, item, hv_result)
2222
2223     test = nresult.get(constants.NV_NODESETUP,
2224                        ["Missing NODESETUP results"])
2225     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2226              "; ".join(test))
2227
2228     return True
2229
2230   def _VerifyNodeTime(self, ninfo, nresult,
2231                       nvinfo_starttime, nvinfo_endtime):
2232     """Check the node time.
2233
2234     @type ninfo: L{objects.Node}
2235     @param ninfo: the node to check
2236     @param nresult: the remote results for the node
2237     @param nvinfo_starttime: the start time of the RPC call
2238     @param nvinfo_endtime: the end time of the RPC call
2239
2240     """
2241     node = ninfo.name
2242     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2243
2244     ntime = nresult.get(constants.NV_TIME, None)
2245     try:
2246       ntime_merged = utils.MergeTime(ntime)
2247     except (ValueError, TypeError):
2248       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2249       return
2250
2251     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2252       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2253     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2254       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2255     else:
2256       ntime_diff = None
2257
2258     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2259              "Node time diverges by at least %s from master node time",
2260              ntime_diff)
2261
2262   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2263     """Check the node LVM results.
2264
2265     @type ninfo: L{objects.Node}
2266     @param ninfo: the node to check
2267     @param nresult: the remote results for the node
2268     @param vg_name: the configured VG name
2269
2270     """
2271     if vg_name is None:
2272       return
2273
2274     node = ninfo.name
2275     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2276
2277     # checks vg existence and size > 20G
2278     vglist = nresult.get(constants.NV_VGLIST, None)
2279     test = not vglist
2280     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2281     if not test:
2282       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2283                                             constants.MIN_VG_SIZE)
2284       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2285
2286     # check pv names
2287     pvlist = nresult.get(constants.NV_PVLIST, None)
2288     test = pvlist is None
2289     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2290     if not test:
2291       # check that ':' is not present in PV names, since it's a
2292       # special character for lvcreate (denotes the range of PEs to
2293       # use on the PV)
2294       for _, pvname, owner_vg in pvlist:
2295         test = ":" in pvname
2296         _ErrorIf(test, constants.CV_ENODELVM, node,
2297                  "Invalid character ':' in PV '%s' of VG '%s'",
2298                  pvname, owner_vg)
2299
2300   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2301     """Check the node bridges.
2302
2303     @type ninfo: L{objects.Node}
2304     @param ninfo: the node to check
2305     @param nresult: the remote results for the node
2306     @param bridges: the expected list of bridges
2307
2308     """
2309     if not bridges:
2310       return
2311
2312     node = ninfo.name
2313     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2314
2315     missing = nresult.get(constants.NV_BRIDGES, None)
2316     test = not isinstance(missing, list)
2317     _ErrorIf(test, constants.CV_ENODENET, node,
2318              "did not return valid bridge information")
2319     if not test:
2320       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2321                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2322
2323   def _VerifyNodeUserScripts(self, ninfo, nresult):
2324     """Check the results of user scripts presence and executability on the node
2325
2326     @type ninfo: L{objects.Node}
2327     @param ninfo: the node to check
2328     @param nresult: the remote results for the node
2329
2330     """
2331     node = ninfo.name
2332
2333     test = not constants.NV_USERSCRIPTS in nresult
2334     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2335                   "did not return user scripts information")
2336
2337     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2338     if not test:
2339       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2340                     "user scripts not present or not executable: %s" %
2341                     utils.CommaJoin(sorted(broken_scripts)))
2342
2343   def _VerifyNodeNetwork(self, ninfo, nresult):
2344     """Check the node network connectivity results.
2345
2346     @type ninfo: L{objects.Node}
2347     @param ninfo: the node to check
2348     @param nresult: the remote results for the node
2349
2350     """
2351     node = ninfo.name
2352     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2353
2354     test = constants.NV_NODELIST not in nresult
2355     _ErrorIf(test, constants.CV_ENODESSH, node,
2356              "node hasn't returned node ssh connectivity data")
2357     if not test:
2358       if nresult[constants.NV_NODELIST]:
2359         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2360           _ErrorIf(True, constants.CV_ENODESSH, node,
2361                    "ssh communication with node '%s': %s", a_node, a_msg)
2362
2363     test = constants.NV_NODENETTEST not in nresult
2364     _ErrorIf(test, constants.CV_ENODENET, node,
2365              "node hasn't returned node tcp connectivity data")
2366     if not test:
2367       if nresult[constants.NV_NODENETTEST]:
2368         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2369         for anode in nlist:
2370           _ErrorIf(True, constants.CV_ENODENET, node,
2371                    "tcp communication with node '%s': %s",
2372                    anode, nresult[constants.NV_NODENETTEST][anode])
2373
2374     test = constants.NV_MASTERIP not in nresult
2375     _ErrorIf(test, constants.CV_ENODENET, node,
2376              "node hasn't returned node master IP reachability data")
2377     if not test:
2378       if not nresult[constants.NV_MASTERIP]:
2379         if node == self.master_node:
2380           msg = "the master node cannot reach the master IP (not configured?)"
2381         else:
2382           msg = "cannot reach the master IP"
2383         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2384
2385   def _VerifyInstance(self, instance, instanceconfig, node_image,
2386                       diskstatus):
2387     """Verify an instance.
2388
2389     This function checks to see if the required block devices are
2390     available on the instance's node.
2391
2392     """
2393     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2394     node_current = instanceconfig.primary_node
2395
2396     node_vol_should = {}
2397     instanceconfig.MapLVsByNode(node_vol_should)
2398
2399     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2400     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2401     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2402
2403     for node in node_vol_should:
2404       n_img = node_image[node]
2405       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2406         # ignore missing volumes on offline or broken nodes
2407         continue
2408       for volume in node_vol_should[node]:
2409         test = volume not in n_img.volumes
2410         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2411                  "volume %s missing on node %s", volume, node)
2412
2413     if instanceconfig.admin_state == constants.ADMINST_UP:
2414       pri_img = node_image[node_current]
2415       test = instance not in pri_img.instances and not pri_img.offline
2416       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2417                "instance not running on its primary node %s",
2418                node_current)
2419
2420     diskdata = [(nname, success, status, idx)
2421                 for (nname, disks) in diskstatus.items()
2422                 for idx, (success, status) in enumerate(disks)]
2423
2424     for nname, success, bdev_status, idx in diskdata:
2425       # the 'ghost node' construction in Exec() ensures that we have a
2426       # node here
2427       snode = node_image[nname]
2428       bad_snode = snode.ghost or snode.offline
2429       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2430                not success and not bad_snode,
2431                constants.CV_EINSTANCEFAULTYDISK, instance,
2432                "couldn't retrieve status for disk/%s on %s: %s",
2433                idx, nname, bdev_status)
2434       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2435                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2436                constants.CV_EINSTANCEFAULTYDISK, instance,
2437                "disk/%s on %s is faulty", idx, nname)
2438
2439   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2440     """Verify if there are any unknown volumes in the cluster.
2441
2442     The .os, .swap and backup volumes are ignored. All other volumes are
2443     reported as unknown.
2444
2445     @type reserved: L{ganeti.utils.FieldSet}
2446     @param reserved: a FieldSet of reserved volume names
2447
2448     """
2449     for node, n_img in node_image.items():
2450       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2451           self.all_node_info[node].group != self.group_uuid):
2452         # skip non-healthy nodes
2453         continue
2454       for volume in n_img.volumes:
2455         test = ((node not in node_vol_should or
2456                 volume not in node_vol_should[node]) and
2457                 not reserved.Matches(volume))
2458         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2459                       "volume %s is unknown", volume)
2460
2461   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2462     """Verify N+1 Memory Resilience.
2463
2464     Check that if one single node dies we can still start all the
2465     instances it was primary for.
2466
2467     """
2468     cluster_info = self.cfg.GetClusterInfo()
2469     for node, n_img in node_image.items():
2470       # This code checks that every node which is now listed as
2471       # secondary has enough memory to host all instances it is
2472       # supposed to should a single other node in the cluster fail.
2473       # FIXME: not ready for failover to an arbitrary node
2474       # FIXME: does not support file-backed instances
2475       # WARNING: we currently take into account down instances as well
2476       # as up ones, considering that even if they're down someone
2477       # might want to start them even in the event of a node failure.
2478       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2479         # we're skipping nodes marked offline and nodes in other groups from
2480         # the N+1 warning, since most likely we don't have good memory
2481         # infromation from them; we already list instances living on such
2482         # nodes, and that's enough warning
2483         continue
2484       #TODO(dynmem): also consider ballooning out other instances
2485       for prinode, instances in n_img.sbp.items():
2486         needed_mem = 0
2487         for instance in instances:
2488           bep = cluster_info.FillBE(instance_cfg[instance])
2489           if bep[constants.BE_AUTO_BALANCE]:
2490             needed_mem += bep[constants.BE_MINMEM]
2491         test = n_img.mfree < needed_mem
2492         self._ErrorIf(test, constants.CV_ENODEN1, node,
2493                       "not enough memory to accomodate instance failovers"
2494                       " should node %s fail (%dMiB needed, %dMiB available)",
2495                       prinode, needed_mem, n_img.mfree)
2496
2497   @classmethod
2498   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2499                    (files_all, files_opt, files_mc, files_vm)):
2500     """Verifies file checksums collected from all nodes.
2501
2502     @param errorif: Callback for reporting errors
2503     @param nodeinfo: List of L{objects.Node} objects
2504     @param master_node: Name of master node
2505     @param all_nvinfo: RPC results
2506
2507     """
2508     # Define functions determining which nodes to consider for a file
2509     files2nodefn = [
2510       (files_all, None),
2511       (files_mc, lambda node: (node.master_candidate or
2512                                node.name == master_node)),
2513       (files_vm, lambda node: node.vm_capable),
2514       ]
2515
2516     # Build mapping from filename to list of nodes which should have the file
2517     nodefiles = {}
2518     for (files, fn) in files2nodefn:
2519       if fn is None:
2520         filenodes = nodeinfo
2521       else:
2522         filenodes = filter(fn, nodeinfo)
2523       nodefiles.update((filename,
2524                         frozenset(map(operator.attrgetter("name"), filenodes)))
2525                        for filename in files)
2526
2527     assert set(nodefiles) == (files_all | files_mc | files_vm)
2528
2529     fileinfo = dict((filename, {}) for filename in nodefiles)
2530     ignore_nodes = set()
2531
2532     for node in nodeinfo:
2533       if node.offline:
2534         ignore_nodes.add(node.name)
2535         continue
2536
2537       nresult = all_nvinfo[node.name]
2538
2539       if nresult.fail_msg or not nresult.payload:
2540         node_files = None
2541       else:
2542         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2543
2544       test = not (node_files and isinstance(node_files, dict))
2545       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2546               "Node did not return file checksum data")
2547       if test:
2548         ignore_nodes.add(node.name)
2549         continue
2550
2551       # Build per-checksum mapping from filename to nodes having it
2552       for (filename, checksum) in node_files.items():
2553         assert filename in nodefiles
2554         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2555
2556     for (filename, checksums) in fileinfo.items():
2557       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2558
2559       # Nodes having the file
2560       with_file = frozenset(node_name
2561                             for nodes in fileinfo[filename].values()
2562                             for node_name in nodes) - ignore_nodes
2563
2564       expected_nodes = nodefiles[filename] - ignore_nodes
2565
2566       # Nodes missing file
2567       missing_file = expected_nodes - with_file
2568
2569       if filename in files_opt:
2570         # All or no nodes
2571         errorif(missing_file and missing_file != expected_nodes,
2572                 constants.CV_ECLUSTERFILECHECK, None,
2573                 "File %s is optional, but it must exist on all or no"
2574                 " nodes (not found on %s)",
2575                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2576       else:
2577         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2578                 "File %s is missing from node(s) %s", filename,
2579                 utils.CommaJoin(utils.NiceSort(missing_file)))
2580
2581         # Warn if a node has a file it shouldn't
2582         unexpected = with_file - expected_nodes
2583         errorif(unexpected,
2584                 constants.CV_ECLUSTERFILECHECK, None,
2585                 "File %s should not exist on node(s) %s",
2586                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2587
2588       # See if there are multiple versions of the file
2589       test = len(checksums) > 1
2590       if test:
2591         variants = ["variant %s on %s" %
2592                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2593                     for (idx, (checksum, nodes)) in
2594                       enumerate(sorted(checksums.items()))]
2595       else:
2596         variants = []
2597
2598       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2599               "File %s found with %s different checksums (%s)",
2600               filename, len(checksums), "; ".join(variants))
2601
2602   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2603                       drbd_map):
2604     """Verifies and the node DRBD status.
2605
2606     @type ninfo: L{objects.Node}
2607     @param ninfo: the node to check
2608     @param nresult: the remote results for the node
2609     @param instanceinfo: the dict of instances
2610     @param drbd_helper: the configured DRBD usermode helper
2611     @param drbd_map: the DRBD map as returned by
2612         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2613
2614     """
2615     node = ninfo.name
2616     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2617
2618     if drbd_helper:
2619       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2620       test = (helper_result == None)
2621       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2622                "no drbd usermode helper returned")
2623       if helper_result:
2624         status, payload = helper_result
2625         test = not status
2626         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2627                  "drbd usermode helper check unsuccessful: %s", payload)
2628         test = status and (payload != drbd_helper)
2629         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2630                  "wrong drbd usermode helper: %s", payload)
2631
2632     # compute the DRBD minors
2633     node_drbd = {}
2634     for minor, instance in drbd_map[node].items():
2635       test = instance not in instanceinfo
2636       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2637                "ghost instance '%s' in temporary DRBD map", instance)
2638         # ghost instance should not be running, but otherwise we
2639         # don't give double warnings (both ghost instance and
2640         # unallocated minor in use)
2641       if test:
2642         node_drbd[minor] = (instance, False)
2643       else:
2644         instance = instanceinfo[instance]
2645         node_drbd[minor] = (instance.name,
2646                             instance.admin_state == constants.ADMINST_UP)
2647
2648     # and now check them
2649     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2650     test = not isinstance(used_minors, (tuple, list))
2651     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2652              "cannot parse drbd status file: %s", str(used_minors))
2653     if test:
2654       # we cannot check drbd status
2655       return
2656
2657     for minor, (iname, must_exist) in node_drbd.items():
2658       test = minor not in used_minors and must_exist
2659       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2660                "drbd minor %d of instance %s is not active", minor, iname)
2661     for minor in used_minors:
2662       test = minor not in node_drbd
2663       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2664                "unallocated drbd minor %d is in use", minor)
2665
2666   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2667     """Builds the node OS structures.
2668
2669     @type ninfo: L{objects.Node}
2670     @param ninfo: the node to check
2671     @param nresult: the remote results for the node
2672     @param nimg: the node image object
2673
2674     """
2675     node = ninfo.name
2676     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2677
2678     remote_os = nresult.get(constants.NV_OSLIST, None)
2679     test = (not isinstance(remote_os, list) or
2680             not compat.all(isinstance(v, list) and len(v) == 7
2681                            for v in remote_os))
2682
2683     _ErrorIf(test, constants.CV_ENODEOS, node,
2684              "node hasn't returned valid OS data")
2685
2686     nimg.os_fail = test
2687
2688     if test:
2689       return
2690
2691     os_dict = {}
2692
2693     for (name, os_path, status, diagnose,
2694          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2695
2696       if name not in os_dict:
2697         os_dict[name] = []
2698
2699       # parameters is a list of lists instead of list of tuples due to
2700       # JSON lacking a real tuple type, fix it:
2701       parameters = [tuple(v) for v in parameters]
2702       os_dict[name].append((os_path, status, diagnose,
2703                             set(variants), set(parameters), set(api_ver)))
2704
2705     nimg.oslist = os_dict
2706
2707   def _VerifyNodeOS(self, ninfo, nimg, base):
2708     """Verifies the node OS list.
2709
2710     @type ninfo: L{objects.Node}
2711     @param ninfo: the node to check
2712     @param nimg: the node image object
2713     @param base: the 'template' node we match against (e.g. from the master)
2714
2715     """
2716     node = ninfo.name
2717     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2718
2719     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2720
2721     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2722     for os_name, os_data in nimg.oslist.items():
2723       assert os_data, "Empty OS status for OS %s?!" % os_name
2724       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2725       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2726                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2727       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2728                "OS '%s' has multiple entries (first one shadows the rest): %s",
2729                os_name, utils.CommaJoin([v[0] for v in os_data]))
2730       # comparisons with the 'base' image
2731       test = os_name not in base.oslist
2732       _ErrorIf(test, constants.CV_ENODEOS, node,
2733                "Extra OS %s not present on reference node (%s)",
2734                os_name, base.name)
2735       if test:
2736         continue
2737       assert base.oslist[os_name], "Base node has empty OS status?"
2738       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2739       if not b_status:
2740         # base OS is invalid, skipping
2741         continue
2742       for kind, a, b in [("API version", f_api, b_api),
2743                          ("variants list", f_var, b_var),
2744                          ("parameters", beautify_params(f_param),
2745                           beautify_params(b_param))]:
2746         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2747                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2748                  kind, os_name, base.name,
2749                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2750
2751     # check any missing OSes
2752     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2753     _ErrorIf(missing, constants.CV_ENODEOS, node,
2754              "OSes present on reference node %s but missing on this node: %s",
2755              base.name, utils.CommaJoin(missing))
2756
2757   def _VerifyOob(self, ninfo, nresult):
2758     """Verifies out of band functionality of a node.
2759
2760     @type ninfo: L{objects.Node}
2761     @param ninfo: the node to check
2762     @param nresult: the remote results for the node
2763
2764     """
2765     node = ninfo.name
2766     # We just have to verify the paths on master and/or master candidates
2767     # as the oob helper is invoked on the master
2768     if ((ninfo.master_candidate or ninfo.master_capable) and
2769         constants.NV_OOB_PATHS in nresult):
2770       for path_result in nresult[constants.NV_OOB_PATHS]:
2771         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2772
2773   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2774     """Verifies and updates the node volume data.
2775
2776     This function will update a L{NodeImage}'s internal structures
2777     with data from the remote call.
2778
2779     @type ninfo: L{objects.Node}
2780     @param ninfo: the node to check
2781     @param nresult: the remote results for the node
2782     @param nimg: the node image object
2783     @param vg_name: the configured VG name
2784
2785     """
2786     node = ninfo.name
2787     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2788
2789     nimg.lvm_fail = True
2790     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2791     if vg_name is None:
2792       pass
2793     elif isinstance(lvdata, basestring):
2794       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2795                utils.SafeEncode(lvdata))
2796     elif not isinstance(lvdata, dict):
2797       _ErrorIf(True, constants.CV_ENODELVM, node,
2798                "rpc call to node failed (lvlist)")
2799     else:
2800       nimg.volumes = lvdata
2801       nimg.lvm_fail = False
2802
2803   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2804     """Verifies and updates the node instance list.
2805
2806     If the listing was successful, then updates this node's instance
2807     list. Otherwise, it marks the RPC call as failed for the instance
2808     list key.
2809
2810     @type ninfo: L{objects.Node}
2811     @param ninfo: the node to check
2812     @param nresult: the remote results for the node
2813     @param nimg: the node image object
2814
2815     """
2816     idata = nresult.get(constants.NV_INSTANCELIST, None)
2817     test = not isinstance(idata, list)
2818     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2819                   "rpc call to node failed (instancelist): %s",
2820                   utils.SafeEncode(str(idata)))
2821     if test:
2822       nimg.hyp_fail = True
2823     else:
2824       nimg.instances = idata
2825
2826   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2827     """Verifies and computes a node information map
2828
2829     @type ninfo: L{objects.Node}
2830     @param ninfo: the node to check
2831     @param nresult: the remote results for the node
2832     @param nimg: the node image object
2833     @param vg_name: the configured VG name
2834
2835     """
2836     node = ninfo.name
2837     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2838
2839     # try to read free memory (from the hypervisor)
2840     hv_info = nresult.get(constants.NV_HVINFO, None)
2841     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2842     _ErrorIf(test, constants.CV_ENODEHV, node,
2843              "rpc call to node failed (hvinfo)")
2844     if not test:
2845       try:
2846         nimg.mfree = int(hv_info["memory_free"])
2847       except (ValueError, TypeError):
2848         _ErrorIf(True, constants.CV_ENODERPC, node,
2849                  "node returned invalid nodeinfo, check hypervisor")
2850
2851     # FIXME: devise a free space model for file based instances as well
2852     if vg_name is not None:
2853       test = (constants.NV_VGLIST not in nresult or
2854               vg_name not in nresult[constants.NV_VGLIST])
2855       _ErrorIf(test, constants.CV_ENODELVM, node,
2856                "node didn't return data for the volume group '%s'"
2857                " - it is either missing or broken", vg_name)
2858       if not test:
2859         try:
2860           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2861         except (ValueError, TypeError):
2862           _ErrorIf(True, constants.CV_ENODERPC, node,
2863                    "node returned invalid LVM info, check LVM status")
2864
2865   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2866     """Gets per-disk status information for all instances.
2867
2868     @type nodelist: list of strings
2869     @param nodelist: Node names
2870     @type node_image: dict of (name, L{objects.Node})
2871     @param node_image: Node objects
2872     @type instanceinfo: dict of (name, L{objects.Instance})
2873     @param instanceinfo: Instance objects
2874     @rtype: {instance: {node: [(succes, payload)]}}
2875     @return: a dictionary of per-instance dictionaries with nodes as
2876         keys and disk information as values; the disk information is a
2877         list of tuples (success, payload)
2878
2879     """
2880     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2881
2882     node_disks = {}
2883     node_disks_devonly = {}
2884     diskless_instances = set()
2885     diskless = constants.DT_DISKLESS
2886
2887     for nname in nodelist:
2888       node_instances = list(itertools.chain(node_image[nname].pinst,
2889                                             node_image[nname].sinst))
2890       diskless_instances.update(inst for inst in node_instances
2891                                 if instanceinfo[inst].disk_template == diskless)
2892       disks = [(inst, disk)
2893                for inst in node_instances
2894                for disk in instanceinfo[inst].disks]
2895
2896       if not disks:
2897         # No need to collect data
2898         continue
2899
2900       node_disks[nname] = disks
2901
2902       # Creating copies as SetDiskID below will modify the objects and that can
2903       # lead to incorrect data returned from nodes
2904       devonly = [dev.Copy() for (_, dev) in disks]
2905
2906       for dev in devonly:
2907         self.cfg.SetDiskID(dev, nname)
2908
2909       node_disks_devonly[nname] = devonly
2910
2911     assert len(node_disks) == len(node_disks_devonly)
2912
2913     # Collect data from all nodes with disks
2914     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2915                                                           node_disks_devonly)
2916
2917     assert len(result) == len(node_disks)
2918
2919     instdisk = {}
2920
2921     for (nname, nres) in result.items():
2922       disks = node_disks[nname]
2923
2924       if nres.offline:
2925         # No data from this node
2926         data = len(disks) * [(False, "node offline")]
2927       else:
2928         msg = nres.fail_msg
2929         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2930                  "while getting disk information: %s", msg)
2931         if msg:
2932           # No data from this node
2933           data = len(disks) * [(False, msg)]
2934         else:
2935           data = []
2936           for idx, i in enumerate(nres.payload):
2937             if isinstance(i, (tuple, list)) and len(i) == 2:
2938               data.append(i)
2939             else:
2940               logging.warning("Invalid result from node %s, entry %d: %s",
2941                               nname, idx, i)
2942               data.append((False, "Invalid result from the remote node"))
2943
2944       for ((inst, _), status) in zip(disks, data):
2945         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2946
2947     # Add empty entries for diskless instances.
2948     for inst in diskless_instances:
2949       assert inst not in instdisk
2950       instdisk[inst] = {}
2951
2952     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2953                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2954                       compat.all(isinstance(s, (tuple, list)) and
2955                                  len(s) == 2 for s in statuses)
2956                       for inst, nnames in instdisk.items()
2957                       for nname, statuses in nnames.items())
2958     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2959
2960     return instdisk
2961
2962   @staticmethod
2963   def _SshNodeSelector(group_uuid, all_nodes):
2964     """Create endless iterators for all potential SSH check hosts.
2965
2966     """
2967     nodes = [node for node in all_nodes
2968              if (node.group != group_uuid and
2969                  not node.offline)]
2970     keyfunc = operator.attrgetter("group")
2971
2972     return map(itertools.cycle,
2973                [sorted(map(operator.attrgetter("name"), names))
2974                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2975                                                   keyfunc)])
2976
2977   @classmethod
2978   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2979     """Choose which nodes should talk to which other nodes.
2980
2981     We will make nodes contact all nodes in their group, and one node from
2982     every other group.
2983
2984     @warning: This algorithm has a known issue if one node group is much
2985       smaller than others (e.g. just one node). In such a case all other
2986       nodes will talk to the single node.
2987
2988     """
2989     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2990     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2991
2992     return (online_nodes,
2993             dict((name, sorted([i.next() for i in sel]))
2994                  for name in online_nodes))
2995
2996   def BuildHooksEnv(self):
2997     """Build hooks env.
2998
2999     Cluster-Verify hooks just ran in the post phase and their failure makes
3000     the output be logged in the verify output and the verification to fail.
3001
3002     """
3003     env = {
3004       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3005       }
3006
3007     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3008                for node in self.my_node_info.values())
3009
3010     return env
3011
3012   def BuildHooksNodes(self):
3013     """Build hooks nodes.
3014
3015     """
3016     return ([], self.my_node_names)
3017
3018   def Exec(self, feedback_fn):
3019     """Verify integrity of the node group, performing various test on nodes.
3020
3021     """
3022     # This method has too many local variables. pylint: disable=R0914
3023     feedback_fn("* Verifying group '%s'" % self.group_info.name)
3024
3025     if not self.my_node_names:
3026       # empty node group
3027       feedback_fn("* Empty node group, skipping verification")
3028       return True
3029
3030     self.bad = False
3031     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3032     verbose = self.op.verbose
3033     self._feedback_fn = feedback_fn
3034
3035     vg_name = self.cfg.GetVGName()
3036     drbd_helper = self.cfg.GetDRBDHelper()
3037     cluster = self.cfg.GetClusterInfo()
3038     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3039     hypervisors = cluster.enabled_hypervisors
3040     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3041
3042     i_non_redundant = [] # Non redundant instances
3043     i_non_a_balanced = [] # Non auto-balanced instances
3044     i_offline = 0 # Count of offline instances
3045     n_offline = 0 # Count of offline nodes
3046     n_drained = 0 # Count of nodes being drained
3047     node_vol_should = {}
3048
3049     # FIXME: verify OS list
3050
3051     # File verification
3052     filemap = _ComputeAncillaryFiles(cluster, False)
3053
3054     # do local checksums
3055     master_node = self.master_node = self.cfg.GetMasterNode()
3056     master_ip = self.cfg.GetMasterIP()
3057
3058     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3059
3060     user_scripts = []
3061     if self.cfg.GetUseExternalMipScript():
3062       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3063
3064     node_verify_param = {
3065       constants.NV_FILELIST:
3066         utils.UniqueSequence(filename
3067                              for files in filemap
3068                              for filename in files),
3069       constants.NV_NODELIST:
3070         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3071                                   self.all_node_info.values()),
3072       constants.NV_HYPERVISOR: hypervisors,
3073       constants.NV_HVPARAMS:
3074         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3075       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3076                                  for node in node_data_list
3077                                  if not node.offline],
3078       constants.NV_INSTANCELIST: hypervisors,
3079       constants.NV_VERSION: None,
3080       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3081       constants.NV_NODESETUP: None,
3082       constants.NV_TIME: None,
3083       constants.NV_MASTERIP: (master_node, master_ip),
3084       constants.NV_OSLIST: None,
3085       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3086       constants.NV_USERSCRIPTS: user_scripts,
3087       }
3088
3089     if vg_name is not None:
3090       node_verify_param[constants.NV_VGLIST] = None
3091       node_verify_param[constants.NV_LVLIST] = vg_name
3092       node_verify_param[constants.NV_PVLIST] = [vg_name]
3093       node_verify_param[constants.NV_DRBDLIST] = None
3094
3095     if drbd_helper:
3096       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3097
3098     # bridge checks
3099     # FIXME: this needs to be changed per node-group, not cluster-wide
3100     bridges = set()
3101     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3102     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3103       bridges.add(default_nicpp[constants.NIC_LINK])
3104     for instance in self.my_inst_info.values():
3105       for nic in instance.nics:
3106         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3107         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3108           bridges.add(full_nic[constants.NIC_LINK])
3109
3110     if bridges:
3111       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3112
3113     # Build our expected cluster state
3114     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3115                                                  name=node.name,
3116                                                  vm_capable=node.vm_capable))
3117                       for node in node_data_list)
3118
3119     # Gather OOB paths
3120     oob_paths = []
3121     for node in self.all_node_info.values():
3122       path = _SupportsOob(self.cfg, node)
3123       if path and path not in oob_paths:
3124         oob_paths.append(path)
3125
3126     if oob_paths:
3127       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3128
3129     for instance in self.my_inst_names:
3130       inst_config = self.my_inst_info[instance]
3131
3132       for nname in inst_config.all_nodes:
3133         if nname not in node_image:
3134           gnode = self.NodeImage(name=nname)
3135           gnode.ghost = (nname not in self.all_node_info)
3136           node_image[nname] = gnode
3137
3138       inst_config.MapLVsByNode(node_vol_should)
3139
3140       pnode = inst_config.primary_node
3141       node_image[pnode].pinst.append(instance)
3142
3143       for snode in inst_config.secondary_nodes:
3144         nimg = node_image[snode]
3145         nimg.sinst.append(instance)
3146         if pnode not in nimg.sbp:
3147           nimg.sbp[pnode] = []
3148         nimg.sbp[pnode].append(instance)
3149
3150     # At this point, we have the in-memory data structures complete,
3151     # except for the runtime information, which we'll gather next
3152
3153     # Due to the way our RPC system works, exact response times cannot be
3154     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3155     # time before and after executing the request, we can at least have a time
3156     # window.
3157     nvinfo_starttime = time.time()
3158     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3159                                            node_verify_param,
3160                                            self.cfg.GetClusterName())
3161     nvinfo_endtime = time.time()
3162
3163     if self.extra_lv_nodes and vg_name is not None:
3164       extra_lv_nvinfo = \
3165           self.rpc.call_node_verify(self.extra_lv_nodes,
3166                                     {constants.NV_LVLIST: vg_name},
3167                                     self.cfg.GetClusterName())
3168     else:
3169       extra_lv_nvinfo = {}
3170
3171     all_drbd_map = self.cfg.ComputeDRBDMap()
3172
3173     feedback_fn("* Gathering disk information (%s nodes)" %
3174                 len(self.my_node_names))
3175     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3176                                      self.my_inst_info)
3177
3178     feedback_fn("* Verifying configuration file consistency")
3179
3180     # If not all nodes are being checked, we need to make sure the master node
3181     # and a non-checked vm_capable node are in the list.
3182     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3183     if absent_nodes:
3184       vf_nvinfo = all_nvinfo.copy()
3185       vf_node_info = list(self.my_node_info.values())
3186       additional_nodes = []
3187       if master_node not in self.my_node_info:
3188         additional_nodes.append(master_node)
3189         vf_node_info.append(self.all_node_info[master_node])
3190       # Add the first vm_capable node we find which is not included
3191       for node in absent_nodes:
3192         nodeinfo = self.all_node_info[node]
3193         if nodeinfo.vm_capable and not nodeinfo.offline:
3194           additional_nodes.append(node)
3195           vf_node_info.append(self.all_node_info[node])
3196           break
3197       key = constants.NV_FILELIST
3198       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3199                                                  {key: node_verify_param[key]},
3200                                                  self.cfg.GetClusterName()))
3201     else:
3202       vf_nvinfo = all_nvinfo
3203       vf_node_info = self.my_node_info.values()
3204
3205     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3206
3207     feedback_fn("* Verifying node status")
3208
3209     refos_img = None
3210
3211     for node_i in node_data_list:
3212       node = node_i.name
3213       nimg = node_image[node]
3214
3215       if node_i.offline:
3216         if verbose:
3217           feedback_fn("* Skipping offline node %s" % (node,))
3218         n_offline += 1
3219         continue
3220
3221       if node == master_node:
3222         ntype = "master"
3223       elif node_i.master_candidate:
3224         ntype = "master candidate"
3225       elif node_i.drained:
3226         ntype = "drained"
3227         n_drained += 1
3228       else:
3229         ntype = "regular"
3230       if verbose:
3231         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3232
3233       msg = all_nvinfo[node].fail_msg
3234       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3235                msg)
3236       if msg:
3237         nimg.rpc_fail = True
3238         continue
3239
3240       nresult = all_nvinfo[node].payload
3241
3242       nimg.call_ok = self._VerifyNode(node_i, nresult)
3243       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3244       self._VerifyNodeNetwork(node_i, nresult)
3245       self._VerifyNodeUserScripts(node_i, nresult)
3246       self._VerifyOob(node_i, nresult)
3247
3248       if nimg.vm_capable:
3249         self._VerifyNodeLVM(node_i, nresult, vg_name)
3250         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3251                              all_drbd_map)
3252
3253         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3254         self._UpdateNodeInstances(node_i, nresult, nimg)
3255         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3256         self._UpdateNodeOS(node_i, nresult, nimg)
3257
3258         if not nimg.os_fail:
3259           if refos_img is None:
3260             refos_img = nimg
3261           self._VerifyNodeOS(node_i, nimg, refos_img)
3262         self._VerifyNodeBridges(node_i, nresult, bridges)
3263
3264         # Check whether all running instancies are primary for the node. (This
3265         # can no longer be done from _VerifyInstance below, since some of the
3266         # wrong instances could be from other node groups.)
3267         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3268
3269         for inst in non_primary_inst:
3270           # FIXME: investigate best way to handle offline insts
3271           if inst.admin_state == constants.ADMINST_OFFLINE:
3272             if verbose:
3273               feedback_fn("* Skipping offline instance %s" % inst.name)
3274             i_offline += 1
3275             continue
3276           test = inst in self.all_inst_info
3277           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3278                    "instance should not run on node %s", node_i.name)
3279           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3280                    "node is running unknown instance %s", inst)
3281
3282     for node, result in extra_lv_nvinfo.items():
3283       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3284                               node_image[node], vg_name)
3285
3286     feedback_fn("* Verifying instance status")
3287     for instance in self.my_inst_names:
3288       if verbose:
3289         feedback_fn("* Verifying instance %s" % instance)
3290       inst_config = self.my_inst_info[instance]
3291       self._VerifyInstance(instance, inst_config, node_image,
3292                            instdisk[instance])
3293       inst_nodes_offline = []
3294
3295       pnode = inst_config.primary_node
3296       pnode_img = node_image[pnode]
3297       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3298                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3299                " primary node failed", instance)
3300
3301       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3302                pnode_img.offline,
3303                constants.CV_EINSTANCEBADNODE, instance,
3304                "instance is marked as running and lives on offline node %s",
3305                inst_config.primary_node)
3306
3307       # If the instance is non-redundant we cannot survive losing its primary
3308       # node, so we are not N+1 compliant. On the other hand we have no disk
3309       # templates with more than one secondary so that situation is not well
3310       # supported either.
3311       # FIXME: does not support file-backed instances
3312       if not inst_config.secondary_nodes:
3313         i_non_redundant.append(instance)
3314
3315       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3316                constants.CV_EINSTANCELAYOUT,
3317                instance, "instance has multiple secondary nodes: %s",
3318                utils.CommaJoin(inst_config.secondary_nodes),
3319                code=self.ETYPE_WARNING)
3320
3321       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3322         pnode = inst_config.primary_node
3323         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3324         instance_groups = {}
3325
3326         for node in instance_nodes:
3327           instance_groups.setdefault(self.all_node_info[node].group,
3328                                      []).append(node)
3329
3330         pretty_list = [
3331           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3332           # Sort so that we always list the primary node first.
3333           for group, nodes in sorted(instance_groups.items(),
3334                                      key=lambda (_, nodes): pnode in nodes,
3335                                      reverse=True)]
3336
3337         self._ErrorIf(len(instance_groups) > 1,
3338                       constants.CV_EINSTANCESPLITGROUPS,
3339                       instance, "instance has primary and secondary nodes in"
3340                       " different groups: %s", utils.CommaJoin(pretty_list),
3341                       code=self.ETYPE_WARNING)
3342
3343       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3344         i_non_a_balanced.append(instance)
3345
3346       for snode in inst_config.secondary_nodes:
3347         s_img = node_image[snode]
3348         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3349                  snode, "instance %s, connection to secondary node failed",
3350                  instance)
3351
3352         if s_img.offline:
3353           inst_nodes_offline.append(snode)
3354
3355       # warn that the instance lives on offline nodes
3356       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3357                "instance has offline secondary node(s) %s",
3358                utils.CommaJoin(inst_nodes_offline))
3359       # ... or ghost/non-vm_capable nodes
3360       for node in inst_config.all_nodes:
3361         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3362                  instance, "instance lives on ghost node %s", node)
3363         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3364                  instance, "instance lives on non-vm_capable node %s", node)
3365
3366     feedback_fn("* Verifying orphan volumes")
3367     reserved = utils.FieldSet(*cluster.reserved_lvs)
3368
3369     # We will get spurious "unknown volume" warnings if any node of this group
3370     # is secondary for an instance whose primary is in another group. To avoid
3371     # them, we find these instances and add their volumes to node_vol_should.
3372     for inst in self.all_inst_info.values():
3373       for secondary in inst.secondary_nodes:
3374         if (secondary in self.my_node_info
3375             and inst.name not in self.my_inst_info):
3376           inst.MapLVsByNode(node_vol_should)
3377           break
3378
3379     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3380
3381     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3382       feedback_fn("* Verifying N+1 Memory redundancy")
3383       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3384
3385     feedback_fn("* Other Notes")
3386     if i_non_redundant:
3387       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3388                   % len(i_non_redundant))
3389
3390     if i_non_a_balanced:
3391       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3392                   % len(i_non_a_balanced))
3393
3394     if i_offline:
3395       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3396
3397     if n_offline:
3398       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3399
3400     if n_drained:
3401       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3402
3403     return not self.bad
3404
3405   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3406     """Analyze the post-hooks' result
3407
3408     This method analyses the hook result, handles it, and sends some
3409     nicely-formatted feedback back to the user.
3410
3411     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3412         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3413     @param hooks_results: the results of the multi-node hooks rpc call
3414     @param feedback_fn: function used send feedback back to the caller
3415     @param lu_result: previous Exec result
3416     @return: the new Exec result, based on the previous result
3417         and hook results
3418
3419     """
3420     # We only really run POST phase hooks, only for non-empty groups,
3421     # and are only interested in their results
3422     if not self.my_node_names:
3423       # empty node group
3424       pass
3425     elif phase == constants.HOOKS_PHASE_POST:
3426       # Used to change hooks' output to proper indentation
3427       feedback_fn("* Hooks Results")
3428       assert hooks_results, "invalid result from hooks"
3429
3430       for node_name in hooks_results:
3431         res = hooks_results[node_name]
3432         msg = res.fail_msg
3433         test = msg and not res.offline
3434         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3435                       "Communication failure in hooks execution: %s", msg)
3436         if res.offline or msg:
3437           # No need to investigate payload if node is offline or gave
3438           # an error.
3439           continue
3440         for script, hkr, output in res.payload:
3441           test = hkr == constants.HKR_FAIL
3442           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3443                         "Script %s failed, output:", script)
3444           if test:
3445             output = self._HOOKS_INDENT_RE.sub("      ", output)
3446             feedback_fn("%s" % output)
3447             lu_result = False
3448
3449     return lu_result
3450
3451
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  The LU does not check anything itself; it only submits one
  L{opcodes.OpGroupVerifyDisks} job for each node group whose lock it owns.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks: all node groups, sharing as per L{_ShareAll}.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {locking.LEVEL_NODEGROUP: locking.ALL_SET}

  def Exec(self, feedback_fn):
    """Submit the per-group verification jobs.

    @param feedback_fn: not used by this LU
    @return: a L{ResultWithJobs} holding one single-opcode job per node group

    """
    jobs = []

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])

    return ResultWithJobs(jobs)
3470
3471
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the node group and prepare the lock structures.

    The lock lists start out empty; they are filled in, level by level, by
    L{DeclareLocks}.

    """
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()

    lock_levels = (locking.LEVEL_INSTANCE,
                   locking.LEVEL_NODEGROUP,
                   locking.LEVEL_NODE)
    self.needed_locks = dict((lvl, []) for lvl in lock_levels)

  def DeclareLocks(self, level):
    """Compute the locks needed at the given level.

    Instance and node group locks are computed from the configuration
    before the objects they depend on are locked; L{CheckPrereq} verifies
    them once all locks are held.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      # Besides our own group, lock all groups used by instances
      # optimistically; this requires going via the node before it's locked,
      # requiring verification later on
      wanted_groups = set([self.group_uuid])
      for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
        wanted_groups.update(self.cfg.GetInstanceNodeGroups(instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      group = self.cfg.GetNodeGroup(self.group_uuid)
      self.needed_locks[locking.LEVEL_NODE].extend(group.members)

  def CheckPrereq(self):
    """Verify the optimistically acquired locks and load the instances.

    Stores the locked instances as a name-to-object dictionary in
    C{self.instances}.

    """
    locked_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    locked_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in locked_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, locked_instances)

    # Get instance information
    instance_info = self.cfg.GetMultiInstanceInfo(locked_instances)
    self.instances = dict(instance_info)

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              locked_groups, locked_nodes, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances whose admin state is up are looked at.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    node_errors = {}
    need_activation = set()
    missing_lvs = {}

    running = []
    for inst in self.instances.values():
      if inst.admin_state == constants.ADMINST_UP:
        running.append(inst)

    expected_lvs = _MapInstanceDisksToNodes(running)

    if not expected_lvs:
      # No logical volume is expected anywhere, hence no node is queried
      return (node_errors, list(need_activation), missing_lvs)

    locked_nodes = set(self.owned_locks(locking.LEVEL_NODE))
    vm_capable = set(self.cfg.GetVmCapableNodeList())
    nodes = utils.NiceSort(locked_nodes & vm_capable)

    lv_results = self.rpc.call_lv_list(nodes, [])

    for (node, node_res) in lv_results.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        node_errors[node] = msg
        continue

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        # Every LV reported by the node is removed from the expected ones
        owner = expected_lvs.pop((node, lv_name), None)
        if not lv_online and owner is not None:
          need_activation.add(owner)

    # any leftover items in expected_lvs are missing LVs, let's arrange the
    # data better
    for (lv_key, owner) in expected_lvs.items():
      missing_lvs.setdefault(owner, []).append(list(lv_key))

    return (node_errors, list(need_activation), missing_lvs)
3582
3583
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  The sizes recorded in the configuration are compared with the ones
  reported by the instances' primary nodes and corrected on mismatch.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare locks for the requested instances, or for everything.

    """
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      # The node resource locks are computed later, in DeclareLocks
      node_res_locks = []
      instance_locks = self.wanted_names
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      node_res_locks = locking.ALL_SET
      instance_locks = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE_RES: node_res_locks,
      locking.LEVEL_INSTANCE: instance_locks,
      }
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    """Lock the primary nodes' resources of explicitly requested instances.

    """
    if self.wanted_names is None:
      return

    if level == locking.LEVEL_NODE_RES:
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    # Only the instance objects are kept, not the names they come paired with
    instance_info = self.cfg.GetMultiInstanceInfo(self.wanted_names)
    self.wanted_instances = map(compat.snd, instance_info)

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: whether the size of any visited child was changed

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    data_child = disk.children[0]
    too_small = data_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   data_child.size, disk.size)
      data_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(data_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @return: list of (instance name, disk index, new size) tuples, one for
        each correction made

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      for (idx, disk) in enumerate(instance.disks):
        node_disks.append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for (node, disk_entries) in per_node_disks.items():
      # The RPC is done on copies, so that setting the disk ID does not
      # touch the configuration objects
      disk_copies = [entry[2].Copy() for entry in disk_entries]
      for disk_copy in disk_copies:
        self.cfg.SetDiskID(disk_copy, node)

      result = self.rpc.call_blockdev_getsize(node, disk_copies)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      if len(result.payload) != len(disk_entries):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(disk_entries),
                        result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for ((instance, idx, disk), size) in zip(disk_entries, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue

        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue

        # The reported value is scaled down by 2**20 before being compared
        # with the recorded size
        actual_size = size >> 20
        if actual_size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, actual_size)
          disk.size = actual_size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, actual_size))

        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
3704
3705
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary with the current cluster name and the requested one

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    env["NEW_NAME"] = self.op.name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two node lists; the first contains only the master
        node, the second all nodes

    """
    first_nodes = [self.cfg.GetMasterNode()]
    second_nodes = self.cfg.GetNodeList()
    return (first_nodes, second_nodes)

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved; the resolved IP address is stored in C{self.ip}
    and C{self.op.name} is replaced with the resolved name.

    @raise errors.OpPrereqError: if neither name nor IP address change, or
        if a changed IP address already answers on the node daemon port

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    resolved_name = hostname.name
    resolved_ip = hostname.ip
    self.ip = resolved_ip

    current_name = self.cfg.GetClusterName()
    current_ip = self.cfg.GetMasterIP()

    if resolved_name == current_name and resolved_ip == current_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # A new IP address must not be in use already
    if (resolved_ip != current_ip and
        netutils.TcpPing(resolved_ip, constants.DEFAULT_NODED_PORT)):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 resolved_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = resolved_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master IP is deactivated, the configuration and the known hosts
    file are updated, and the master IP is activated again with the new
    address (even if the update failed).

    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    down_result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                          master_params,
                                                          use_ext_script)
    down_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)

      # ... and upload it to all online nodes, except for the master
      upload_nodes = self.cfg.GetOnlineNodeList()
      if master_params.name in upload_nodes:
        upload_nodes.remove(master_params.name)
      _UploadHelper(self, upload_nodes, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      up_result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                        master_params,
                                                        use_ext_script)
      up_msg = up_result.fail_msg
      if up_msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", up_msg)

    return new_name
3789
3790
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  The netmask is validated by the IP address class matching the cluster's
  primary IP family.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    # Pass an error code, like the other OpPrereqError instances raised in
    # this module do
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               (ip_family, ), errors.ECODE_INVAL)

  if not ipcls.ValidateNetmask(netmask):
    # The explicit one-element tuple keeps the formatting correct whatever
    # the type of the value is
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask, ), errors.ECODE_INVAL)
3810
3811
3812 class LUClusterSetParams(LogicalUnit):
3813   """Change the parameters of the cluster.
3814
3815   """
3816   HPATH = "cluster-modify"
3817   HTYPE = constants.HTYPE_CLUSTER
3818   REQ_BGL = False
3819
3820   def CheckArguments(self):
3821     """Check parameters
3822
3823     """
3824     if self.op.uid_pool:
3825       uidpool.CheckUidPool(self.op.uid_pool)
3826
3827     if self.op.add_uids:
3828       uidpool.CheckUidPool(self.op.add_uids)
3829
3830     if self.op.remove_uids:
3831       uidpool.CheckUidPool(self.op.remove_uids)
3832
3833     if self.op.master_netmask is not None:
3834       _ValidateNetmask(self.cfg, self.op.master_netmask)
3835
3836     if self.op.diskparams:
3837       for dt_params in self.op.diskparams.values():
3838         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3839
3840   def ExpandNames(self):
3841     # FIXME: in the future maybe other cluster params won't require checking on
3842     # all nodes to be modified.
3843     self.needed_locks = {
3844       locking.LEVEL_NODE: locking.ALL_SET,
3845       locking.LEVEL_INSTANCE: locking.ALL_SET,
3846       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3847     }
3848     self.share_locks = {
3849         locking.LEVEL_NODE: 1,
3850         locking.LEVEL_INSTANCE: 1,
3851         locking.LEVEL_NODEGROUP: 1,
3852     }
3853
3854   def BuildHooksEnv(self):
3855     """Build hooks env.
3856
3857     """
3858     return {
3859       "OP_TARGET": self.cfg.GetClusterName(),
3860       "NEW_VG_NAME": self.op.vg_name,
3861       }
3862
3863   def BuildHooksNodes(self):
3864     """Build hooks nodes.
3865
3866     """
3867     mn = self.cfg.GetMasterNode()
3868     return ([mn], [mn])
3869
3870   def CheckPrereq(self):
3871     """Check prerequisites.
3872
3873     This checks whether the given params don't conflict and
3874     if the given volume group is valid.
3875
3876     """
3877     if self.op.vg_name is not None and not self.op.vg_name:
3878       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3879         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3880                                    " instances exist", errors.ECODE_INVAL)
3881
3882     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3883       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3884         raise errors.OpPrereqError("Cannot disable drbd helper while"
3885                                    " drbd-based instances exist",
3886                                    errors.ECODE_INVAL)
3887
3888     node_list = self.owned_locks(locking.LEVEL_NODE)
3889
3890     # if vg_name not None, checks given volume group on all nodes
3891     if self.op.vg_name:
3892       vglist = self.rpc.call_vg_list(node_list)
3893       for node in node_list:
3894         msg = vglist[node].fail_msg
3895         if msg:
3896           # ignoring down node
3897           self.LogWarning("Error while gathering data on node %s"
3898                           " (ignoring node): %s", node, msg)
3899           continue
3900         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3901                                               self.op.vg_name,
3902                                               constants.MIN_VG_SIZE)
3903         if vgstatus:
3904           raise errors.OpPrereqError("Error on node '%s': %s" %
3905                                      (node, vgstatus), errors.ECODE_ENVIRON)
3906
3907     if self.op.drbd_helper:
3908       # checks given drbd helper on all nodes
3909       helpers = self.rpc.call_drbd_helper(node_list)
3910       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3911         if ninfo.offline:
3912           self.LogInfo("Not checking drbd helper on offline node %s", node)
3913           continue
3914         msg = helpers[node].fail_msg
3915         if msg:
3916           raise errors.OpPrereqError("Error checking drbd helper on node"
3917                                      " '%s': %s" % (node, msg),
3918                                      errors.ECODE_ENVIRON)
3919         node_helper = helpers[node].payload
3920         if node_helper != self.op.drbd_helper:
3921           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3922                                      (node, node_helper), errors.ECODE_ENVIRON)
3923
3924     self.cluster = cluster = self.cfg.GetClusterInfo()
3925     # validate params changes
3926     if self.op.beparams:
3927       objects.UpgradeBeParams(self.op.beparams)
3928       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3929       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3930
3931     if self.op.ndparams:
3932       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3933       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3934
3935       # TODO: we need a more general way to handle resetting
3936       # cluster-level parameters to default values
3937       if self.new_ndparams["oob_program"] == "":
3938         self.new_ndparams["oob_program"] = \
3939             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3940
3941     if self.op.hv_state:
3942       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3943                                             self.cluster.hv_state_static)
3944       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3945                                for hv, values in new_hv_state.items())
3946
3947     if self.op.disk_state:
3948       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3949                                                 self.cluster.disk_state_static)
3950       self.new_disk_state = \
3951         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3952                             for name, values in svalues.items()))
3953              for storage, svalues in new_disk_state.items())
3954
3955     if self.op.ipolicy:
3956       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3957                                             group_policy=False)
3958
3959       all_instances = self.cfg.GetAllInstancesInfo().values()
3960       violations = set()
3961       for group in self.cfg.GetAllNodeGroupsInfo().values():
3962         instances = frozenset([inst for inst in all_instances
3963                                if compat.any(node in group.members
3964                                              for node in inst.all_nodes)])
3965         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3966         new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3967                                                                    group),
3968                                             new_ipolicy, instances)
3969         if new:
3970           violations.update(new)
3971
3972       if violations:
3973         self.LogWarning("After the ipolicy change the following instances"
3974                         " violate them: %s",
3975                         utils.CommaJoin(violations))
3976
3977     if self.op.nicparams:
3978       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3979       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3980       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3981       nic_errors = []
3982
3983       # check all instances for consistency
3984       for instance in self.cfg.GetAllInstancesInfo().values():
3985         for nic_idx, nic in enumerate(instance.nics):
3986           params_copy = copy.deepcopy(nic.nicparams)
3987           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3988
3989           # check parameter syntax
3990           try:
3991             objects.NIC.CheckParameterSyntax(params_filled)
3992           except errors.ConfigurationError, err:
3993             nic_errors.append("Instance %s, nic/%d: %s" %
3994                               (instance.name, nic_idx, err))
3995
3996           # if we're moving instances to routed, check that they have an ip
3997           target_mode = params_filled[constants.NIC_MODE]
3998           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3999             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4000                               " address" % (instance.name, nic_idx))
4001       if nic_errors:
4002         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4003                                    "\n".join(nic_errors))
4004
4005     # hypervisor list/parameters
4006     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4007     if self.op.hvparams:
4008       for hv_name, hv_dict in self.op.hvparams.items():
4009         if hv_name not in self.new_hvparams:
4010           self.new_hvparams[hv_name] = hv_dict
4011         else:
4012           self.new_hvparams[hv_name].update(hv_dict)
4013
4014     # disk template parameters
4015     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4016     if self.op.diskparams:
4017       for dt_name, dt_params in self.op.diskparams.items():
4018         if dt_name not in self.op.diskparams:
4019           self.new_diskparams[dt_name] = dt_params
4020         else:
4021           self.new_diskparams[dt_name].update(dt_params)
4022
4023     # os hypervisor parameters
4024     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4025     if self.op.os_hvp:
4026       for os_name, hvs in self.op.os_hvp.items():
4027         if os_name not in self.new_os_hvp:
4028           self.new_os_hvp[os_name] = hvs
4029         else:
4030           for hv_name, hv_dict in hvs.items():
4031             if hv_name not in self.new_os_hvp[os_name]:
4032               self.new_os_hvp[os_name][hv_name] = hv_dict
4033             else:
4034               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4035
4036     # os parameters
4037     self.new_osp = objects.FillDict(cluster.osparams, {})
4038     if self.op.osparams:
4039       for os_name, osp in self.op.osparams.items():
4040         if os_name not in self.new_osp:
4041           self.new_osp[os_name] = {}
4042
4043         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4044                                                   use_none=True)
4045
4046         if not self.new_osp[os_name]:
4047           # we removed all parameters
4048           del self.new_osp[os_name]
4049         else:
4050           # check the parameter validity (remote check)
4051           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4052                          os_name, self.new_osp[os_name])
4053
4054     # changes to the hypervisor list
4055     if self.op.enabled_hypervisors is not None:
4056       self.hv_list = self.op.enabled_hypervisors
4057       for hv in self.hv_list:
4058         # if the hypervisor doesn't already exist in the cluster
4059         # hvparams, we initialize it to empty, and then (in both
4060         # cases) we make sure to fill the defaults, as we might not
4061         # have a complete defaults list if the hypervisor wasn't
4062         # enabled before
4063         if hv not in new_hvp:
4064           new_hvp[hv] = {}
4065         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4066         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4067     else:
4068       self.hv_list = cluster.enabled_hypervisors
4069
4070     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4071       # either the enabled list has changed, or the parameters have, validate
4072       for hv_name, hv_params in self.new_hvparams.items():
4073         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4074             (self.op.enabled_hypervisors and
4075              hv_name in self.op.enabled_hypervisors)):
4076           # either this is a new hypervisor, or its parameters have changed
4077           hv_class = hypervisor.GetHypervisor(hv_name)
4078           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4079           hv_class.CheckParameterSyntax(hv_params)
4080           _CheckHVParams(self, node_list, hv_name, hv_params)
4081
4082     if self.op.os_hvp:
4083       # no need to check any newly-enabled hypervisors, since the
4084       # defaults have already been checked in the above code-block
4085       for os_name, os_hvp in self.new_os_hvp.items():
4086         for hv_name, hv_params in os_hvp.items():
4087           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4088           # we need to fill in the new os_hvp on top of the actual hv_p
4089           cluster_defaults = self.new_hvparams.get(hv_name, {})
4090           new_osp = objects.FillDict(cluster_defaults, hv_params)
4091           hv_class = hypervisor.GetHypervisor(hv_name)
4092           hv_class.CheckParameterSyntax(new_osp)
4093           _CheckHVParams(self, node_list, hv_name, new_osp)
4094
4095     if self.op.default_iallocator:
4096       alloc_script = utils.FindFile(self.op.default_iallocator,
4097                                     constants.IALLOCATOR_SEARCH_PATH,
4098                                     os.path.isfile)
4099       if alloc_script is None:
4100         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4101                                    " specified" % self.op.default_iallocator,
4102                                    errors.ECODE_INVAL)
4103
4104   def Exec(self, feedback_fn):
4105     """Change the parameters of the cluster.
4106
4107     """
4108     if self.op.vg_name is not None:
4109       new_volume = self.op.vg_name
4110       if not new_volume:
4111         new_volume = None
4112       if new_volume != self.cfg.GetVGName():
4113         self.cfg.SetVGName(new_volume)
4114       else:
4115         feedback_fn("Cluster LVM configuration already in desired"
4116                     " state, not changing")
4117     if self.op.drbd_helper is not None:
4118       new_helper = self.op.drbd_helper
4119       if not new_helper:
4120         new_helper = None
4121       if new_helper != self.cfg.GetDRBDHelper():
4122         self.cfg.SetDRBDHelper(new_helper)
4123       else:
4124         feedback_fn("Cluster DRBD helper already in desired state,"
4125                     " not changing")
4126     if self.op.hvparams:
4127       self.cluster.hvparams = self.new_hvparams
4128     if self.op.os_hvp:
4129       self.cluster.os_hvp = self.new_os_hvp
4130     if self.op.enabled_hypervisors is not None:
4131       self.cluster.hvparams = self.new_hvparams
4132       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4133     if self.op.beparams:
4134       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4135     if self.op.nicparams:
4136       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4137     if self.op.ipolicy:
4138       self.cluster.ipolicy = self.new_ipolicy
4139     if self.op.osparams:
4140       self.cluster.osparams = self.new_osp
4141     if self.op.ndparams:
4142       self.cluster.ndparams = self.new_ndparams
4143     if self.op.diskparams:
4144       self.cluster.diskparams = self.new_diskparams
4145     if self.op.hv_state:
4146       self.cluster.hv_state_static = self.new_hv_state
4147     if self.op.disk_state:
4148       self.cluster.disk_state_static = self.new_disk_state
4149
4150     if self.op.candidate_pool_size is not None:
4151       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4152       # we need to update the pool size here, otherwise the save will fail
4153       _AdjustCandidatePool(self, [])
4154
4155     if self.op.maintain_node_health is not None:
4156       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4157         feedback_fn("Note: CONFD was disabled at build time, node health"
4158                     " maintenance is not useful (still enabling it)")
4159       self.cluster.maintain_node_health = self.op.maintain_node_health
4160
4161     if self.op.prealloc_wipe_disks is not None:
4162       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4163
4164     if self.op.add_uids is not None:
4165       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4166
4167     if self.op.remove_uids is not None:
4168       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4169
4170     if self.op.uid_pool is not None:
4171       self.cluster.uid_pool = self.op.uid_pool
4172
4173     if self.op.default_iallocator is not None:
4174       self.cluster.default_iallocator = self.op.default_iallocator
4175
4176     if self.op.reserved_lvs is not None:
4177       self.cluster.reserved_lvs = self.op.reserved_lvs
4178
4179     if self.op.use_external_mip_script is not None:
4180       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4181
4182     def helper_os(aname, mods, desc):
4183       desc += " OS list"
4184       lst = getattr(self.cluster, aname)
4185       for key, val in mods:
4186         if key == constants.DDM_ADD:
4187           if val in lst:
4188             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4189           else:
4190             lst.append(val)
4191         elif key == constants.DDM_REMOVE:
4192           if val in lst:
4193             lst.remove(val)
4194           else:
4195             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4196         else:
4197           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4198
4199     if self.op.hidden_os:
4200       helper_os("hidden_os", self.op.hidden_os, "hidden")
4201
4202     if self.op.blacklisted_os:
4203       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4204
4205     if self.op.master_netdev:
4206       master_params = self.cfg.GetMasterNetworkParameters()
4207       ems = self.cfg.GetUseExternalMipScript()
4208       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4209                   self.cluster.master_netdev)
4210       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4211                                                        master_params, ems)
4212       result.Raise("Could not disable the master ip")
4213       feedback_fn("Changing master_netdev from %s to %s" %
4214                   (master_params.netdev, self.op.master_netdev))
4215       self.cluster.master_netdev = self.op.master_netdev
4216
4217     if self.op.master_netmask:
4218       master_params = self.cfg.GetMasterNetworkParameters()
4219       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4220       result = self.rpc.call_node_change_master_netmask(master_params.name,
4221                                                         master_params.netmask,
4222                                                         self.op.master_netmask,
4223                                                         master_params.ip,
4224                                                         master_params.netdev)
4225       if result.fail_msg:
4226         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4227         feedback_fn(msg)
4228
4229       self.cluster.master_netmask = self.op.master_netmask
4230
4231     self.cfg.Update(self.cluster, feedback_fn)
4232
4233     if self.op.master_netdev:
4234       master_params = self.cfg.GetMasterNetworkParameters()
4235       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4236                   self.op.master_netdev)
4237       ems = self.cfg.GetUseExternalMipScript()
4238       result = self.rpc.call_node_activate_master_ip(master_params.name,
4239                                                      master_params, ems)
4240       if result.fail_msg:
4241         self.LogWarning("Could not re-enable the master ip on"
4242                         " the master, please restart manually: %s",
4243                         result.fail_msg)
4244
4245
4246 def _UploadHelper(lu, nodes, fname):
4247   """Helper for uploading a file and showing warnings.
4248
4249   """
4250   if os.path.exists(fname):
4251     result = lu.rpc.call_upload_file(nodes, fname)
4252     for to_node, to_result in result.items():
4253       msg = to_result.fail_msg
4254       if msg:
4255         msg = ("Copy of file %s to node %s failed: %s" %
4256                (fname, to_node, msg))
4257         lu.proc.LogWarning(msg)
4258
4259
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @param cluster: cluster object; its C{modify_etc_hosts} and
      C{enabled_hypervisors} attributes are used
  @type redist: boolean
  @param redist: if true, compute the lists used for redistribution: only
      the RAPI certificate is included instead of the complete
      certificate list and the ssconf files, and no master candidate
      files are listed
  @rtype: tuple
  @return: (files_all, files_opt, files_mc, files_vm), four sets of file
      names: files for all nodes, optional files, files for master
      candidates only and files for VM-capable nodes only

  """
  # Files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)
  else:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Optional files; each of them must
  # - be listed in one of the other categories as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files for master candidates only
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  hv_names = cluster.enabled_hypervisors

  # Files for VM-capable nodes only: the first list returned by each
  # enabled hypervisor
  files_vm = set()
  for hv_name in hv_names:
    files_vm.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  # The second list returned by each hypervisor holds its optional files
  for hv_name in hv_names:
    files_opt.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # No file may be listed in more than one category
  all_files_set = files_all | files_mc | files_vm
  expected_count = len(files_all) + len(files_mc) + len(files_vm)
  assert len(all_files_set) == expected_count, \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
4324
4325
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Copy the ancillary cluster files to the other nodes.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This
  function uploads them to the online nodes (and the hypervisor files to
  the VM-capable nodes), always leaving out the master node.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  cfg = lu.cfg

  # Collect the target nodes
  cluster = cfg.GetClusterInfo()
  master_info = cfg.GetNodeInfo(cfg.GetMasterNode())

  online_nodes = cfg.GetOnlineNodeList()
  vm_nodes = cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # The master node is never a target
  master_name = master_info.name
  if master_name in online_nodes:
    online_nodes.remove(master_name)
  if master_name in vm_nodes:
    vm_nodes.remove(master_name)

  # Collect the file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # The configuration file is never re-distributed from here
  assert (constants.CLUSTER_CONF_FILE not in files_all and
          constants.CLUSTER_CONF_FILE not in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # Upload each file to the nodes of its category
  for (targets, fnames) in ((online_nodes, files_all), (vm_nodes, files_vm)):
    for fname in fnames:
      _UploadHelper(lu, targets, fname)
4374
4375
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The cluster object is written back through the configuration writer and
  the ancillary files are copied to the nodes afterwards.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all nodes, with the node level marked shared.

    """
    node_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = node_locks

  def Exec(self, feedback_fn):
    """Redistribute the configuration and the ancillary files.

    @param feedback_fn: function passed on to the configuration update

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
4396
4397
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Run the master IP activation RPC, failing if it does not succeed.

    @param feedback_fn: unused

    """
    cfg = self.cfg
    params = cfg.GetMasterNetworkParameters()
    use_ext_script = cfg.GetUseExternalMipScript()
    activation = self.rpc.call_node_activate_master_ip(params.name, params,
                                                       use_ext_script)
    activation.Raise("Could not activate the master IP")
4411
4412
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Run the master IP deactivation RPC, failing if it does not succeed.

    @param feedback_fn: unused

    """
    cfg = self.cfg
    params = cfg.GetMasterNetworkParameters()
    use_ext_script = cfg.GetUseExternalMipScript()
    deactivation = self.rpc.call_node_deactivate_master_ip(params.name,
                                                           params,
                                                           use_ext_script)
    deactivation.Raise("Could not deactivate the master IP")
4426
4427
4428 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4429   """Sleep and poll for an instance's disk to sync.
4430
4431   """
4432   if not instance.disks or disks is not None and not disks:
4433     return True
4434
4435   disks = _ExpandCheckDisks(instance, disks)
4436
4437   if not oneshot:
4438     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4439
4440   node = instance.primary_node
4441
4442   for dev in disks:
4443     lu.cfg.SetDiskID(dev, node)
4444
4445   # TODO: Convert to utils.Retry
4446
4447   retries = 0
4448   degr_retries = 10 # in seconds, as we sleep 1 second each time
4449   while True:
4450     max_time = 0
4451     done = True
4452     cumul_degraded = False
4453     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4454     msg = rstats.fail_msg
4455     if msg:
4456       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4457       retries += 1
4458       if retries >= 10:
4459         raise errors.RemoteError("Can't contact node %s for mirror data,"
4460                                  " aborting." % node)
4461       time.sleep(6)
4462       continue
4463     rstats = rstats.payload
4464     retries = 0
4465     for i, mstat in enumerate(rstats):
4466       if mstat is None:
4467         lu.LogWarning("Can't compute data for node %s/%s",
4468                            node, disks[i].iv_name)
4469         continue
4470
4471       cumul_degraded = (cumul_degraded or
4472                         (mstat.is_degraded and mstat.sync_percent is None))
4473       if mstat.sync_percent is not None:
4474         done = False
4475         if mstat.estimated_time is not None:
4476           rem_time = ("%s remaining (estimated)" %
4477                       utils.FormatSeconds(mstat.estimated_time))
4478           max_time = mstat.estimated_time
4479         else:
4480           rem_time = "no time estimate"
4481         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4482                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4483
4484     # if we're done but degraded, let's do a few small retries, to
4485     # make sure we see a stable and not transient situation; therefore
4486     # we force restart of the loop
4487     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4488       logging.info("Degraded disks found, %d retries left", degr_retries)
4489       degr_retries -= 1
4490       time.sleep(1)
4491       continue
4492
4493     if done or oneshot:
4494       break
4495
4496     time.sleep(min(60, max_time))
4497
4498   if done:
4499     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4500   return not cumul_degraded
4501
4502
4503 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4504   """Check that mirrors are not degraded.
4505
4506   The ldisk parameter, if True, will change the test from the
4507   is_degraded attribute (which represents overall non-ok status for
4508   the device(s)) to the ldisk (representing the local storage status).
4509
4510   """
4511   lu.cfg.SetDiskID(dev, node)
4512
4513   result = True
4514
4515   if on_primary or dev.AssembleOnSecondary():
4516     rstats = lu.rpc.call_blockdev_find(node, dev)
4517     msg = rstats.fail_msg
4518     if msg:
4519       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4520       result = False
4521     elif not rstats.payload:
4522       lu.LogWarning("Can't find disk on node %s", node)
4523       result = False
4524     else:
4525       if ldisk:
4526         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4527       else:
4528         result = result and not rstats.payload.is_degraded
4529
4530   if dev.children:
4531     for child in dev.children:
4532       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4533
4534   return result
4535
4536
4537 class LUOobCommand(NoHooksLU):
4538   """Logical unit for OOB handling.
4539
4540   """
4541   REQ_BGL = False
4542   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4543
4544   def ExpandNames(self):
4545     """Gather locks we need.
4546
4547     """
4548     if self.op.node_names:
4549       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4550       lock_names = self.op.node_names
4551     else:
4552       lock_names = locking.ALL_SET
4553
4554     self.needed_locks = {
4555       locking.LEVEL_NODE: lock_names,
4556       }
4557
4558   def CheckPrereq(self):
4559     """Check prerequisites.
4560
4561     This checks:
4562      - the node exists in the configuration
4563      - OOB is supported
4564
4565     Any errors are signaled by raising errors.OpPrereqError.
4566
4567     """
4568     self.nodes = []
4569     self.master_node = self.cfg.GetMasterNode()
4570
4571     assert self.op.power_delay >= 0.0
4572
4573     if self.op.node_names:
4574       if (self.op.command in self._SKIP_MASTER and
4575           self.master_node in self.op.node_names):
4576         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4577         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4578
4579         if master_oob_handler:
4580           additional_text = ("run '%s %s %s' if you want to operate on the"
4581                              " master regardless") % (master_oob_handler,
4582                                                       self.op.command,
4583                                                       self.master_node)
4584         else:
4585           additional_text = "it does not support out-of-band operations"
4586
4587         raise errors.OpPrereqError(("Operating on the master node %s is not"
4588                                     " allowed for %s; %s") %
4589                                    (self.master_node, self.op.command,
4590                                     additional_text), errors.ECODE_INVAL)
4591     else:
4592       self.op.node_names = self.cfg.GetNodeList()
4593       if self.op.command in self._SKIP_MASTER:
4594         self.op.node_names.remove(self.master_node)
4595
4596     if self.op.command in self._SKIP_MASTER:
4597       assert self.master_node not in self.op.node_names
4598
4599     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4600       if node is None:
4601         raise errors.OpPrereqError("Node %s not found" % node_name,
4602                                    errors.ECODE_NOENT)
4603       else:
4604         self.nodes.append(node)
4605
4606       if (not self.op.ignore_status and
4607           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4608         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4609                                     " not marked offline") % node_name,
4610                                    errors.ECODE_STATE)
4611
4612   def Exec(self, feedback_fn):
4613     """Execute OOB and return result if we expect any.
4614
4615     """
4616     master_node = self.master_node
4617     ret = []
4618
4619     for idx, node in enumerate(utils.NiceSort(self.nodes,
4620                                               key=lambda node: node.name)):
4621       node_entry = [(constants.RS_NORMAL, node.name)]
4622       ret.append(node_entry)
4623
4624       oob_program = _SupportsOob(self.cfg, node)
4625
4626       if not oob_program:
4627         node_entry.append((constants.RS_UNAVAIL, None))
4628         continue
4629
4630       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4631                    self.op.command, oob_program, node.name)
4632       result = self.rpc.call_run_oob(master_node, oob_program,
4633                                      self.op.command, node.name,
4634                                      self.op.timeout)
4635
4636       if result.fail_msg:
4637         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4638                         node.name, result.fail_msg)
4639         node_entry.append((constants.RS_NODATA, None))
4640       else:
4641         try:
4642           self._CheckPayload(result)
4643         except errors.OpExecError, err:
4644           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4645                           node.name, err)
4646           node_entry.append((constants.RS_NODATA, None))
4647         else:
4648           if self.op.command == constants.OOB_HEALTH:
4649             # For health we should log important events
4650             for item, status in result.payload:
4651               if status in [constants.OOB_STATUS_WARNING,
4652                             constants.OOB_STATUS_CRITICAL]:
4653                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4654                                 item, node.name, status)
4655
4656           if self.op.command == constants.OOB_POWER_ON:
4657             node.powered = True
4658           elif self.op.command == constants.OOB_POWER_OFF:
4659             node.powered = False
4660           elif self.op.command == constants.OOB_POWER_STATUS:
4661             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4662             if powered != node.powered:
4663               logging.warning(("Recorded power state (%s) of node '%s' does not"
4664                                " match actual power state (%s)"), node.powered,
4665                               node.name, powered)
4666
4667           # For configuration changing commands we should update the node
4668           if self.op.command in (constants.OOB_POWER_ON,
4669                                  constants.OOB_POWER_OFF):
4670             self.cfg.Update(node, feedback_fn)
4671
4672           node_entry.append((constants.RS_NORMAL, result.payload))
4673
4674           if (self.op.command == constants.OOB_POWER_ON and
4675               idx < len(self.nodes) - 1):
4676             time.sleep(self.op.power_delay)
4677
4678     return ret
4679
4680   def _CheckPayload(self, result):
4681     """Checks if the payload is valid.
4682
4683     @param result: RPC result
4684     @raises errors.OpExecError: If payload is not valid
4685
4686     """
4687     errs = []
4688     if self.op.command == constants.OOB_HEALTH:
4689       if not isinstance(result.payload, list):
4690         errs.append("command 'health' is expected to return a list but got %s" %
4691                     type(result.payload))
4692       else:
4693         for item, status in result.payload:
4694           if status not in constants.OOB_STATUSES:
4695             errs.append("health item '%s' has invalid status '%s'" %
4696                         (item, status))
4697
4698     if self.op.command == constants.OOB_POWER_STATUS:
4699       if not isinstance(result.payload, dict):
4700         errs.append("power-status is expected to return a dict but got %s" %
4701                     type(result.payload))
4702
4703     if self.op.command in [
4704         constants.OOB_POWER_ON,
4705         constants.OOB_POWER_OFF,
4706         constants.OOB_POWER_CYCLE,
4707         ]:
4708       if result.payload is not None:
4709         errs.append("%s is expected to not return payload but got '%s'" %
4710                     (self.op.command, result.payload))
4711
4712     if errs:
4713       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4714                                utils.CommaJoin(errs))
4715
4716
4717 class _OsQuery(_QueryBase):
4718   FIELDS = query.OS_FIELDS
4719
4720   def ExpandNames(self, lu):
4721     # Lock all nodes in shared mode
4722     # Temporary removal of locks, should be reverted later
4723     # TODO: reintroduce locks when they are lighter-weight
4724     lu.needed_locks = {}
4725     #self.share_locks[locking.LEVEL_NODE] = 1
4726     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4727
4728     # The following variables interact with _QueryBase._GetNames
4729     if self.names:
4730       self.wanted = self.names
4731     else:
4732       self.wanted = locking.ALL_SET
4733
4734     self.do_locking = self.use_locking
4735
4736   def DeclareLocks(self, lu, level):
4737     pass
4738
4739   @staticmethod
4740   def _DiagnoseByOS(rlist):
4741     """Remaps a per-node return list into an a per-os per-node dictionary
4742
4743     @param rlist: a map with node names as keys and OS objects as values
4744
4745     @rtype: dict
4746     @return: a dictionary with osnames as keys and as value another
4747         map, with nodes as keys and tuples of (path, status, diagnose,
4748         variants, parameters, api_versions) as values, eg::
4749
4750           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4751                                      (/srv/..., False, "invalid api")],
4752                            "node2": [(/srv/..., True, "", [], [])]}
4753           }
4754
4755     """
4756     all_os = {}
4757     # we build here the list of nodes that didn't fail the RPC (at RPC
4758     # level), so that nodes with a non-responding node daemon don't
4759     # make all OSes invalid
4760     good_nodes = [node_name for node_name in rlist
4761                   if not rlist[node_name].fail_msg]
4762     for node_name, nr in rlist.items():
4763       if nr.fail_msg or not nr.payload:
4764         continue
4765       for (name, path, status, diagnose, variants,
4766            params, api_versions) in nr.payload:
4767         if name not in all_os:
4768           # build a list of nodes for this os containing empty lists
4769           # for each node in node_list
4770           all_os[name] = {}
4771           for nname in good_nodes:
4772             all_os[name][nname] = []
4773         # convert params from [name, help] to (name, help)
4774         params = [tuple(v) for v in params]
4775         all_os[name][node_name].append((path, status, diagnose,
4776                                         variants, params, api_versions))
4777     return all_os
4778
4779   def _GetQueryData(self, lu):
4780     """Computes the list of nodes and their attributes.
4781
4782     """
4783     # Locking is not used
4784     assert not (compat.any(lu.glm.is_owned(level)
4785                            for level in locking.LEVELS
4786                            if level != locking.LEVEL_CLUSTER) or
4787                 self.do_locking or self.use_locking)
4788
4789     valid_nodes = [node.name
4790                    for node in lu.cfg.GetAllNodesInfo().values()
4791                    if not node.offline and node.vm_capable]
4792     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4793     cluster = lu.cfg.GetClusterInfo()
4794
4795     data = {}
4796
4797     for (os_name, os_data) in pol.items():
4798       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4799                           hidden=(os_name in cluster.hidden_os),
4800                           blacklisted=(os_name in cluster.blacklisted_os))
4801
4802       variants = set()
4803       parameters = set()
4804       api_versions = set()
4805
4806       for idx, osl in enumerate(os_data.values()):
4807         info.valid = bool(info.valid and osl and osl[0][1])
4808         if not info.valid:
4809           break
4810
4811         (node_variants, node_params, node_api) = osl[0][3:6]
4812         if idx == 0:
4813           # First entry
4814           variants.update(node_variants)
4815           parameters.update(node_params)
4816           api_versions.update(node_api)
4817         else:
4818           # Filter out inconsistent values
4819           variants.intersection_update(node_variants)
4820           parameters.intersection_update(node_params)
4821           api_versions.intersection_update(node_api)
4822
4823       info.variants = list(variants)
4824       info.parameters = list(parameters)
4825       info.api_versions = list(api_versions)
4826
4827       data[os_name] = info
4828
4829     # Prepare data in requested order
4830     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4831             if name in data]
4832
4833
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  The actual work is delegated to an L{_OsQuery} object.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: the requested output fields
    @param names: the OS names, passed to L{qlang.MakeSimpleFilter}
    @return: the name filter, the status filter, both of them joined with
        C{qlang.OP_AND}, or C{None} when there is nothing to filter on

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for fname in ["hidden", "blacklisted"]:
      if fname not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, fname]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      status_filter = [qlang.OP_AND] + conditions
    else:
      status_filter = None

    if not name_filter:
      return status_filter
    if status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    return name_filter

  def CheckArguments(self):
    """Creates the OS query object from the opcode parameters.

    """
    fields = self.op.output_fields
    qfilter = self._BuildFilter(fields, self.op.names)
    self.oq = _OsQuery(qfilter, fields, False)

  def ExpandNames(self):
    """Lets the query object set up the locking requirements.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.oq.OldStyleQuery(self)
4876
4877
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node from the cluster.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dict with the name of the node being removed

    """
    target = self.op.node_name
    return {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The node being removed is excluded from both phases, as a failed
    node would otherwise be impossible to remove.

    """
    hook_nodes = self.cfg.GetNodeList()
    if self.op.node_name in hook_nodes:
      hook_nodes.remove(self.op.node_name)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    all_instances = self.cfg.GetAllInstancesInfo()
    for (instance_name, instance) in all_instances.items():
      if node.name not in instance.all_nodes:
        continue
      raise errors.OpPrereqError("Instance %s is still running on the node,"
                                 " please remove first" % instance_name,
                                 errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    target = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 target.name)

    # Read before the node is dropped from the configuration
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[target.name])
    self.context.RemoveNode(target.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, target.name)

    leave_result = self.rpc.call_node_leave_cluster(target.name,
                                                    modify_ssh_setup)
    leave_err = leave_result.fail_msg
    if leave_err:
      # A failure on the departing node is only reported, not fatal
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_err)

    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    # Remove node from our /etc/hosts
    hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                  constants.ETC_HOSTS_REMOVE,
                                                  target.name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
4970
4971
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted nodes and the locks needed to query them.

    @param lu: the logical unit on whose behalf the query runs

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if not self.do_locking:
      return

    # If any non-static field is requested we need to lock the nodes
    lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Declares no additional locks, whatever the level.

    """

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Only the kinds of data listed in C{self.requested_data} are gathered;
    the others are passed on as C{None} (or an empty dict for groups).

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.NodeQueryData} object

    """
    all_info = lu.cfg.GetAllNodesInfo()
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
    wanted_data = self.requested_data

    # Gather data as requested
    live_data = None
    if query.NQ_LIVE in wanted_data:
      # filter out non-vm_capable nodes
      toquery_nodes = []
      for name in nodenames:
        if all_info[name].vm_capable:
          toquery_nodes.append(name)

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = {}
      for (name, nresult) in node_data.items():
        if nresult.fail_msg or not nresult.payload:
          continue
        live_data[name] = _MakeLegacyNodeInfo(nresult.payload)

    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in wanted_data:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      for inst in lu.cfg.GetAllInstancesInfo().values():
        primaries = node_to_primary.get(inst.primary_node)
        if primaries is not None:
          primaries.add(inst.name)
        for secnode in inst.secondary_nodes:
          secondaries = node_to_secondary.get(secnode)
          if secondaries is not None:
            secondaries.add(inst.name)

    oob_support = None
    if query.NQ_OOB in wanted_data:
      oob_support = {}
      for (name, node) in all_info.iteritems():
        oob_support[name] = bool(_SupportsOob(lu.cfg, node))

    if query.NQ_GROUP in wanted_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    node_objects = [all_info[name] for name in nodenames]
    return query.NodeQueryData(node_objects, live_data,
                               lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
5046
5047
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All phases are forwarded to a L{_NodeQuery} object.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the node query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Lets the query object set up the locking requirements.

    """
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Lets the query object declare the locks for the given level.

    """
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.nq.OldStyleQuery(self)
5067
5068
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validates the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests shared locks on the given nodes, or on all of them.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    @return: a list of rows, one per volume, each holding the string
        values of the requested output fields

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    vol2inst = _MapInstanceDisksToNodes(self.cfg.GetAllInstancesInfo().values())

    # One getter per known field; values are only computed for the fields
    # actually requested
    getters = {
      "node": lambda node, vol: node,
      "phys": lambda node, vol: vol["dev"],
      "vg": lambda node, vol: vol["vg"],
      "name": lambda node, vol: vol["name"],
      "size": lambda node, vol: int(float(vol["size"])),
      "instance": lambda node, vol: vol2inst.get((node, vol["vg"] + "/" +
                                                  vol["name"]), "-"),
      }

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      for vol in sorted(nresult.payload, key=operator.itemgetter("dev")):
        row = []
        for field in self.op.output_fields:
          try:
            getter = getters[field]
          except KeyError:
            raise errors.ParameterError(field)
          row.append(str(getter(node, vol)))

        output.append(row)

    return output
5137
5138
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the requested output fields.

    """
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests shared locks on the given nodes, or on all of them.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      wanted = locking.ALL_SET
    else:
      wanted = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    @return: a list of rows, one per storage unit, each holding the
        values of the requested output fields

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    output_fields = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in output_fields:
      fields = output_fields[:]
    else:
      fields = [constants.SF_NAME] + output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      for _ in range(fields.count(extra)):
        fields.remove(extra)

    field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    def _BuildRow(node, row):
      """Picks the requested fields for one storage unit.

      """
      out = []
      for field in output_fields:
        if field == constants.SF_NODE:
          out.append(node)
        elif field == constants.SF_TYPE:
          out.append(self.op.storage_type)
        elif field in field_idx:
          out.append(row[field_idx[field]])
        else:
          raise errors.ParameterError(field)
      return out

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      # Index the rows by unit name so they can be emitted in sorted order
      rows = dict((row[name_idx], row) for row in nresult.payload)

      for name in utils.NiceSort(rows.keys()):
        result.append(_BuildRow(node, rows[name]))

    return result
5220
5221
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted instances and the locks needed to query them.

    Locks are only requested when locking was asked for and live data is
    among the requested data; node group and node locks start out empty
    and are filled in by L{DeclareLocks}.

    @param lu: the logical unit on whose behalf the query runs

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # Group locks are only taken when node data is requested as well
    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    """Fills in the node group and node locks for the owned instances.

    Nothing is done unless locking is in use.

    @param lu: the logical unit on whose behalf the query runs
    @param level: the locking level being declared

    """
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    """Verifies the owned group locks against the owned instances.

    @param lu: the logical unit owning the locks

    """
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Only the kinds of data listed in C{self.requested_data} are gathered;
    disk usage, console information, nodes and groups are passed on as
    C{None} when not requested.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.InstanceQueryData} object

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    # Names of all nodes used by the selected instances; note that the
    # "nodes" variable is rebound further down in the IQ_NODES section
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                # NOTE(review): this merges the node's whole payload, not
                # only the entry for "inst", and is repeated for every
                # matching instance -- confirm this is intended
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      # From here on "nodes" maps node names to node objects
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
5352
5353
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  All phases are forwarded to the query implementation selected by the
  opcode's C{what} parameter.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation for the requested kind.

    """
    impl_cls = _GetQueryImplementation(self.op.what)
    self.impl = impl_cls(self.op.qfilter, self.op.fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Lets the implementation set up the locking requirements.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Lets the implementation declare the locks for the given level.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its new-style result.

    """
    return self.impl.NewStyleQuery(self)
5374
5375
class LUQueryFields(NoHooksLU):
  """Query for the fields available for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation for the requested kind.

    """
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Returns the field definitions for the requested fields.

    """
    field_defs = self.qcls.FIELDS
    return query.QueryFields(field_defs, self.op.fields)
5391
5392
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type has no modifiable
        fields or a field outside the modifiable ones is to be changed

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if storage_type not in constants.MODIFIABLE_STORAGE_FIELDS:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)
    modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]

    unsupported = set(self.op.changes.keys()) - modifiable
    if not unsupported:
      return

    raise errors.OpPrereqError("The following fields can not be modified for"
                               " storage units of type '%s': %r" %
                               (storage_type, list(unsupported)),
                               errors.ECODE_INVAL)

  def ExpandNames(self):
    """Requests the lock of the node holding the storage unit.

    """
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = self.op.node_name

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit.

    """
    unit_name = self.op.name
    node_name = self.op.node_name
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(node_name,
                                          self.op.storage_type, st_args,
                                          unit_name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (unit_name, node_name))
5433
5434
5435 class LUNodeAdd(LogicalUnit):
5436   """Logical unit for adding node to the cluster.
5437
5438   """
5439   HPATH = "node-add"
5440   HTYPE = constants.HTYPE_NODE
5441   _NFLAGS = ["master_capable", "vm_capable"]
5442
5443   def CheckArguments(self):
5444     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5445     # validate/normalize the node name
5446     self.hostname = netutils.GetHostname(name=self.op.node_name,
5447                                          family=self.primary_ip_family)
5448     self.op.node_name = self.hostname.name
5449
5450     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5451       raise errors.OpPrereqError("Cannot readd the master node",
5452                                  errors.ECODE_STATE)
5453
5454     if self.op.readd and self.op.group:
5455       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5456                                  " being readded", errors.ECODE_INVAL)
5457
5458   def BuildHooksEnv(self):
5459     """Build hooks env.
5460
5461     This will run on all nodes before, and on all nodes + the new node after.
5462
5463     """
5464     return {
5465       "OP_TARGET": self.op.node_name,
5466       "NODE_NAME": self.op.node_name,
5467       "NODE_PIP": self.op.primary_ip,
5468       "NODE_SIP": self.op.secondary_ip,
5469       "MASTER_CAPABLE": str(self.op.master_capable),
5470       "VM_CAPABLE": str(self.op.vm_capable),
5471       }
5472
5473   def BuildHooksNodes(self):
5474     """Build hooks nodes.
5475
5476     """
5477     # Exclude added node
5478     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5479     post_nodes = pre_nodes + [self.op.node_name, ]
5480
5481     return (pre_nodes, post_nodes)
5482
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the secondary IP is a valid IPv4 address
     - the new node is not already in the config (or, for re-adds, that
       it is)
     - its IP addresses do not conflict with those of existing nodes
     - its parameters (single/dual homed) matches the cluster
     - it is reachable via TCP on the node daemon port
     - its protocol version matches the one of the master

    As a result, C{self.new_node}, C{self.master_candidate} and
    C{self.changed_primary_ip} are set; C{self.new_hv_state} and
    C{self.new_disk_state} are set only if the respective opcode
    parameters were given.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    # The primary IP always comes from name resolution
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      # No secondary IP given: the node is treated as single-homed
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    # Compare the new node's addresses with those of all configured nodes
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        # A re-added node may change its primary IP, but not its secondary
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      # Unset flags are inherited from the existing node object
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      # Unset flags default to True for new nodes
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    # On re-adds the node itself is passed as an exception to the
    # self-promotion decision
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      # Re-adds reuse the node object from the configuration
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)

    # TODO: If we need to have multiple DnsOnlyRunner we probably should make
    #       it a property on the base class.
    result = rpc.DnsOnlyRunner().call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    # The node must speak exactly the same protocol version as the master
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpPrereqError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.payload),
                                 errors.ECODE_ENVIRON)
5630
5631   def Exec(self, feedback_fn):
5632     """Adds the new node to the cluster.
5633
5634     """
5635     new_node = self.new_node
5636     node = new_node.name
5637
5638     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5639       "Not owning BGL"
5640
5641     # We adding a new node so we assume it's powered
5642     new_node.powered = True
5643
5644     # for re-adds, reset the offline/drained/master-candidate flags;
5645     # we need to reset here, otherwise offline would prevent RPC calls
5646     # later in the procedure; this also means that if the re-add
5647     # fails, we are left with a non-offlined, broken node
5648     if self.op.readd:
5649       new_node.drained = new_node.offline = False # pylint: disable=W0201
5650       self.LogInfo("Readding a node, the offline/drained flags were reset")
5651       # if we demote the node, we do cleanup later in the procedure
5652       new_node.master_candidate = self.master_candidate
5653       if self.changed_primary_ip:
5654         new_node.primary_ip = self.op.primary_ip
5655
5656     # copy the master/vm_capable flags
5657     for attr in self._NFLAGS:
5658       setattr(new_node, attr, getattr(self.op, attr))
5659
5660     # notify the user about any possible mc promotion
5661     if new_node.master_candidate:
5662       self.LogInfo("Node will be a master candidate")
5663
5664     if self.op.ndparams:
5665       new_node.ndparams = self.op.ndparams
5666     else:
5667       new_node.ndparams = {}
5668
5669     if self.op.hv_state:
5670       new_node.hv_state_static = self.new_hv_state
5671
5672     if self.op.disk_state:
5673       new_node.disk_state_static = self.new_disk_state
5674
5675     # Add node to our /etc/hosts, and add key to known_hosts
5676     if self.cfg.GetClusterInfo().modify_etc_hosts:
5677       master_node = self.cfg.GetMasterNode()
5678       result = self.rpc.call_etc_hosts_modify(master_node,
5679                                               constants.ETC_HOSTS_ADD,
5680                                               self.hostname.name,
5681                                               self.hostname.ip)
5682       result.Raise("Can't update hosts file with new host data")
5683
5684     if new_node.secondary_ip != new_node.primary_ip:
5685       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5686                                False)
5687
5688     node_verify_list = [self.cfg.GetMasterNode()]
5689     node_verify_param = {
5690       constants.NV_NODELIST: ([node], {}),
5691       # TODO: do a node-net-test as well?
5692     }
5693
5694     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5695                                        self.cfg.GetClusterName())
5696     for verifier in node_verify_list:
5697       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5698       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5699       if nl_payload:
5700         for failed in nl_payload:
5701           feedback_fn("ssh/hostname verification failed"
5702                       " (checking from %s): %s" %
5703                       (verifier, nl_payload[failed]))
5704         raise errors.OpExecError("ssh/hostname verification failed")
5705
5706     if self.op.readd:
5707       _RedistributeAncillaryFiles(self)
5708       self.context.ReaddNode(new_node)
5709       # make sure we redistribute the config
5710       self.cfg.Update(new_node, feedback_fn)
5711       # and make sure the new node will not have old files around
5712       if not new_node.master_candidate:
5713         result = self.rpc.call_node_demote_from_mc(new_node.name)
5714         msg = result.fail_msg
5715         if msg:
5716           self.LogWarning("Node failed to demote itself from master"
5717                           " candidate status: %s" % msg)
5718     else:
5719       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5720                                   additional_vm=self.op.vm_capable)
5721       self.context.AddNode(new_node, self.proc.GetECId())
5722
5723
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validate the opcode parameters and decide on the locking strategy.

    Sets C{self.might_demote}, C{self.lock_all} and C{self.lock_instances}
    for use by the later phases.

    @raise errors.OpPrereqError: if no modification was requested, if more
        than one of the inspected parameters is C{True}, or if the new
        secondary IP is not a valid IPv4 address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate is False or
                         self.op.offline is True or
                         self.op.drained is True or
                         self.op.master_capable is False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    @param instance: the instance object to examine
    @return: whether the instance uses an internally mirrored disk template
        and has the node being modified among its nodes

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    """Declare the locks needed for the modification.

    Either all nodes or only the modified node are locked, depending on
    C{self.lock_all}; the node resource locks mirror the node locks.
    Instance locks are only requested when the secondary IP is changed.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get node resource and instance locks in shared mode; they are not used
    # for anything but read-only access
    self.share_locks[locking.LEVEL_NODE_RES] = 1
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    @return: dictionary with the target node name and the string form of
        the requested flag values

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two node lists, each holding the master node and the
        node being modified

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the requested flag, role, power and secondary IP changes
    are possible for the node, computes the old and new node role
    (C{self.old_role}, C{self.new_role}) and prepares the merged node
    parameters and hypervisor/disk states for L{Exec}.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable is False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if getattr(self.op, attr) is False and getattr(node, attr) is False:
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered is True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained is False or self.op.offline is False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable is False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      # A secondary IP change implies lock_instances, hence the affected
      # instances were computed above
      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()),
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    @param feedback_fn: callable passed on to the configuration update
    @return: list of (parameter name, new value) tuples describing the
        capability flag, role flag and secondary IP changes applied

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
6057
6058
class LUNodePowercycle(NoHooksLU):
  """Logical unit requesting a powercycle of a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and guard the master node.

    @raise errors.OpPrereqError: if the target is the master node and the
        force parameter is not set

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    targets_master = (self.op.node_name == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    No locks are acquired: this is a last-resort operation which must not
    wait for other jobs.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC call

    """
    hv_type = self.cfg.GetHypervisorType()
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name, hv_type)
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
6089
6090
class LUClusterQuery(NoHooksLU):
  """Logical unit returning the cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @return: dictionary with version constants and the cluster-wide
        settings read from the cluster configuration object

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, restricted to the enabled hypervisors;
    # every OS keeps its entry even if nothing is left after filtering
    os_hvp = dict((os_name,
                   dict((hv_name, hv_params)
                        for (hv_name, hv_params) in hv_dict.items()
                        if hv_name in enabled_hvs))
                  for (os_name, hv_dict) in cluster.os_hvp.items())

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict((hv_name, cluster.hvparams[hv_name])
                       for hv_name in cluster.enabled_hypervisors),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
6161
6162
class LUClusterConfigQuery(NoHooksLU):
  """Logical unit returning selected configuration values.

  All the work is delegated to a L{_ClusterQuery} helper object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query helper for the requested output fields.

    """
    self.cq = _ClusterQuery(None, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query helper declare the needed locks.

    """
    self.cq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forward the lock declaration for C{level} to the query helper.

    """
    self.cq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its single result row.

    """
    rows = self.cq.OldStyleQuery(self)

    # the query is expected to yield exactly one item
    assert len(rows) == 1

    return rows[0]
6184
6185
class _ClusterQuery(_QueryBase):
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)
  SORT_FIELD = None

  def ExpandNames(self, lu):
    """Declare that no locks are needed and reject locked queries.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if locking was requested

    """
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking

    if not self.do_locking:
      return

    raise errors.OpPrereqError("Can not use locking for cluster queries",
                               errors.ECODE_INVAL)

  def DeclareLocks(self, lu, level):
    """Do nothing, cluster queries take no locks.

    """
    pass

  def _GetQueryData(self, lu):
    """Collects the data needed to answer the cluster query.

    Only the data kinds listed in C{self.requested_data} are gathered,
    the other ones are passed on as C{NotImplemented}.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.ClusterQueryData} object

    """
    # Locking is not used
    assert (not compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) and
            not self.do_locking and not self.use_locking)

    requested = self.requested_data
    cluster = drain_flag = watcher_pause = NotImplemented

    if query.CQ_CONFIG in requested:
      cluster = lu.cfg.GetClusterInfo()

    if query.CQ_QUEUE_DRAINED in requested:
      drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)

    if query.CQ_WATCHER_PAUSE in requested:
      watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)

    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6232
6233
class LUInstanceActivateDisks(NoHooksLU):
  """Logical unit bringing up the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the instance's node locks at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its primary
    node passes the online check.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (all_ok, dev_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if all_ok:
      return dev_info

    raise errors.OpExecError("Cannot activate block devices")
6271
6272
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes, first everywhere in
  secondary mode and then, in a second pass, on the primary node in
  primary mode.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors seen during the first
      (secondary mode) pass do not mark the operation as failed
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: (disks_ok, device_info), where disks_ok is False if any
      relevant assembly failed and device_info is a list of
      (primary node, instance-visible name, result payload or None)
      tuples, one per disk

  """
  primary = instance.primary_node
  inst_name = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  def _AssembleOnNode(node, node_disk, idx, as_primary):
    """Set up the disk IDs for C{node} and issue the assemble RPC.

    """
    if ignore_size:
      # work on a copy so that the configured size is left untouched
      node_disk = node_disk.Copy()
      node_disk.UnsetSize()
    lu.cfg.SetDiskID(node_disk, node)
    return lu.rpc.call_blockdev_assemble(node, node_disk, inst_name,
                                         as_primary, idx)

  all_ok = True
  dev_mapping = []

  # Assembling in two passes narrows, but does not close, the window for
  # the race of switching DRBD to primary before the handshake has
  # occurred.

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # Pass 1: every node, secondary mode
  for (idx, inst_disk) in enumerate(disks):
    for (node, node_disk) in inst_disk.ComputeNodeTree(primary):
      err = _AssembleOnNode(node, node_disk, idx, False).fail_msg
      if not err:
        continue
      lu.proc.LogWarning("Could not prepare block device %s on node %s"
                         " (is_primary=False, pass=1): %s",
                         inst_disk.iv_name, node, err)
      if not ignore_secondaries:
        all_ok = False

  # FIXME: race condition on drbd migration to primary

  # Pass 2: primary node only, primary mode
  for (idx, inst_disk) in enumerate(disks):
    dev_path = None

    for (node, node_disk) in inst_disk.ComputeNodeTree(primary):
      if node != primary:
        continue
      assemble_result = _AssembleOnNode(node, node_disk, idx, True)
      err = assemble_result.fail_msg
      if err:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, err)
        all_ok = False
      else:
        dev_path = assemble_result.payload

    dev_mapping.append((primary, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, primary)

  return all_ok, dev_mapping
6359
6360
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  If the disks cannot be assembled they are shut down again and the
  operation is aborted.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed as C{ignore_secondaries} to the disk assembly; a
      retry hint is logged on failure when it is false but not None
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("",
                       hint="If the message above refers to a secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
6374
6375
class LUInstanceDeactivateDisks(NoHooksLU):
  """Logical unit shutting down the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node lock level.

    The node locks start out empty and are recalculated (replaced) once
    the instance lock is held, see L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks of the instance at the node level.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    With C{force} set the disks are shut down unconditionally,
    otherwise L{_SafeShutdownInstanceDisks} is used, which checks the
    instance state first.

    """
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks
    shutdown_fn(self, self.instance)
6410
6411
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance after a state check.

  Before delegating to L{_ShutdownInstanceDisks}, the instance state is
  verified through L{_CheckInstanceState} against C{INSTANCE_DOWN}.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks we shut down
  @param disks: passed through unchanged to L{_ShutdownInstanceDisks}

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN,
                      msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks)
6421
6422
6423 def _ExpandCheckDisks(instance, disks):
6424   """Return the instance disks selected by the disks list
6425
6426   @type disks: list of L{objects.Disk} or None
6427   @param disks: selected disks
6428   @rtype: list of L{objects.Disk}
6429   @return: selected instance disks to act on
6430
6431   """
6432   if disks is None:
6433     return instance.disks
6434   else:
6435     if not set(disks).issubset(instance.disks):
6436       raise errors.ProgrammerError("Can only act on disks belonging to the"
6437                                    " target instance")
6438     return disks
6439
6440
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If C{ignore_primary} is true, errors on the primary node are
  ignored; otherwise they make the function return C{False}. Errors on
  any other node are ignored only if the RPC result is flagged as
  offline. Every failure is logged as a warning regardless.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks we shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: the disks to shut down (they must belong to the
      instance), or C{None} for all disks of the instance
  @type ignore_primary: boolean
  @param ignore_primary: whether failures on the primary node should
      leave the result untouched
  @rtype: boolean
  @return: C{False} if any failure that is not ignored was seen,
      C{True} otherwise

  """
  all_ok = True
  primary = instance.primary_node

  for disk in _ExpandCheckDisks(instance, disks):
    for node, top_disk in disk.ComputeNodeTree(primary):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if not msg:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, msg)
      # decide per node kind whether this failure may be disregarded
      if node == primary:
        ignorable = ignore_primary
      else:
        ignorable = result.offline
      if not ignorable:
        all_ok = False

  return all_ok
6465
6466
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node has at least the requested free memory.

  The free memory is read from the C{memory_free} value the node
  reports for the given hypervisor. If the node cannot be queried, the
  value is not an integer, or it is lower than requested, an
  OpPrereqError is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: integer
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, [hypervisor_name])[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)
  # the payload is a triple whose last element holds exactly one entry,
  # the data of the hypervisor we asked for
  (_, _, (hv_data, )) = node_result.payload

  available = hv_data.get("memory_free")
  if not isinstance(available, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, available),
                               errors.ECODE_ENVIRON)

  if available < requested:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, available),
                               errors.ECODE_NORES)

  return available
6507
6508
6509 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6510   """Checks if nodes have enough free disk space in the all VGs.
6511
6512   This function check if all given nodes have the needed amount of
6513   free disk. In case any node has less disk or we cannot get the
6514   information from the node, this function raise an OpPrereqError
6515   exception.
6516
6517   @type lu: C{LogicalUnit}
6518   @param lu: a logical unit from which we get configuration data
6519   @type nodenames: C{list}
6520   @param nodenames: the list of node names to check
6521   @type req_sizes: C{dict}
6522   @param req_sizes: the hash of vg and corresponding amount of disk in
6523       MiB to check for
6524   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6525       or we cannot check the node
6526
6527   """
6528   for vg, req_size in req_sizes.items():
6529     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6530
6531
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  All nodes are queried with a single RPC call; the free space is read
  from the C{vg_free} value each node reports for the volume group. If
  a node cannot be queried, the value is not an integer, or it is
  lower than requested, an OpPrereqError is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  results = lu.rpc.call_node_info(nodenames, [vg], None)
  for name in nodenames:
    node_result = results[name]
    node_result.Raise("Cannot get current information from node %s" % name,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    # the middle payload element holds exactly one entry, the data of
    # the volume group we asked for
    (_, (vg_data, ), _) = node_result.payload

    available = vg_data.get("vg_free")
    if not isinstance(available, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (name, vg, available), errors.ECODE_ENVIRON)

    if available < requested:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (name, vg, requested, available),
                                 errors.ECODE_NORES)
6568
6569
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs.

  All nodes are queried with a single RPC call; the CPU count is read
  from the C{cpu_total} value each node reports for the given
  hypervisor. If a node cannot be queried, the value is not an integer,
  or it is lower than requested, an OpPrereqError is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @param hypervisor_name: the hypervisor to ask for the CPU count
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  results = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for name in nodenames:
    node_result = results[name]
    node_result.Raise("Cannot get current information from node %s" % name,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    # the last payload element holds exactly one entry, the data of the
    # hypervisor we asked for
    (_, _, (hv_data, )) = node_result.payload

    cpu_count = hv_data.get("cpu_total")
    if not isinstance(cpu_count, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (name, cpu_count), errors.ECODE_ENVIRON)

    if cpu_count < requested:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (name, cpu_count, requested),
                                 errors.ECODE_NORES)
6604
class LUInstanceStartup(LogicalUnit):
  """Logical unit starting an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Upgrade and type-check the backend parameter overrides, if any.

    """
    beparams = self.op.beparams
    if not beparams:
      return
    # the dict is upgraded in place before its types are enforced
    objects.UpgradeBeParams(beparams)
    utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Lock the instance and request recalculation of node resource locks.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the resource lock of the instance's primary node.

    """
    if level != locking.LEVEL_NODE_RES:
      return
    self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {}
    env["FORCE"] = self.op.force
    # instance-derived values are applied last and hence take precedence
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a pair whose both elements are the list made of the master
        node followed by all nodes of the instance

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates any
    hypervisor parameter overrides and, unless an offline primary node
    is being ignored, checks the node, the bridges and the free memory.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    hv_overrides = self.op.hvparams
    if hv_overrides:
      # syntax is verified locally first, then on the instance's nodes
      cluster_info = self.cfg.GetClusterInfo()
      utils.ForceDictType(hv_overrides, constants.HVS_PARAMETER_TYPES)
      merged_hvp = cluster_info.FillHV(instance)
      merged_hvp.update(hv_overrides)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(merged_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor,
                     merged_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
      return

    _CheckNodeOnline(self, instance.primary_node)

    filled_be = self.cfg.GetClusterInfo().FillBE(instance)
    filled_be.update(self.op.beparams)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    instance_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
    instance_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
    if not instance_info.payload:
      # the instance is not running yet, so it will need its memory
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           filled_be[constants.BE_MINMEM],
                           instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    Unless C{no_remember} is set, the instance is first marked as up in
    the configuration. With an offline primary node nothing else is
    done; otherwise the disks are started and the instance is started
    on its primary node, shutting the disks down again on failure.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
      return

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start_args = (instance, self.op.hvparams, self.op.beparams)
    result = self.rpc.call_instance_start(pnode, start_args,
                                          self.op.startup_paused)
    err = result.fail_msg
    if err:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % err)
6725
6726
class LUInstanceReboot(LogicalUnit):
  """Logical unit rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {}
    env["IGNORE_SECONDARIES"] = self.op.ignore_secondaries
    env["REBOOT_TYPE"] = self.op.reboot_type
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    # instance-derived values are applied last and hence take precedence
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a pair whose both elements are the list made of the master
        node followed by all nodes of the instance

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, its state, that
    its primary node is online and that its bridges exist.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    A running instance with a soft or hard reboot type is rebooted
    through the reboot RPC. In all other cases the instance is shut
    down (if running), its disks are restarted and it is started
    again. Finally the instance is marked as up in the configuration.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    instance_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
    instance_info.Raise("Error checking node %s" % instance.primary_node)
    is_running = bool(instance_info.payload)

    pnode = instance.primary_node

    in_place = (is_running and
                reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                                constants.INSTANCE_REBOOT_HARD))
    if in_place:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      result = self.rpc.call_instance_reboot(pnode, instance, reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if is_running:
        result = self.rpc.call_instance_shutdown(pnode, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)

      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(pnode, (instance, None, None),
                                            False)
      err = result.fail_msg
      if err:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % err)

    self.cfg.MarkInstanceUp(instance.name)
6819
6820
class LUInstanceShutdown(LogicalUnit):
  """Logical unit shutting down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.op.timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a pair whose both elements are the list made of the master
        node followed by all nodes of the instance

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, its state and,
    unless an offline primary node is being ignored, that the primary
    node is online.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    pnode_info = self.cfg.GetNodeInfo(self.instance.primary_node)
    self.primary_offline = pnode_info.offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
      return

    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    Unless C{no_remember} is set, the instance is first marked as down
    in the configuration. With an offline primary node nothing else is
    done; otherwise the instance is shut down (a failure is only
    logged) and its disks are shut down afterwards.

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    result = self.rpc.call_instance_shutdown(pnode, instance, timeout)
    err = result.fail_msg
    if err:
      self.proc.LogWarning("Could not shutdown instance: %s" % err)

    _ShutdownInstanceDisks(self, instance)
6890
6891
class LUInstanceReinstall(LogicalUnit):
  """Logical unit reinstalling the OS of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a pair whose both elements are the list made of the master
        node followed by all nodes of the instance

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    It also requires all its nodes to be online and the instance to
    have disks, and validates a new OS and new OS parameters if given.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for snode in instance.secondary_nodes:
      _CheckNodeOnline(self, snode, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    requested_os = self.op.os_type
    if requested_os is None:
      target_os = instance.os
    else:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, requested_os, self.op.force_variant)
      target_os = requested_os

    all_nodes = list(instance.all_nodes)

    if self.op.osparams:
      new_osparams = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, all_nodes, target_os, new_osparams)
      # the new dict (without defaults)
      self.os_inst = new_osparams
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS was requested it is written to the configuration
    first. The disks are then started, the OS create scripts are run
    on the primary node and the disks are shut down again, whether or
    not the installation succeeded.

    """
    instance = self.instance
    requested_os = self.op.os_type

    if requested_os is not None:
      feedback_fn("Changing OS to '%s'..." % requested_os)
      instance.os = requested_os
      # Write to configuration
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(instance.primary_node,
                                             (instance, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
6981
6982
class LUInstanceRecreateDisks(LogicalUnit):
  """Logical unit recreating the missing disks of an instance.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  # disk parameters that may be changed while recreating a disk
  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def CheckArguments(self):
    """Normalize and validate the list of disks to recreate.

    A deprecated plain list of disk indices is converted into a list
    of C{(index, parameters)} pairs with empty parameters. Duplicate
    indices and parameters outside L{_MODIFYABLE} are rejected.

    """
    if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      unique_indices = sorted(frozenset(self.op.disks))
      self.op.disks = [(idx, {}) for idx in unique_indices]

    indices = [compat.fst(entry) for entry in self.op.disks]
    duplicates = utils.FindDuplicates(indices)
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifyable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Lock the instance and prepare node and node resource locks.

    Replacement nodes given in the opcode are expanded and requested
    directly; the instance's own nodes are appended in
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, name)
                       for name in self.op.nodes]
      node_locks = list(self.op.nodes)
    else:
      node_locks = []
    self.needed_locks[locking.LEVEL_NODE] = node_locks
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    """Declare node locks and mirror them at the node resource level.

    """
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      self._LockInstancesNodes(primary_only=bool(self.op.nodes))
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        list(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a pair whose both elements are the list made of the master
        node followed by all nodes of the instance

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    It also validates the replacement nodes (if any) and the selected
    disk indices, and stores the selection in C{self.disks}.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    new_nodes = self.op.nodes
    if new_nodes:
      if len(new_nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(new_nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(new_nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(new_nodes) == 1
      pnode = new_nodes[0]
    else:
      pnode = instance.primary_node
    _CheckNodeOnline(self, pnode)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not self.op.nodes or not old_pnode.offline:
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      # no selection given: recreate all disks, without changes
      self.disks = dict((idx, {}) for (idx, _) in enumerate(instance.disks))

    highest = max(self.disks)
    if highest >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % highest,
                                 errors.ECODE_INVAL)

    all_indices = list(range(len(instance.disks)))
    if self.op.nodes and sorted(self.disks) != all_indices:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    All modifications are computed first and only then applied, so
    that a failing assertion does not leave partial changes behind.
    The configuration is written only if the nodes are changed.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    skipped = []
    pending = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.disks:
        # Disk should not be recreated
        skipped.append(idx)
        continue
      changes = self.disks[idx]

      # update secondaries for disks, if needed
      new_id = None
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  minors[0], minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)

      pending.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for (idx, new_id, changes) in pending:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))

    if self.op.nodes:
      # change primary node and store the updated instance
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=skipped)
7174
7175
class LUInstanceRename(LogicalUnit):
  """Logical unit which gives an existing instance a new name.

  The prerequisites require the primary node to be online and the instance
  state to pass the C{INSTANCE_NOT_RUNNING} check.  Besides the
  configuration, the instance lock, the file storage directory (for
  file-based disk templates) and the OS inside the instance (through the
  OS rename script) are updated.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Validate the combination of opcode flags.

    @raise errors.OpPrereqError: if an IP check was requested without
        also requesting a name check

    """
    # TODO: make the ip check more flexible and not depend on the name check
    ip_check_without_name_check = (self.op.ip_check and
                                   not self.op.name_check)
    if ip_check_without_name_check:
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Assemble the hooks environment.

    The generic per-instance environment is extended with the requested
    new name.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name
    return hook_env

  def BuildHooksNodes(self):
    """Compute the nodes on which hooks are run.

    @return: the same list (master node followed by all nodes of the
        instance) for both the pre and the post phase

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Verify that the rename can be carried out.

    The instance must exist, its primary node must be online and the
    instance must pass the not-running state check.  If a name check was
    requested the new name is resolved and replaced by the resolved
    name; if additionally an IP check was requested, the resolved
    address must not answer on the node daemon port.  Finally the new
    name must not belong to another instance.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    cfg = self.cfg
    full_name = _ExpandInstanceName(cfg, self.op.instance_name)
    self.op.instance_name = full_name
    instance = cfg.GetInstanceInfo(full_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    requested_name = self.op.new_name
    final_name = requested_name
    if self.op.name_check:
      resolved = netutils.GetHostname(name=requested_name)
      if resolved.name != requested_name:
        self.LogInfo("Resolved given name '%s' to '%s'", requested_name,
                     resolved.name)
      if not utils.MatchNameComponent(requested_name, [resolved.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                   (resolved.name, requested_name),
                                   errors.ECODE_INVAL)
      # From here on only the resolved name is used
      final_name = self.op.new_name = resolved.name
      if self.op.ip_check:
        if netutils.TcpPing(resolved.ip, constants.DEFAULT_NODED_PORT):
          raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                     (resolved.ip, final_name),
                                     errors.ECODE_NOTUNIQUE)

    known_instances = cfg.GetInstanceList()
    # Renaming an instance to its own current name is not a conflict
    if final_name in known_instances and final_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 final_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Perform the rename.

    @param feedback_fn: feedback function (not used by this method)
    @return: the name of the instance after the rename

    """
    inst = self.instance
    old_name = inst.name
    new_name = self.op.new_name

    # The storage directory only has to move for file-based templates and
    # only when the name really changes
    rename_file_storage = (inst.disk_template in constants.DTS_FILEBASED and
                           new_name != old_name)
    if rename_file_storage:
      # Has to be computed before the configuration is modified
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_name, new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, new_name)

    # The old object is stale now, fetch the renamed one
    inst = self.cfg.GetInstanceInfo(new_name)
    primary = inst.primary_node

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_result = self.rpc.call_file_storage_dir_rename(
        primary, old_file_storage_dir, new_file_storage_dir)
      rename_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                          " (but the instance has been renamed in Ganeti)" %
                          (primary, old_file_storage_dir,
                           new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      script_result = self.rpc.call_instance_run_rename(primary, inst,
                                                        old_name,
                                                        self.op.debug_level)
      failure = script_result.fail_msg
      if failure:
        # A failing OS script is only reported as a warning
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (inst.name, primary, failure))
        self.proc.LogWarning(warning)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
7294
7295
class LUInstanceRemove(LogicalUnit):
  """Logical unit which shuts down and removes an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declarations.

    The node and node resource lock lists start out empty and are filled
    in by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    for node_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
      self.needed_locks[node_level] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node-level locks.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE_RES:
      # The resource locks are a copy of the node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Assemble the hooks environment.

    The generic per-instance environment is extended with the shutdown
    timeout.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return hook_env

  def BuildHooksNodes(self):
    """Compute the nodes on which hooks are run.

    @return: tuple of (pre-phase nodes, post-phase nodes); the pre phase
        uses the master only, the post phase all nodes of the instance
        followed by the master

    """
    master_only = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes)
    post_nodes.extend(master_only)
    return (master_only, post_nodes)

  def CheckPrereq(self):
    """Load the instance object from the configuration.

    The instance is expected to exist, as its lock is already held.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Shut the instance down and remove it.

    @param feedback_fn: function used to report warnings to the user
    @raise errors.OpExecError: if the shutdown fails and failures are not
        ignored

    """
    instance = self.instance
    primary = instance.primary_node
    logging.info("Shutting down instance %s on node %s",
                 instance.name, primary)

    shutdown = self.rpc.call_instance_shutdown(primary, instance,
                                               self.op.shutdown_timeout)
    failure = shutdown.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node,
                                  failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    # Sanity checks on the locks before anything is destroyed
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7372
7373
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove an instance's disks, configuration entry and lock.

  @param lu: the logical unit on whose behalf the removal is done
  @param feedback_fn: function used to report warnings to the user
  @param instance: the instance object to remove
  @param ignore_failures: whether a failed disk removal is only reported
      as a warning instead of aborting
  @raise errors.OpExecError: if the disks cannot be removed and
      C{ignore_failures} is not set

  """
  name = instance.name
  logging.info("Removing block devices for instance %s", name)

  disks_removed = _RemoveDisks(lu, instance, ignore_failures=ignore_failures)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", name)

  lu.cfg.RemoveInstance(instance.name)

  # Nobody else may have scheduled an instance lock removal already
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Schedule the instance's lock for removal
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7394
7395
class LUInstanceQuery(NoHooksLU):
  """Logical unit answering instance queries.

  All the work is delegated to an L{_InstanceQuery} object built from the
  opcode parameters.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Build the query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object declare the names to lock.

    """
    query = self.iq
    query.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks of the given level.

    @param level: the locking level being declared

    """
    query = self.iq
    query.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query.

    @param feedback_fn: feedback function (not used by this method)
    @return: the result of the query object's old-style query

    """
    query = self.iq
    return query.OldStyleQuery(self)
7415
7416
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The operation itself is carried out by a L{TLMigrateInstance} tasklet
  created with C{failover=True}; this logical unit only declares the
  locks and provides the hooks environment and node lists.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Stores the optional C{iallocator} and C{target_node} opcode fields on
    the logical unit, using C{None} when the opcode lacks them.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    """Expand names, declare the lock levels and create the tasklet.

    The node and node resource lock lists start out empty and are filled
    in by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout,
                                       ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or just the primary and the target node are locked; for
    all other templates the instance's own nodes are locked.  The node
    resource locks mirror the node locks.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        # The locks were set explicitly, nothing is left to recalculate
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks.  The node level may have been declared as
      # locking.ALL_SET above; that is a sentinel rather than a list
      # (None in Ganeti's locking module -- TODO confirm) and must be
      # passed on as is instead of being sliced.
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      if node_locks == locking.ALL_SET:
        self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    The failover-specific variables are computed first and then merged
    with the generic per-instance environment.  For internally mirrored
    disk templates the old secondary is the instance's first secondary
    node and the new secondary is the current primary; otherwise both
    secondary variables are empty strings.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    # NOTE(review): this is the target as given in the opcode, so it is
    # None when no target node was passed
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-phase nodes, post-phase nodes); the pre phase
        uses the master and the secondary nodes, the post phase
        additionally the primary node

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7504
7505
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  The operation itself is carried out by a L{TLMigrateInstance} tasklet
  created with C{failover=False}; this logical unit only declares the
  locks and provides the hooks environment and node lists.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names, declare the lock levels and create the tasklet.

    The node and node resource lock lists start out empty and are filled
    in by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # The node resource level has to be declared as well (as done for
    # failover); without it the LEVEL_NODE_RES branch of DeclareLocks has
    # nothing to fill in and no node resource locks are requested
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=self.op.cleanup,
                        failover=False,
                        fallback=self.op.allow_failover,
                        allow_runtime_changes=self.op.allow_runtime_changes,
                        ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or just the primary and the target node are locked; for
    all other templates the instance's own nodes are locked.  The node
    resource locks mirror the node locks.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        # The locks were set explicitly, nothing is left to recalculate
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks.  The node level may have been declared as
      # locking.ALL_SET above; that is a sentinel rather than a list
      # (None in Ganeti's locking module -- TODO confirm) and must be
      # passed on as is instead of being sliced.
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      if node_locks == locking.ALL_SET:
        self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    The generic per-instance environment is extended with the
    migration-specific variables.  For internally mirrored disk
    templates the old secondary is the target node and the new secondary
    is the current primary; otherwise both secondary variables are
    C{None}.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    # NOTE(review): this is the target as given in the opcode, so it is
    # None when no target node was passed
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-phase nodes, post-phase nodes); the pre phase
        uses the master and the secondary nodes, the post phase
        additionally the primary node

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7588
7589
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its current primary node, new disks are
  created on the target node and filled by exporting the data from the
  source node.  Afterwards the configuration is switched to the target
  node, the disks on the source node are removed and the instance is
  started again if it is marked as up.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the target node name and declare the needed locks.

    The node lock list initially holds only the target node; the
    instance's primary node is appended in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    The move-specific variables (target node and shutdown timeout) are
    merged with the generic per-instance environment.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list (master node, the instance's primary node and
        the target node) for both the pre and the post phase

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that the target node
    differs from the current primary node and that all disks are of the
    plain logical volume or file type.  The target node is additionally
    passed through the online, not-drained, vm-capable, instance policy,
    free memory (only if the instance is marked up) and bridge checks.

    @raise errors.OpPrereqError: if the instance already lives on the
        target node or one of its disks has an unsupported type

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only disks of the LD_LV and LD_FILE types are accepted
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
                                     self.cfg.GetNodeGroup(node.group))
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the target node
      # NOTE(review): the message texts below speak of "failing over" and
      # of the "secondary node" although this is a move to the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: feedback function, passed on to the configuration
        update
    @raise errors.OpExecError: if the shutdown fails (and consistency is
        not ignored), if the disks cannot be created or copied, or if the
        instance cannot be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # a failed shutdown is only tolerated if explicitly requested
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        # clean up whatever was created on the target node
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        # propagate the failure to the caller
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        # the first failure stops the copy, remaining disks are skipped
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        # drop the (partially filled) disks on the target node
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        # raised from the finally clause, so the copy errors are reported
        # even if the disk removal itself raised
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # all data is on the target node now, switch the configuration over
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        # do not leave the disks active if the instance could not start
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
7785
7786
class LUNodeMigrate(LogicalUnit):
  """Logical unit migrating all primary instances away from a node.

  No instance is migrated by this logical unit itself; it submits one
  instance migration job per primary instance of the node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments; nothing is verified here.

    """
    pass

  def ExpandNames(self):
    """Expand the node name and declare the node lock.

    All locks are acquired in shared mode.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.share_locks = _ShareAll()
    self.needed_locks = {locking.LEVEL_NODE: [node_name]}

  def BuildHooksEnv(self):
    """Assemble the hooks environment.

    @return: dictionary with the node name and the runtime changes flag

    """
    hook_env = {}
    hook_env["NODE_NAME"] = self.op.node_name
    hook_env["ALLOW_RUNTIME_CHANGES"] = self.op.allow_runtime_changes
    return hook_env

  def BuildHooksNodes(self):
    """Compute the nodes on which hooks are run.

    @return: the same list (master node only) for both the pre and the
        post phase

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)

  def CheckPrereq(self):
    """Check prerequisites; nothing is verified here.

    """
    pass

  def Exec(self, feedback_fn):
    """Submit a migration job for every primary instance of the node.

    @param feedback_fn: feedback function (not used by this method)
    @return: a L{ResultWithJobs} holding one single-opcode job per
        instance

    """
    allow_runtime_changes = self.op.allow_runtime_changes

    # One job, made of a single migrate opcode, per primary instance
    jobs = []
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      migrate_op = opcodes.OpInstanceMigrate(
        instance_name=inst.name,
        mode=self.op.mode,
        live=self.op.live,
        iallocator=self.op.iallocator,
        target_node=self.op.target_node,
        allow_runtime_changes=allow_runtime_changes,
        ignore_ipolicy=self.op.ignore_ipolicy)
      jobs.append([migrate_op])

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
    assert owned_nodes == frozenset([self.op.node_name])

    return ResultWithJobs(jobs)
7850
7851
7852 class TLMigrateInstance(Tasklet):
7853   """Tasklet class for instance migration.
7854
7855   @type live: boolean
7856   @ivar live: whether the migration will be done live or non-live;
7857       this variable is initalized only after CheckPrereq has run
7858   @type cleanup: boolean
7859   @ivar cleanup: Wheater we cleanup from a failed migration
7860   @type iallocator: string
7861   @ivar iallocator: The iallocator used to determine target_node
7862   @type target_node: string
7863   @ivar target_node: If given, the target_node to reallocate the instance to
7864   @type failover: boolean
7865   @ivar failover: Whether operation results in failover or migration
7866   @type fallback: boolean
7867   @ivar fallback: Whether fallback to failover is allowed if migration not
7868                   possible
7869   @type ignore_consistency: boolean
7870   @ivar ignore_consistency: Wheter we should ignore consistency between source
7871                             and target node
7872   @type shutdown_timeout: int
7873   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7874   @type ignore_ipolicy: bool
7875   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7876
7877   """
7878
7879   # Constants
7880   _MIGRATION_POLL_INTERVAL = 1      # seconds
7881   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7882
7883   def __init__(self, lu, instance_name, cleanup=False,
7884                failover=False, fallback=False,
7885                ignore_consistency=False,
7886                allow_runtime_changes=True,
7887                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7888                ignore_ipolicy=False):
7889     """Initializes this class.
7890
7891     """
7892     Tasklet.__init__(self, lu)
7893
7894     # Parameters
7895     self.instance_name = instance_name
7896     self.cleanup = cleanup
7897     self.live = False # will be overridden later
7898     self.failover = failover
7899     self.fallback = fallback
7900     self.ignore_consistency = ignore_consistency
7901     self.shutdown_timeout = shutdown_timeout
7902     self.ignore_ipolicy = ignore_ipolicy
7903     self.allow_runtime_changes = allow_runtime_changes
7904
7905   def CheckPrereq(self):
7906     """Check prerequisites.
7907
7908     This checks that the instance is in the cluster.
7909
7910     """
7911     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7912     instance = self.cfg.GetInstanceInfo(instance_name)
7913     assert instance is not None
7914     self.instance = instance
7915     cluster = self.cfg.GetClusterInfo()
7916
7917     if (not self.cleanup and
7918         not instance.admin_state == constants.ADMINST_UP and
7919         not self.failover and self.fallback):
7920       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7921                       " switching to failover")
7922       self.failover = True
7923
7924     if instance.disk_template not in constants.DTS_MIRRORED:
7925       if self.failover:
7926         text = "failovers"
7927       else:
7928         text = "migrations"
7929       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7930                                  " %s" % (instance.disk_template, text),
7931                                  errors.ECODE_STATE)
7932
7933     if instance.disk_template in constants.DTS_EXT_MIRROR:
7934       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7935
7936       if self.lu.op.iallocator:
7937         self._RunAllocator()
7938       else:
7939         # We set set self.target_node as it is required by
7940         # BuildHooksEnv
7941         self.target_node = self.lu.op.target_node
7942
7943       # Check that the target node is correct in terms of instance policy
7944       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7945       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7946       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7947       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7948                               ignore=self.ignore_ipolicy)
7949
7950       # self.target_node is already populated, either directly or by the
7951       # iallocator run
7952       target_node = self.target_node
7953       if self.target_node == instance.primary_node:
7954         raise errors.OpPrereqError("Cannot migrate instance %s"
7955                                    " to its primary (%s)" %
7956                                    (instance.name, instance.primary_node))
7957
7958       if len(self.lu.tasklets) == 1:
7959         # It is safe to release locks only when we're the only tasklet
7960         # in the LU
7961         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7962                       keep=[instance.primary_node, self.target_node])
7963
7964     else:
7965       secondary_nodes = instance.secondary_nodes
7966       if not secondary_nodes:
7967         raise errors.ConfigurationError("No secondary node but using"
7968                                         " %s disk template" %
7969                                         instance.disk_template)
7970       target_node = secondary_nodes[0]
7971       if self.lu.op.iallocator or (self.lu.op.target_node and
7972                                    self.lu.op.target_node != target_node):
7973         if self.failover:
7974           text = "failed over"
7975         else:
7976           text = "migrated"
7977         raise errors.OpPrereqError("Instances with disk template %s cannot"
7978                                    " be %s to arbitrary nodes"
7979                                    " (neither an iallocator nor a target"
7980                                    " node can be passed)" %
7981                                    (instance.disk_template, text),
7982                                    errors.ECODE_INVAL)
7983       nodeinfo = self.cfg.GetNodeInfo(target_node)
7984       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7985       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7986       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7987                               ignore=self.ignore_ipolicy)
7988
7989     i_be = cluster.FillBE(instance)
7990
7991     # check memory requirements on the secondary node
7992     if (not self.cleanup and
7993          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7994       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7995                                                "migrating instance %s" %
7996                                                instance.name,
7997                                                i_be[constants.BE_MINMEM],
7998                                                instance.hypervisor)
7999     else:
8000       self.lu.LogInfo("Not checking memory on the secondary node as"
8001                       " instance will not be started")
8002
8003     # check if failover must be forced instead of migration
8004     if (not self.cleanup and not self.failover and
8005         i_be[constants.BE_ALWAYS_FAILOVER]):
8006       if self.fallback:
8007         self.lu.LogInfo("Instance configured to always failover; fallback"
8008                         " to failover")
8009         self.failover = True
8010       else:
8011         raise errors.OpPrereqError("This instance has been configured to"
8012                                    " always failover, please allow failover",
8013                                    errors.ECODE_STATE)
8014
8015     # check bridge existance
8016     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8017
8018     if not self.cleanup:
8019       _CheckNodeNotDrained(self.lu, target_node)
8020       if not self.failover:
8021         result = self.rpc.call_instance_migratable(instance.primary_node,
8022                                                    instance)
8023         if result.fail_msg and self.fallback:
8024           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8025                           " failover")
8026           self.failover = True
8027         else:
8028           result.Raise("Can't migrate, please use failover",
8029                        prereq=True, ecode=errors.ECODE_STATE)
8030
8031     assert not (self.failover and self.cleanup)
8032
8033     if not self.failover:
8034       if self.lu.op.live is not None and self.lu.op.mode is not None:
8035         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8036                                    " parameters are accepted",
8037                                    errors.ECODE_INVAL)
8038       if self.lu.op.live is not None:
8039         if self.lu.op.live:
8040           self.lu.op.mode = constants.HT_MIGRATION_LIVE
8041         else:
8042           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8043         # reset the 'live' parameter to None so that repeated
8044         # invocations of CheckPrereq do not raise an exception
8045         self.lu.op.live = None
8046       elif self.lu.op.mode is None:
8047         # read the default value from the hypervisor
8048         i_hv = cluster.FillHV(self.instance, skip_globals=False)
8049         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8050
8051       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8052     else:
8053       # Failover is never live
8054       self.live = False
8055
8056     if not (self.failover or self.cleanup):
8057       remote_info = self.rpc.call_instance_info(instance.primary_node,
8058                                                 instance.name,
8059                                                 instance.hypervisor)
8060       remote_info.Raise("Error checking instance on node %s" %
8061                         instance.primary_node)
8062       instance_running = bool(remote_info.payload)
8063       if instance_running:
8064         self.current_mem = int(remote_info.payload["memory"])
8065
8066   def _RunAllocator(self):
8067     """Run the allocator based on input opcode.
8068
8069     """
8070     # FIXME: add a self.ignore_ipolicy option
8071     ial = IAllocator(self.cfg, self.rpc,
8072                      mode=constants.IALLOCATOR_MODE_RELOC,
8073                      name=self.instance_name,
8074                      relocate_from=[self.instance.primary_node],
8075                      )
8076
8077     ial.Run(self.lu.op.iallocator)
8078
8079     if not ial.success:
8080       raise errors.OpPrereqError("Can't compute nodes using"
8081                                  " iallocator '%s': %s" %
8082                                  (self.lu.op.iallocator, ial.info),
8083                                  errors.ECODE_NORES)
8084     if len(ial.result) != ial.required_nodes:
8085       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8086                                  " of nodes (%s), required %s" %
8087                                  (self.lu.op.iallocator, len(ial.result),
8088                                   ial.required_nodes), errors.ECODE_FAULT)
8089     self.target_node = ial.result[0]
8090     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8091                  self.instance_name, self.lu.op.iallocator,
8092                  utils.CommaJoin(ial.result))
8093
8094   def _WaitUntilSync(self):
8095     """Poll with custom rpc for disk sync.
8096
8097     This uses our own step-based rpc call.
8098
8099     """
8100     self.feedback_fn("* wait until resync is done")
8101     all_done = False
8102     while not all_done:
8103       all_done = True
8104       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8105                                             self.nodes_ip,
8106                                             self.instance.disks)
8107       min_percent = 100
8108       for node, nres in result.items():
8109         nres.Raise("Cannot resync disks on node %s" % node)
8110         node_done, node_percent = nres.payload
8111         all_done = all_done and node_done
8112         if node_percent is not None:
8113           min_percent = min(min_percent, node_percent)
8114       if not all_done:
8115         if min_percent < 100:
8116           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8117         time.sleep(2)
8118
8119   def _EnsureSecondary(self, node):
8120     """Demote a node to secondary.
8121
8122     """
8123     self.feedback_fn("* switching node %s to secondary mode" % node)
8124
8125     for dev in self.instance.disks:
8126       self.cfg.SetDiskID(dev, node)
8127
8128     result = self.rpc.call_blockdev_close(node, self.instance.name,
8129                                           self.instance.disks)
8130     result.Raise("Cannot change disk to secondary on node %s" % node)
8131
8132   def _GoStandalone(self):
8133     """Disconnect from the network.
8134
8135     """
8136     self.feedback_fn("* changing into standalone mode")
8137     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8138                                                self.instance.disks)
8139     for node, nres in result.items():
8140       nres.Raise("Cannot disconnect disks node %s" % node)
8141
8142   def _GoReconnect(self, multimaster):
8143     """Reconnect to the network.
8144
8145     """
8146     if multimaster:
8147       msg = "dual-master"
8148     else:
8149       msg = "single-master"
8150     self.feedback_fn("* changing disks into %s mode" % msg)
8151     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8152                                            self.instance.disks,
8153                                            self.instance.name, multimaster)
8154     for node, nres in result.items():
8155       nres.Raise("Cannot change disks config on node %s" % node)
8156
8157   def _ExecCleanup(self):
8158     """Try to cleanup after a failed migration.
8159
8160     The cleanup is done by:
8161       - check that the instance is running only on one node
8162         (and update the config if needed)
8163       - change disks on its secondary node to secondary
8164       - wait until disks are fully synchronized
8165       - disconnect from the network
8166       - change disks into single-master mode
8167       - wait again until disks are fully synchronized
8168
8169     """
8170     instance = self.instance
8171     target_node = self.target_node
8172     source_node = self.source_node
8173
8174     # check running on only one node
8175     self.feedback_fn("* checking where the instance actually runs"
8176                      " (if this hangs, the hypervisor might be in"
8177                      " a bad state)")
8178     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8179     for node, result in ins_l.items():
8180       result.Raise("Can't contact node %s" % node)
8181
8182     runningon_source = instance.name in ins_l[source_node].payload
8183     runningon_target = instance.name in ins_l[target_node].payload
8184
8185     if runningon_source and runningon_target:
8186       raise errors.OpExecError("Instance seems to be running on two nodes,"
8187                                " or the hypervisor is confused; you will have"
8188                                " to ensure manually that it runs only on one"
8189                                " and restart this operation")
8190
8191     if not (runningon_source or runningon_target):
8192       raise errors.OpExecError("Instance does not seem to be running at all;"
8193                                " in this case it's safer to repair by"
8194                                " running 'gnt-instance stop' to ensure disk"
8195                                " shutdown, and then restarting it")
8196
8197     if runningon_target:
8198       # the migration has actually succeeded, we need to update the config
8199       self.feedback_fn("* instance running on secondary node (%s),"
8200                        " updating config" % target_node)
8201       instance.primary_node = target_node
8202       self.cfg.Update(instance, self.feedback_fn)
8203       demoted_node = source_node
8204     else:
8205       self.feedback_fn("* instance confirmed to be running on its"
8206                        " primary node (%s)" % source_node)
8207       demoted_node = target_node
8208
8209     if instance.disk_template in constants.DTS_INT_MIRROR:
8210       self._EnsureSecondary(demoted_node)
8211       try:
8212         self._WaitUntilSync()
8213       except errors.OpExecError:
8214         # we ignore here errors, since if the device is standalone, it
8215         # won't be able to sync
8216         pass
8217       self._GoStandalone()
8218       self._GoReconnect(False)
8219       self._WaitUntilSync()
8220
8221     self.feedback_fn("* done")
8222
8223   def _RevertDiskStatus(self):
8224     """Try to revert the disk status after a failed migration.
8225
8226     """
8227     target_node = self.target_node
8228     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8229       return
8230
8231     try:
8232       self._EnsureSecondary(target_node)
8233       self._GoStandalone()
8234       self._GoReconnect(False)
8235       self._WaitUntilSync()
8236     except errors.OpExecError, err:
8237       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8238                          " please try to recover the instance manually;"
8239                          " error '%s'" % str(err))
8240
8241   def _AbortMigration(self):
8242     """Call the hypervisor code to abort a started migration.
8243
8244     """
8245     instance = self.instance
8246     target_node = self.target_node
8247     source_node = self.source_node
8248     migration_info = self.migration_info
8249
8250     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8251                                                                  instance,
8252                                                                  migration_info,
8253                                                                  False)
8254     abort_msg = abort_result.fail_msg
8255     if abort_msg:
8256       logging.error("Aborting migration failed on target node %s: %s",
8257                     target_node, abort_msg)
8258       # Don't raise an exception here, as we stil have to try to revert the
8259       # disk status, even if this step failed.
8260
8261     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8262         instance, False, self.live)
8263     abort_msg = abort_result.fail_msg
8264     if abort_msg:
8265       logging.error("Aborting migration failed on source node %s: %s",
8266                     source_node, abort_msg)
8267
8268   def _ExecMigration(self):
8269     """Migrate an instance.
8270
8271     The migrate is done by:
8272       - change the disks into dual-master mode
8273       - wait until disks are fully synchronized again
8274       - migrate the instance
8275       - change disks on the new secondary node (the old primary) to secondary
8276       - wait until disks are fully synchronized
8277       - change disks into single-master mode
8278
8279     """
8280     instance = self.instance
8281     target_node = self.target_node
8282     source_node = self.source_node
8283
8284     # Check for hypervisor version mismatch and warn the user.
8285     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8286                                        None, [self.instance.hypervisor])
8287     for ninfo in nodeinfo.values():
8288       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8289                   ninfo.node)
8290     (_, _, (src_info, )) = nodeinfo[source_node].payload
8291     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8292
8293     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8294         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8295       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8296       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8297       if src_version != dst_version:
8298         self.feedback_fn("* warning: hypervisor version mismatch between"
8299                          " source (%s) and target (%s) node" %
8300                          (src_version, dst_version))
8301
8302     self.feedback_fn("* checking disk consistency between source and target")
8303     for (idx, dev) in enumerate(instance.disks):
8304       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8305         raise errors.OpExecError("Disk %s is degraded or not fully"
8306                                  " synchronized on target node,"
8307                                  " aborting migration" % idx)
8308
8309     if self.current_mem > self.tgt_free_mem:
8310       if not self.allow_runtime_changes:
8311         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8312                                  " free memory to fit instance %s on target"
8313                                  " node %s (have %dMB, need %dMB)" %
8314                                  (instance.name, target_node,
8315                                   self.tgt_free_mem, self.current_mem))
8316       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8317       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8318                                                      instance,
8319                                                      self.tgt_free_mem)
8320       rpcres.Raise("Cannot modify instance runtime memory")
8321
8322     # First get the migration information from the remote node
8323     result = self.rpc.call_migration_info(source_node, instance)
8324     msg = result.fail_msg
8325     if msg:
8326       log_err = ("Failed fetching source migration information from %s: %s" %
8327                  (source_node, msg))
8328       logging.error(log_err)
8329       raise errors.OpExecError(log_err)
8330
8331     self.migration_info = migration_info = result.payload
8332
8333     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8334       # Then switch the disks to master/master mode
8335       self._EnsureSecondary(target_node)
8336       self._GoStandalone()
8337       self._GoReconnect(True)
8338       self._WaitUntilSync()
8339
8340     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8341     result = self.rpc.call_accept_instance(target_node,
8342                                            instance,
8343                                            migration_info,
8344                                            self.nodes_ip[target_node])
8345
8346     msg = result.fail_msg
8347     if msg:
8348       logging.error("Instance pre-migration failed, trying to revert"
8349                     " disk status: %s", msg)
8350       self.feedback_fn("Pre-migration failed, aborting")
8351       self._AbortMigration()
8352       self._RevertDiskStatus()
8353       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8354                                (instance.name, msg))
8355
8356     self.feedback_fn("* migrating instance to %s" % target_node)
8357     result = self.rpc.call_instance_migrate(source_node, instance,
8358                                             self.nodes_ip[target_node],
8359                                             self.live)
8360     msg = result.fail_msg
8361     if msg:
8362       logging.error("Instance migration failed, trying to revert"
8363                     " disk status: %s", msg)
8364       self.feedback_fn("Migration failed, aborting")
8365       self._AbortMigration()
8366       self._RevertDiskStatus()
8367       raise errors.OpExecError("Could not migrate instance %s: %s" %
8368                                (instance.name, msg))
8369
8370     self.feedback_fn("* starting memory transfer")
8371     last_feedback = time.time()
8372     while True:
8373       result = self.rpc.call_instance_get_migration_status(source_node,
8374                                                            instance)
8375       msg = result.fail_msg
8376       ms = result.payload   # MigrationStatus instance
8377       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8378         logging.error("Instance migration failed, trying to revert"
8379                       " disk status: %s", msg)
8380         self.feedback_fn("Migration failed, aborting")
8381         self._AbortMigration()
8382         self._RevertDiskStatus()
8383         raise errors.OpExecError("Could not migrate instance %s: %s" %
8384                                  (instance.name, msg))
8385
8386       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8387         self.feedback_fn("* memory transfer complete")
8388         break
8389
8390       if (utils.TimeoutExpired(last_feedback,
8391                                self._MIGRATION_FEEDBACK_INTERVAL) and
8392           ms.transferred_ram is not None):
8393         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8394         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8395         last_feedback = time.time()
8396
8397       time.sleep(self._MIGRATION_POLL_INTERVAL)
8398
8399     result = self.rpc.call_instance_finalize_migration_src(source_node,
8400                                                            instance,
8401                                                            True,
8402                                                            self.live)
8403     msg = result.fail_msg
8404     if msg:
8405       logging.error("Instance migration succeeded, but finalization failed"
8406                     " on the source node: %s", msg)
8407       raise errors.OpExecError("Could not finalize instance migration: %s" %
8408                                msg)
8409
8410     instance.primary_node = target_node
8411
8412     # distribute new instance config to the other nodes
8413     self.cfg.Update(instance, self.feedback_fn)
8414
8415     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8416                                                            instance,
8417                                                            migration_info,
8418                                                            True)
8419     msg = result.fail_msg
8420     if msg:
8421       logging.error("Instance migration succeeded, but finalization failed"
8422                     " on the target node: %s", msg)
8423       raise errors.OpExecError("Could not finalize instance migration: %s" %
8424                                msg)
8425
8426     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8427       self._EnsureSecondary(source_node)
8428       self._WaitUntilSync()
8429       self._GoStandalone()
8430       self._GoReconnect(False)
8431       self._WaitUntilSync()
8432
8433     # If the instance's disk template is `rbd' and there was a successful
8434     # migration, unmap the device from the source node.
8435     if self.instance.disk_template == constants.DT_RBD:
8436       disks = _ExpandCheckDisks(instance, instance.disks)
8437       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8438       for disk in disks:
8439         result = self.rpc.call_blockdev_shutdown(source_node, disk)
8440         msg = result.fail_msg
8441         if msg:
8442           logging.error("Migration was successful, but couldn't unmap the"
8443                         " block device %s on source node %s: %s",
8444                         disk.iv_name, source_node, msg)
8445           logging.error("You need to unmap the device %s manually on %s",
8446                         disk.iv_name, source_node)
8447
8448     self.feedback_fn("* done")
8449
8450   def _ExecFailover(self):
8451     """Failover an instance.
8452
8453     The failover is done by shutting it down on its present node and
8454     starting it on the secondary.
8455
8456     """
8457     instance = self.instance
8458     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8459
8460     source_node = instance.primary_node
8461     target_node = self.target_node
8462
8463     if instance.admin_state == constants.ADMINST_UP:
8464       self.feedback_fn("* checking disk consistency between source and target")
8465       for (idx, dev) in enumerate(instance.disks):
8466         # for drbd, these are drbd over lvm
8467         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8468           if primary_node.offline:
8469             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8470                              " target node %s" %
8471                              (primary_node.name, idx, target_node))
8472           elif not self.ignore_consistency:
8473             raise errors.OpExecError("Disk %s is degraded on target node,"
8474                                      " aborting failover" % idx)
8475     else:
8476       self.feedback_fn("* not checking disk consistency as instance is not"
8477                        " running")
8478
8479     self.feedback_fn("* shutting down instance on source node")
8480     logging.info("Shutting down instance %s on node %s",
8481                  instance.name, source_node)
8482
8483     result = self.rpc.call_instance_shutdown(source_node, instance,
8484                                              self.shutdown_timeout)
8485     msg = result.fail_msg
8486     if msg:
8487       if self.ignore_consistency or primary_node.offline:
8488         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8489                            " proceeding anyway; please make sure node"
8490                            " %s is down; error details: %s",
8491                            instance.name, source_node, source_node, msg)
8492       else:
8493         raise errors.OpExecError("Could not shutdown instance %s on"
8494                                  " node %s: %s" %
8495                                  (instance.name, source_node, msg))
8496
8497     self.feedback_fn("* deactivating the instance's disks on source node")
8498     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8499       raise errors.OpExecError("Can't shut down the instance's disks")
8500
8501     instance.primary_node = target_node
8502     # distribute new instance config to the other nodes
8503     self.cfg.Update(instance, self.feedback_fn)
8504
8505     # Only start the instance if it's marked as up
8506     if instance.admin_state == constants.ADMINST_UP:
8507       self.feedback_fn("* activating the instance's disks on target node %s" %
8508                        target_node)
8509       logging.info("Starting instance %s on node %s",
8510                    instance.name, target_node)
8511
8512       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8513                                            ignore_secondaries=True)
8514       if not disks_ok:
8515         _ShutdownInstanceDisks(self.lu, instance)
8516         raise errors.OpExecError("Can't activate the instance's disks")
8517
8518       self.feedback_fn("* starting the instance on the target node %s" %
8519                        target_node)
8520       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8521                                             False)
8522       msg = result.fail_msg
8523       if msg:
8524         _ShutdownInstanceDisks(self.lu, instance)
8525         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8526                                  (instance.name, target_node, msg))
8527
8528   def Exec(self, feedback_fn):
8529     """Perform the migration.
8530
8531     """
8532     self.feedback_fn = feedback_fn
8533     self.source_node = self.instance.primary_node
8534
8535     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8536     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8537       self.target_node = self.instance.secondary_nodes[0]
8538       # Otherwise self.target_node has been populated either
8539       # directly, or through an iallocator.
8540
8541     self.all_nodes = [self.source_node, self.target_node]
8542     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8543                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8544
8545     if self.failover:
8546       feedback_fn("Failover instance %s" % self.instance.name)
8547       self._ExecFailover()
8548     else:
8549       feedback_fn("Migrating instance %s" % self.instance.name)
8550
8551       if self.cleanup:
8552         return self._ExecCleanup()
8553       else:
8554         return self._ExecMigration()
8555
8556
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  The children of the device are always visited first. The device itself
  is only created if creation is forced, either by the caller or because
  the device reports that it has to be created on secondaries.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      is changed to True (for this device and all its children) as soon
      as a device is found whose CreateOnSecondary() returns true
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  must_create = force_create
  if device.CreateOnSecondary():
    must_create = True

  # Children first, so that they exist before their parent is created
  for child_dev in (device.children or []):
    _CreateBlockDev(lu, node, instance, child_dev, must_create,
                    info, force_open)

  if must_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8597
8598
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create one block device on a given node.

  Children of the device are not handled here, so they have to exist
  already. If the device has no physical ID yet, the payload of the
  creation RPC is stored as its physical ID.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  create_result = lu.rpc.call_blockdev_create(node, device, device.size,
                                              instance.name, force_open,
                                              info)
  failure_text = ("Can't create block device %s on"
                  " node %s for instance %s" % (device, node, instance.name))
  create_result.Raise(failure_text)
  if device.physical_id is None:
    device.physical_id = create_result.payload
8627
8628
def _GenerateUniqueNames(lu, exts):
  """Generate one unique name per given extension.

  For every extension a new unique ID is requested from the
  configuration (on behalf of the current execution context) and the
  extension is appended to it.

  @param lu: the lu on whose behalf we execute
  @param exts: the extensions to append to the generated IDs
  @return: list of generated names, in the order of C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(lu.proc.GetECId()), ext)
          for ext in exts]
8640
8641
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor, drbd_params, data_params,
                         meta_params):
  """Build a drbd8 disk object together with its two LV children.

  A port and a DRBD shared secret are allocated through the
  configuration and stored, along with both nodes and both minors, in
  the logical ID of the returned device.

  @param lu: the lu on whose behalf we execute
  @param primary: first node stored in the logical ID of the drbd device
  @param secondary: second node stored in the logical ID of the drbd device
  @param size: size of the drbd device and of its data LV
  @param vgnames: two volume group names, for the data LV and the
      metadata LV respectively
  @param names: two LV names, for the data LV and the metadata LV
      respectively
  @param iv_name: the iv_name of the drbd device
  @param p_minor: first minor stored in the logical ID
  @param s_minor: second minor stored in the logical ID
  @param drbd_params: parameters of the drbd device
  @param data_params: parameters of the data LV
  @param meta_params: parameters of the metadata LV
  @return: the drbd8 L{objects.Disk}, with the data and metadata LVs as
      its children

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgnames[0], names[0]),
                         params=data_params)
  # the metadata volume has a fixed size, independent of the data size
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                         logical_id=(vgnames[1], names[1]),
                         params=meta_params)

  drbd_logical_id = (primary, secondary, port, p_minor, s_minor,
                     shared_secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_logical_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name, params=drbd_params)
8665
8666
# Name prefix string per disk template; only the plain and rbd templates
# have an entry
# NOTE(review): presumably used when generating the names of disks for
# these templates -- the consumer is not visible here, confirm
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  }
8671
8672
# Logical device type (LD_*) per disk template (DT_*); both the file and
# the shared file templates map to the same file device type
# NOTE(review): templates without an entry here (e.g. drbd8, diskless)
# are presumably handled separately by the users of this table -- confirm
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  }
8680
8681
def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
    secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
    feedback_fn, disk_params,
    _req_file_storage=opcodes.RequireFileStorage,
    _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template to generate the layout for
  @param instance_name: the instance name, used when allocating DRBD minors
  @param primary_node: the primary node of the instance
  @type secondary_nodes: list
  @param secondary_nodes: exactly one node for DRBD8, empty for all other
      templates
  @type disk_info: list
  @param disk_info: one dict per disk; C{constants.IDISK_SIZE} and
      C{constants.IDISK_MODE} are always read, C{constants.IDISK_VG},
      C{constants.IDISK_METAVG} and C{constants.IDISK_ADOPT} depending on
      the template
  @param file_storage_dir: directory used in the logical ID of file-based
      disks
  @param file_driver: driver used in the logical ID of file-based disks
  @type base_index: int
  @param base_index: index of the first generated disk
  @param feedback_fn: called with one message per generated disk (not used
      for diskless and DRBD8)
  @param disk_params: passed to C{_ComputeLDParams} to derive the
      parameters of the generated disks
  @rtype: list
  @return: the generated L{objects.Disk} objects (empty for diskless)
  @raise errors.ProgrammerError: if the secondary nodes do not fit the
      template or the template is unknown

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  ld_params = _ComputeLDParams(template_name, disk_params)

  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_DRBD8:
    drbd_params, data_params, meta_params = ld_params
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # Two minors per disk: one on the primary, one on the secondary node
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # Two LV names per disk, stored consecutively: data first, then meta
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1],
                                      drbd_params, data_params, meta_params)
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  else:
    if secondary_nodes:
      raise errors.ProgrammerError("Wrong template configuration")

    if template_name == constants.DT_FILE:
      _req_file_storage()
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()

    # Only templates listed in _DISK_TEMPLATE_NAME_PREFIX get generated names
    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = None
    else:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])

    if template_name == constants.DT_PLAIN:
      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])
    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
      logical_id_fn = \
        lambda _, disk_index, disk: (file_driver,
                                     "%s/disk%d" % (file_storage_dir,
                                                    disk_index))
    elif template_name == constants.DT_BLOCK:
      logical_id_fn = \
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                       disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
    else:
      raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

    # Looked up only after the template has been recognised above, so that
    # an unsupported template is reported via ProgrammerError instead of a
    # bare KeyError from this mapping
    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index, disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
                                params=ld_params[0]))

  return disks
8775
8776
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  @param instance: the instance, of which only the name is used
  @rtype: string
  @return: the instance name prefixed with C{originstname+}

  """
  owner_name = instance.name
  return "originstname+%s" % owner_name
8782
8783
def _CalcEta(time_taken, written, total_size):
  """Estimate the remaining time from the progress made so far.

  The average time per written unit is extrapolated to the amount which
  still has to be written.

  @param time_taken: The time taken so far
  @param written: amount written so far (must not be zero)
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  remaining = total_size - written
  time_per_unit = time_taken / float(written)
  return remaining * time_per_unit
8795
8796
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  Syncing of the disks is paused on the primary node, every disk is wiped
  chunk by chunk and syncing is resumed afterwards, also if wiping failed.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @note: nothing is returned; a failed pause or wipe RPC is reported
      through the C{Raise} method of the RPC result, problems while
      resuming the sync only generate warnings

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
  # Without this check a failed RPC would only surface as an obscure error
  # while iterating over its payload
  result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warning("pause-sync of instance %s for disks %d failed",
                      instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur; a chunk size of zero would never advance the offset below,
      # hence at least one unit is wiped per call
      wipe_chunk_size = max(1, int(wipe_chunk_size))

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # Progress is reported at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    # Only warn here: raising from the "finally" clause would hide an error
    # raised while wiping
    if result.fail_msg:
      lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
                    " please have a look at the status and troubleshoot"
                    " the issue: %s", node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resume sync of disk %d failed, please have a"
                        " look at the status and troubleshoot the issue", idx)
          logging.warning("resume-sync of instance %s for disks %d failed",
                          instance.name, idx)
8864
8865
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For file-based disk
  templates the storage directory is created first; afterwards every disk
  which is not skipped is created on all involved nodes. Nothing is
  returned.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template in constants.DTS_FILEBASED:
    # The directory is derived from the path of the first disk
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    #HARDCODE
    for node in all_nodes:
      # The same flag is passed twice, it is set on the primary node only
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info, on_primary)
8908
8909
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @type ignore_failures: boolean
  @param ignore_failures: whether the DRBD ports are returned to the pool
      even if removing some device failed
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  for (idx, device) in enumerate(instance.disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # Name the node the call was actually sent to, which is not the
      # primary node if target_node was given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
8966
8967
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  @param disk_template: the disk template name
  @type disks: list
  @param disks: disk definitions, each providing C{constants.IDISK_VG} and
      C{constants.IDISK_SIZE}
  @rtype: dict
  @return: the required size for each volume group name; empty for the
      diskless and file-based templates
  @raise errors.ProgrammerError: if the requirements of the disk template
      are unknown

  """
  def _compute(disks, payload):
    """Sum up disk size plus per-disk payload for each volume group.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # Accumulate using the volume group name of this disk, so that
      # several disks in the same volume group add up instead of replacing
      # each other
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # DRBD_META_SIZE is added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
8998
8999
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk size requirement for a disk template.

  @param disk_template: the disk template name
  @type disks: list
  @param disks: disk definitions, each providing C{constants.IDISK_SIZE}
  @return: the summed disk sizes for plain and DRBD8 (the latter including
      C{DRBD_META_SIZE} for each disk), C{None} for diskless and file,
      zero for shared file, block and RBD
  @raise errors.ProgrammerError: if the requirements of the disk template
      are unknown

  """
  plain_total = sum(d[constants.IDISK_SIZE] for d in disks)
  # DRBD_META_SIZE is added for drbd metadata for each disk
  drbd_total = sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks)

  # Required free disk space as a function of disk and swap space
  requirements = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
    constants.DT_RBD: 0,
  }

  if disk_template in requirements:
    return requirements[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" % disk_template)
9022
9023
def _FilterVmNodes(lu, nodenames):
  """Drop the nodes which are not vm_capable from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the vm-capable nodes, in their original order

  """
  excluded = frozenset(lu.cfg.GetNonVmCapableNodeList())
  vm_capable = []
  for name in nodenames:
    if name in excluded:
      continue
    vm_capable.append(name)
  return vm_capable
9037
9038
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  The given parameters are laid over the cluster-level parameters of the
  hypervisor and the result is validated on all vm-capable nodes which are
  not offline. This is used by both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @note: a failed validation is reported through the C{Raise} method of
      the RPC result of the affected node

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)

  cluster_defaults = lu.cfg.GetClusterInfo().hvparams.get(hvname, {})
  full_params = objects.FillDict(cluster_defaults, hvparams)

  validation = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname,
                                                      full_params)
  for node in vm_nodes:
    node_result = validation[node]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node)
9067
9068
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS whose parameters we validate
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @note: a failed validation is reported through the C{Raise} method of
      the RPC result of the affected node

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  validation = lu.rpc.call_os_validate(vm_nodes, required, osname, checks,
                                       osparams)
  for node, nres in validation.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if nres.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node)
9097
9098
9099 class LUInstanceCreate(LogicalUnit):
9100   """Create an instance.
9101
9102   """
9103   HPATH = "instance-add"
9104   HTYPE = constants.HTYPE_INSTANCE
9105   REQ_BGL = False
9106
  def CheckArguments(self):
    """Check arguments.

    Normalizes parts of the opcode (start flag, instance name, secondary
    node, force_variant) and validates the NIC and disk parameter types,
    the disk adoption strategy, the file driver, the node/iallocator
    choice and the fields required by the selected creation mode.

    Besides updating the opcode this sets C{self.adopt_disks},
    C{self.check_ip} and C{self._cds}; for remote imports also
    C{self.source_x509_ca_pem}, C{self.source_x509_ca} and
    C{self.source_instance_name}.

    @raise errors.OpPrereqError: if one of the checks fails

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      # adoption is restricted to some disk templates and can be combined
      # neither with an iallocator nor with an import
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    # mode-specific checks
    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      # the certificate is loaded using the cluster domain secret
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
9261
9262   def ExpandNames(self):
9263     """ExpandNames for CreateInstance.
9264
9265     Figure out the right locks for instance creation.
9266
9267     """
9268     self.needed_locks = {}
9269
9270     instance_name = self.op.instance_name
9271     # this is just a preventive check, but someone might still add this
9272     # instance in the meantime, and creation will fail at lock-add time
9273     if instance_name in self.cfg.GetInstanceList():
9274       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9275                                  instance_name, errors.ECODE_EXISTS)
9276
9277     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9278
9279     if self.op.iallocator:
9280       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9281       # specifying a group on instance creation and then selecting nodes from
9282       # that group
9283       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9284       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9285     else:
9286       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9287       nodelist = [self.op.pnode]
9288       if self.op.snode is not None:
9289         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9290         nodelist.append(self.op.snode)
9291       self.needed_locks[locking.LEVEL_NODE] = nodelist
9292       # Lock resources of instance's primary and secondary nodes (copy to
9293       # prevent accidential modification)
9294       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9295
9296     # in case of import lock the source node too
9297     if self.op.mode == constants.INSTANCE_IMPORT:
9298       src_node = self.op.src_node
9299       src_path = self.op.src_path
9300
9301       if src_path is None:
9302         self.op.src_path = src_path = self.op.instance_name
9303
9304       if src_node is None:
9305         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9306         self.op.src_node = None
9307         if os.path.isabs(src_path):
9308           raise errors.OpPrereqError("Importing an instance from a path"
9309                                      " requires a source node option",
9310                                      errors.ECODE_INVAL)
9311       else:
9312         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9313         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9314           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9315         if not os.path.isabs(src_path):
9316           self.op.src_path = src_path = \
9317             utils.PathJoin(constants.EXPORT_DIR, src_path)
9318
9319   def _RunAllocator(self):
9320     """Run the allocator based on input opcode.
9321
9322     """
9323     nics = [n.ToDict() for n in self.nics]
9324     ial = IAllocator(self.cfg, self.rpc,
9325                      mode=constants.IALLOCATOR_MODE_ALLOC,
9326                      name=self.op.instance_name,
9327                      disk_template=self.op.disk_template,
9328                      tags=self.op.tags,
9329                      os=self.op.os_type,
9330                      vcpus=self.be_full[constants.BE_VCPUS],
9331                      memory=self.be_full[constants.BE_MAXMEM],
9332                      spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9333                      disks=self.disks,
9334                      nics=nics,
9335                      hypervisor=self.op.hypervisor,
9336                      )
9337
9338     ial.Run(self.op.iallocator)
9339
9340     if not ial.success:
9341       raise errors.OpPrereqError("Can't compute nodes using"
9342                                  " iallocator '%s': %s" %
9343                                  (self.op.iallocator, ial.info),
9344                                  errors.ECODE_NORES)
9345     if len(ial.result) != ial.required_nodes:
9346       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9347                                  " of nodes (%s), required %s" %
9348                                  (self.op.iallocator, len(ial.result),
9349                                   ial.required_nodes), errors.ECODE_FAULT)
9350     self.op.pnode = ial.result[0]
9351     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9352                  self.op.instance_name, self.op.iallocator,
9353                  utils.CommaJoin(ial.result))
9354     if ial.required_nodes == 2:
9355       self.op.snode = ial.result[1]
9356
9357   def BuildHooksEnv(self):
9358     """Build hooks env.
9359
9360     This runs on master, primary and secondary nodes of the instance.
9361
9362     """
9363     env = {
9364       "ADD_MODE": self.op.mode,
9365       }
9366     if self.op.mode == constants.INSTANCE_IMPORT:
9367       env["SRC_NODE"] = self.op.src_node
9368       env["SRC_PATH"] = self.op.src_path
9369       env["SRC_IMAGES"] = self.src_images
9370
9371     env.update(_BuildInstanceHookEnv(
9372       name=self.op.instance_name,
9373       primary_node=self.op.pnode,
9374       secondary_nodes=self.secondaries,
9375       status=self.op.start,
9376       os_type=self.op.os_type,
9377       minmem=self.be_full[constants.BE_MINMEM],
9378       maxmem=self.be_full[constants.BE_MAXMEM],
9379       vcpus=self.be_full[constants.BE_VCPUS],
9380       nics=_NICListToTuple(self, self.nics),
9381       disk_template=self.op.disk_template,
9382       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9383              for d in self.disks],
9384       bep=self.be_full,
9385       hvp=self.hv_full,
9386       hypervisor_name=self.op.hypervisor,
9387       tags=self.op.tags,
9388     ))
9389
9390     return env
9391
9392   def BuildHooksNodes(self):
9393     """Build hooks nodes.
9394
9395     """
9396     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9397     return nl, nl
9398
9399   def _ReadExportInfo(self):
9400     """Reads the export information from disk.
9401
9402     It will override the opcode source node and path with the actual
9403     information, if these two were not specified before.
9404
9405     @return: the export information
9406
9407     """
9408     assert self.op.mode == constants.INSTANCE_IMPORT
9409
9410     src_node = self.op.src_node
9411     src_path = self.op.src_path
9412
9413     if src_node is None:
9414       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9415       exp_list = self.rpc.call_export_list(locked_nodes)
9416       found = False
9417       for node in exp_list:
9418         if exp_list[node].fail_msg:
9419           continue
9420         if src_path in exp_list[node].payload:
9421           found = True
9422           self.op.src_node = src_node = node
9423           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9424                                                        src_path)
9425           break
9426       if not found:
9427         raise errors.OpPrereqError("No export found for relative path %s" %
9428                                     src_path, errors.ECODE_INVAL)
9429
9430     _CheckNodeOnline(self, src_node)
9431     result = self.rpc.call_export_info(src_node, src_path)
9432     result.Raise("No export or invalid export found in dir %s" % src_path)
9433
9434     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9435     if not export_info.has_section(constants.INISECT_EXP):
9436       raise errors.ProgrammerError("Corrupted export config",
9437                                    errors.ECODE_ENVIRON)
9438
9439     ei_version = export_info.get(constants.INISECT_EXP, "version")
9440     if (int(ei_version) != constants.EXPORT_VERSION):
9441       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9442                                  (ei_version, constants.EXPORT_VERSION),
9443                                  errors.ECODE_ENVIRON)
9444     return export_info
9445
9446   def _ReadExportParams(self, einfo):
9447     """Use export parameters as defaults.
9448
9449     In case the opcode doesn't specify (as in override) some instance
9450     parameters, then try to use them from the export information, if
9451     that declares them.
9452
9453     """
9454     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9455
9456     if self.op.disk_template is None:
9457       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9458         self.op.disk_template = einfo.get(constants.INISECT_INS,
9459                                           "disk_template")
9460         if self.op.disk_template not in constants.DISK_TEMPLATES:
9461           raise errors.OpPrereqError("Disk template specified in configuration"
9462                                      " file is not one of the allowed values:"
9463                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9464       else:
9465         raise errors.OpPrereqError("No disk template specified and the export"
9466                                    " is missing the disk_template information",
9467                                    errors.ECODE_INVAL)
9468
9469     if not self.op.disks:
9470       disks = []
9471       # TODO: import the disk iv_name too
9472       for idx in range(constants.MAX_DISKS):
9473         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9474           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9475           disks.append({constants.IDISK_SIZE: disk_sz})
9476       self.op.disks = disks
9477       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9478         raise errors.OpPrereqError("No disk info specified and the export"
9479                                    " is missing the disk information",
9480                                    errors.ECODE_INVAL)
9481
9482     if not self.op.nics:
9483       nics = []
9484       for idx in range(constants.MAX_NICS):
9485         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9486           ndict = {}
9487           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9488             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9489             ndict[name] = v
9490           nics.append(ndict)
9491         else:
9492           break
9493       self.op.nics = nics
9494
9495     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9496       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9497
9498     if (self.op.hypervisor is None and
9499         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9500       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9501
9502     if einfo.has_section(constants.INISECT_HYP):
9503       # use the export parameters but do not override the ones
9504       # specified by the user
9505       for name, value in einfo.items(constants.INISECT_HYP):
9506         if name not in self.op.hvparams:
9507           self.op.hvparams[name] = value
9508
9509     if einfo.has_section(constants.INISECT_BEP):
9510       # use the parameters, without overriding
9511       for name, value in einfo.items(constants.INISECT_BEP):
9512         if name not in self.op.beparams:
9513           self.op.beparams[name] = value
9514         # Compatibility for the old "memory" be param
9515         if name == constants.BE_MEMORY:
9516           if constants.BE_MAXMEM not in self.op.beparams:
9517             self.op.beparams[constants.BE_MAXMEM] = value
9518           if constants.BE_MINMEM not in self.op.beparams:
9519             self.op.beparams[constants.BE_MINMEM] = value
9520     else:
9521       # try to read the parameters old style, from the main section
9522       for name in constants.BES_PARAMETERS:
9523         if (name not in self.op.beparams and
9524             einfo.has_option(constants.INISECT_INS, name)):
9525           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9526
9527     if einfo.has_section(constants.INISECT_OSP):
9528       # use the parameters, without overriding
9529       for name, value in einfo.items(constants.INISECT_OSP):
9530         if name not in self.op.osparams:
9531           self.op.osparams[name] = value
9532
9533   def _RevertToDefaults(self, cluster):
9534     """Revert the instance parameters to the default values.
9535
9536     """
9537     # hvparams
9538     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9539     for name in self.op.hvparams.keys():
9540       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9541         del self.op.hvparams[name]
9542     # beparams
9543     be_defs = cluster.SimpleFillBE({})
9544     for name in self.op.beparams.keys():
9545       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9546         del self.op.beparams[name]
9547     # nic params
9548     nic_defs = cluster.SimpleFillNIC({})
9549     for nic in self.op.nics:
9550       for name in constants.NICS_PARAMETERS:
9551         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9552           del nic[name]
9553     # osparams
9554     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9555     for name in self.op.osparams.keys():
9556       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9557         del self.op.osparams[name]
9558
9559   def _CalculateFileStorageDir(self):
9560     """Calculate final instance file storage dir.
9561
9562     """
9563     # file storage dir calculation/check
9564     self.instance_file_storage_dir = None
9565     if self.op.disk_template in constants.DTS_FILEBASED:
9566       # build the full file storage dir path
9567       joinargs = []
9568
9569       if self.op.disk_template == constants.DT_SHARED_FILE:
9570         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9571       else:
9572         get_fsd_fn = self.cfg.GetFileStorageDir
9573
9574       cfg_storagedir = get_fsd_fn()
9575       if not cfg_storagedir:
9576         raise errors.OpPrereqError("Cluster file storage dir not defined")
9577       joinargs.append(cfg_storagedir)
9578
9579       if self.op.file_storage_dir is not None:
9580         joinargs.append(self.op.file_storage_dir)
9581
9582       joinargs.append(self.op.instance_name)
9583
9584       # pylint: disable=W0142
9585       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9586
9587   def CheckPrereq(self): # pylint: disable=R0914
9588     """Check prerequisites.
9589
9590     """
9591     self._CalculateFileStorageDir()
9592
9593     if self.op.mode == constants.INSTANCE_IMPORT:
9594       export_info = self._ReadExportInfo()
9595       self._ReadExportParams(export_info)
9596
9597     if (not self.cfg.GetVGName() and
9598         self.op.disk_template not in constants.DTS_NOT_LVM):
9599       raise errors.OpPrereqError("Cluster does not support lvm-based"
9600                                  " instances", errors.ECODE_STATE)
9601
9602     if (self.op.hypervisor is None or
9603         self.op.hypervisor == constants.VALUE_AUTO):
9604       self.op.hypervisor = self.cfg.GetHypervisorType()
9605
9606     cluster = self.cfg.GetClusterInfo()
9607     enabled_hvs = cluster.enabled_hypervisors
9608     if self.op.hypervisor not in enabled_hvs:
9609       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9610                                  " cluster (%s)" % (self.op.hypervisor,
9611                                   ",".join(enabled_hvs)),
9612                                  errors.ECODE_STATE)
9613
9614     # Check tag validity
9615     for tag in self.op.tags:
9616       objects.TaggableObject.ValidateTag(tag)
9617
9618     # check hypervisor parameter syntax (locally)
9619     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9620     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9621                                       self.op.hvparams)
9622     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9623     hv_type.CheckParameterSyntax(filled_hvp)
9624     self.hv_full = filled_hvp
9625     # check that we don't specify global parameters on an instance
9626     _CheckGlobalHvParams(self.op.hvparams)
9627
9628     # fill and remember the beparams dict
9629     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9630     for param, value in self.op.beparams.iteritems():
9631       if value == constants.VALUE_AUTO:
9632         self.op.beparams[param] = default_beparams[param]
9633     objects.UpgradeBeParams(self.op.beparams)
9634     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9635     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9636
9637     # build os parameters
9638     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9639
9640     # now that hvp/bep are in final format, let's reset to defaults,
9641     # if told to do so
9642     if self.op.identify_defaults:
9643       self._RevertToDefaults(cluster)
9644
9645     # NIC buildup
9646     self.nics = []
9647     for idx, nic in enumerate(self.op.nics):
9648       nic_mode_req = nic.get(constants.INIC_MODE, None)
9649       nic_mode = nic_mode_req
9650       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9651         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9652
9653       # in routed mode, for the first nic, the default ip is 'auto'
9654       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9655         default_ip_mode = constants.VALUE_AUTO
9656       else:
9657         default_ip_mode = constants.VALUE_NONE
9658
9659       # ip validity checks
9660       ip = nic.get(constants.INIC_IP, default_ip_mode)
9661       if ip is None or ip.lower() == constants.VALUE_NONE:
9662         nic_ip = None
9663       elif ip.lower() == constants.VALUE_AUTO:
9664         if not self.op.name_check:
9665           raise errors.OpPrereqError("IP address set to auto but name checks"
9666                                      " have been skipped",
9667                                      errors.ECODE_INVAL)
9668         nic_ip = self.hostname1.ip
9669       else:
9670         if not netutils.IPAddress.IsValid(ip):
9671           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9672                                      errors.ECODE_INVAL)
9673         nic_ip = ip
9674
9675       # TODO: check the ip address for uniqueness
9676       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9677         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9678                                    errors.ECODE_INVAL)
9679
9680       # MAC address verification
9681       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9682       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9683         mac = utils.NormalizeAndValidateMac(mac)
9684
9685         try:
9686           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9687         except errors.ReservationError:
9688           raise errors.OpPrereqError("MAC address %s already in use"
9689                                      " in cluster" % mac,
9690                                      errors.ECODE_NOTUNIQUE)
9691
9692       #  Build nic parameters
9693       link = nic.get(constants.INIC_LINK, None)
9694       if link == constants.VALUE_AUTO:
9695         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9696       nicparams = {}
9697       if nic_mode_req:
9698         nicparams[constants.NIC_MODE] = nic_mode
9699       if link:
9700         nicparams[constants.NIC_LINK] = link
9701
9702       check_params = cluster.SimpleFillNIC(nicparams)
9703       objects.NIC.CheckParameterSyntax(check_params)
9704       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9705
9706     # disk checks/pre-build
9707     default_vg = self.cfg.GetVGName()
9708     self.disks = []
9709     for disk in self.op.disks:
9710       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9711       if mode not in constants.DISK_ACCESS_SET:
9712         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9713                                    mode, errors.ECODE_INVAL)
9714       size = disk.get(constants.IDISK_SIZE, None)
9715       if size is None:
9716         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9717       try:
9718         size = int(size)
9719       except (TypeError, ValueError):
9720         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9721                                    errors.ECODE_INVAL)
9722
9723       data_vg = disk.get(constants.IDISK_VG, default_vg)
9724       new_disk = {
9725         constants.IDISK_SIZE: size,
9726         constants.IDISK_MODE: mode,
9727         constants.IDISK_VG: data_vg,
9728         }
9729       if constants.IDISK_METAVG in disk:
9730         new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9731       if constants.IDISK_ADOPT in disk:
9732         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9733       self.disks.append(new_disk)
9734
9735     if self.op.mode == constants.INSTANCE_IMPORT:
9736       disk_images = []
9737       for idx in range(len(self.disks)):
9738         option = "disk%d_dump" % idx
9739         if export_info.has_option(constants.INISECT_INS, option):
9740           # FIXME: are the old os-es, disk sizes, etc. useful?
9741           export_name = export_info.get(constants.INISECT_INS, option)
9742           image = utils.PathJoin(self.op.src_path, export_name)
9743           disk_images.append(image)
9744         else:
9745           disk_images.append(False)
9746
9747       self.src_images = disk_images
9748
9749       old_name = export_info.get(constants.INISECT_INS, "name")
9750       if self.op.instance_name == old_name:
9751         for idx, nic in enumerate(self.nics):
9752           if nic.mac == constants.VALUE_AUTO:
9753             nic_mac_ini = "nic%d_mac" % idx
9754             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9755
9756     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9757
9758     # ip ping checks (we use the same ip that was resolved in ExpandNames)
9759     if self.op.ip_check:
9760       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9761         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9762                                    (self.check_ip, self.op.instance_name),
9763                                    errors.ECODE_NOTUNIQUE)
9764
9765     #### mac address generation
9766     # By generating here the mac address both the allocator and the hooks get
9767     # the real final mac address rather than the 'auto' or 'generate' value.
9768     # There is a race condition between the generation and the instance object
9769     # creation, which means that we know the mac is valid now, but we're not
9770     # sure it will be when we actually add the instance. If things go bad
9771     # adding the instance will abort because of a duplicate mac, and the
9772     # creation job will fail.
9773     for nic in self.nics:
9774       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9775         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9776
9777     #### allocator run
9778
9779     if self.op.iallocator is not None:
9780       self._RunAllocator()
9781
9782     # Release all unneeded node locks
9783     _ReleaseLocks(self, locking.LEVEL_NODE,
9784                   keep=filter(None, [self.op.pnode, self.op.snode,
9785                                      self.op.src_node]))
9786     _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9787                   keep=filter(None, [self.op.pnode, self.op.snode,
9788                                      self.op.src_node]))
9789
9790     #### node related checks
9791
9792     # check primary node
9793     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9794     assert self.pnode is not None, \
9795       "Cannot retrieve locked node %s" % self.op.pnode
9796     if pnode.offline:
9797       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9798                                  pnode.name, errors.ECODE_STATE)
9799     if pnode.drained:
9800       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9801                                  pnode.name, errors.ECODE_STATE)
9802     if not pnode.vm_capable:
9803       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9804                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9805
9806     self.secondaries = []
9807
9808     # mirror node verification
9809     if self.op.disk_template in constants.DTS_INT_MIRROR:
9810       if self.op.snode == pnode.name:
9811         raise errors.OpPrereqError("The secondary node cannot be the"
9812                                    " primary node", errors.ECODE_INVAL)
9813       _CheckNodeOnline(self, self.op.snode)
9814       _CheckNodeNotDrained(self, self.op.snode)
9815       _CheckNodeVmCapable(self, self.op.snode)
9816       self.secondaries.append(self.op.snode)
9817
9818       snode = self.cfg.GetNodeInfo(self.op.snode)
9819       if pnode.group != snode.group:
9820         self.LogWarning("The primary and secondary nodes are in two"
9821                         " different node groups; the disk parameters"
9822                         " from the first disk's node group will be"
9823                         " used")
9824
9825     nodenames = [pnode.name] + self.secondaries
9826
9827     # Verify instance specs
9828     spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9829     ispec = {
9830       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9831       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9832       constants.ISPEC_DISK_COUNT: len(self.disks),
9833       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9834       constants.ISPEC_NIC_COUNT: len(self.nics),
9835       constants.ISPEC_SPINDLE_USE: spindle_use,
9836       }
9837
9838     group_info = self.cfg.GetNodeGroup(pnode.group)
9839     ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9840     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9841     if not self.op.ignore_ipolicy and res:
9842       raise errors.OpPrereqError(("Instance allocation to group %s violates"
9843                                   " policy: %s") % (pnode.group,
9844                                                     utils.CommaJoin(res)),
9845                                   errors.ECODE_INVAL)
9846
9847     # disk parameters (not customizable at instance or node level)
9848     # just use the primary node parameters, ignoring the secondary.
9849     self.diskparams = group_info.diskparams
9850
9851     if not self.adopt_disks:
9852       if self.op.disk_template == constants.DT_RBD:
9853         # _CheckRADOSFreeSpace() is just a placeholder.
9854         # Any function that checks prerequisites can be placed here.
9855         # Check if there is enough space on the RADOS cluster.
9856         _CheckRADOSFreeSpace()
9857       else:
9858         # Check lv size requirements, if not adopting
9859         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9860         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9861
9862     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9863       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9864                                 disk[constants.IDISK_ADOPT])
9865                      for disk in self.disks])
9866       if len(all_lvs) != len(self.disks):
9867         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9868                                    errors.ECODE_INVAL)
9869       for lv_name in all_lvs:
9870         try:
9871           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9872           # to ReserveLV uses the same syntax
9873           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9874         except errors.ReservationError:
9875           raise errors.OpPrereqError("LV named %s used by another instance" %
9876                                      lv_name, errors.ECODE_NOTUNIQUE)
9877
9878       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9879       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9880
9881       node_lvs = self.rpc.call_lv_list([pnode.name],
9882                                        vg_names.payload.keys())[pnode.name]
9883       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9884       node_lvs = node_lvs.payload
9885
9886       delta = all_lvs.difference(node_lvs.keys())
9887       if delta:
9888         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9889                                    utils.CommaJoin(delta),
9890                                    errors.ECODE_INVAL)
9891       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9892       if online_lvs:
9893         raise errors.OpPrereqError("Online logical volumes found, cannot"
9894                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9895                                    errors.ECODE_STATE)
9896       # update the size of disk based on what is found
9897       for dsk in self.disks:
9898         dsk[constants.IDISK_SIZE] = \
9899           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9900                                         dsk[constants.IDISK_ADOPT])][0]))
9901
9902     elif self.op.disk_template == constants.DT_BLOCK:
9903       # Normalize and de-duplicate device paths
9904       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9905                        for disk in self.disks])
9906       if len(all_disks) != len(self.disks):
9907         raise errors.OpPrereqError("Duplicate disk names given for adoption",
9908                                    errors.ECODE_INVAL)
9909       baddisks = [d for d in all_disks
9910                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9911       if baddisks:
9912         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9913                                    " cannot be adopted" %
9914                                    (", ".join(baddisks),
9915                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
9916                                    errors.ECODE_INVAL)
9917
9918       node_disks = self.rpc.call_bdev_sizes([pnode.name],
9919                                             list(all_disks))[pnode.name]
9920       node_disks.Raise("Cannot get block device information from node %s" %
9921                        pnode.name)
9922       node_disks = node_disks.payload
9923       delta = all_disks.difference(node_disks.keys())
9924       if delta:
9925         raise errors.OpPrereqError("Missing block device(s): %s" %
9926                                    utils.CommaJoin(delta),
9927                                    errors.ECODE_INVAL)
9928       for dsk in self.disks:
9929         dsk[constants.IDISK_SIZE] = \
9930           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9931
9932     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9933
9934     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9935     # check OS parameters (remotely)
9936     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9937
9938     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9939
9940     # memory check on primary node
9941     #TODO(dynmem): use MINMEM for checking
9942     if self.op.start:
9943       _CheckNodeFreeMemory(self, self.pnode.name,
9944                            "creating instance %s" % self.op.instance_name,
9945                            self.be_full[constants.BE_MAXMEM],
9946                            self.op.hypervisor)
9947
9948     self.dry_run_result = list(nodenames)
9949
9950   def Exec(self, feedback_fn):
9951     """Create and add the instance to the cluster.
9952
9953     """
9954     instance = self.op.instance_name
9955     pnode_name = self.pnode.name
9956
9957     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9958                 self.owned_locks(locking.LEVEL_NODE)), \
9959       "Node locks differ from node resource locks"
9960
9961     ht_kind = self.op.hypervisor
9962     if ht_kind in constants.HTS_REQ_PORT:
9963       network_port = self.cfg.AllocatePort()
9964     else:
9965       network_port = None
9966
9967     disks = _GenerateDiskTemplate(self,
9968                                   self.op.disk_template,
9969                                   instance, pnode_name,
9970                                   self.secondaries,
9971                                   self.disks,
9972                                   self.instance_file_storage_dir,
9973                                   self.op.file_driver,
9974                                   0,
9975                                   feedback_fn,
9976                                   self.diskparams)
9977
9978     iobj = objects.Instance(name=instance, os=self.op.os_type,
9979                             primary_node=pnode_name,
9980                             nics=self.nics, disks=disks,
9981                             disk_template=self.op.disk_template,
9982                             admin_state=constants.ADMINST_DOWN,
9983                             network_port=network_port,
9984                             beparams=self.op.beparams,
9985                             hvparams=self.op.hvparams,
9986                             hypervisor=self.op.hypervisor,
9987                             osparams=self.op.osparams,
9988                             )
9989
9990     if self.op.tags:
9991       for tag in self.op.tags:
9992         iobj.AddTag(tag)
9993
9994     if self.adopt_disks:
9995       if self.op.disk_template == constants.DT_PLAIN:
9996         # rename LVs to the newly-generated names; we need to construct
9997         # 'fake' LV disks with the old data, plus the new unique_id
9998         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9999         rename_to = []
10000         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10001           rename_to.append(t_dsk.logical_id)
10002           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10003           self.cfg.SetDiskID(t_dsk, pnode_name)
10004         result = self.rpc.call_blockdev_rename(pnode_name,
10005                                                zip(tmp_disks, rename_to))
10006         result.Raise("Failed to rename adoped LVs")
10007     else:
10008       feedback_fn("* creating instance disks...")
10009       try:
10010         _CreateDisks(self, iobj)
10011       except errors.OpExecError:
10012         self.LogWarning("Device creation failed, reverting...")
10013         try:
10014           _RemoveDisks(self, iobj)
10015         finally:
10016           self.cfg.ReleaseDRBDMinors(instance)
10017           raise
10018
10019     feedback_fn("adding instance %s to cluster config" % instance)
10020
10021     self.cfg.AddInstance(iobj, self.proc.GetECId())
10022
10023     # Declare that we don't want to remove the instance lock anymore, as we've
10024     # added the instance to the config
10025     del self.remove_locks[locking.LEVEL_INSTANCE]
10026
10027     if self.op.mode == constants.INSTANCE_IMPORT:
10028       # Release unused nodes
10029       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10030     else:
10031       # Release all nodes
10032       _ReleaseLocks(self, locking.LEVEL_NODE)
10033
10034     disk_abort = False
10035     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10036       feedback_fn("* wiping instance disks...")
10037       try:
10038         _WipeDisks(self, iobj)
10039       except errors.OpExecError, err:
10040         logging.exception("Wiping disks failed")
10041         self.LogWarning("Wiping instance disks failed (%s)", err)
10042         disk_abort = True
10043
10044     if disk_abort:
10045       # Something is already wrong with the disks, don't do anything else
10046       pass
10047     elif self.op.wait_for_sync:
10048       disk_abort = not _WaitForSync(self, iobj)
10049     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10050       # make sure the disks are not degraded (still sync-ing is ok)
10051       feedback_fn("* checking mirrors status")
10052       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10053     else:
10054       disk_abort = False
10055
10056     if disk_abort:
10057       _RemoveDisks(self, iobj)
10058       self.cfg.RemoveInstance(iobj.name)
10059       # Make sure the instance lock gets removed
10060       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10061       raise errors.OpExecError("There are some degraded disks for"
10062                                " this instance")
10063
10064     # Release all node resource locks
10065     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10066
10067     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10068       if self.op.mode == constants.INSTANCE_CREATE:
10069         if not self.op.no_install:
10070           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10071                         not self.op.wait_for_sync)
10072           if pause_sync:
10073             feedback_fn("* pausing disk sync to install instance OS")
10074             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10075                                                               iobj.disks, True)
10076             for idx, success in enumerate(result.payload):
10077               if not success:
10078                 logging.warn("pause-sync of instance %s for disk %d failed",
10079                              instance, idx)
10080
10081           feedback_fn("* running the instance OS create scripts...")
10082           # FIXME: pass debug option from opcode to backend
10083           os_add_result = \
10084             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10085                                           self.op.debug_level)
10086           if pause_sync:
10087             feedback_fn("* resuming disk sync")
10088             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10089                                                               iobj.disks, False)
10090             for idx, success in enumerate(result.payload):
10091               if not success:
10092                 logging.warn("resume-sync of instance %s for disk %d failed",
10093                              instance, idx)
10094
10095           os_add_result.Raise("Could not add os for instance %s"
10096                               " on node %s" % (instance, pnode_name))
10097
10098       elif self.op.mode == constants.INSTANCE_IMPORT:
10099         feedback_fn("* running the instance OS import scripts...")
10100
10101         transfers = []
10102
10103         for idx, image in enumerate(self.src_images):
10104           if not image:
10105             continue
10106
10107           # FIXME: pass debug option from opcode to backend
10108           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10109                                              constants.IEIO_FILE, (image, ),
10110                                              constants.IEIO_SCRIPT,
10111                                              (iobj.disks[idx], idx),
10112                                              None)
10113           transfers.append(dt)
10114
10115         import_result = \
10116           masterd.instance.TransferInstanceData(self, feedback_fn,
10117                                                 self.op.src_node, pnode_name,
10118                                                 self.pnode.secondary_ip,
10119                                                 iobj, transfers)
10120         if not compat.all(import_result):
10121           self.LogWarning("Some disks for instance %s on node %s were not"
10122                           " imported successfully" % (instance, pnode_name))
10123
10124       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10125         feedback_fn("* preparing remote import...")
10126         # The source cluster will stop the instance before attempting to make a
10127         # connection. In some cases stopping an instance can take a long time,
10128         # hence the shutdown timeout is added to the connection timeout.
10129         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10130                            self.op.source_shutdown_timeout)
10131         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10132
10133         assert iobj.primary_node == self.pnode.name
10134         disk_results = \
10135           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10136                                         self.source_x509_ca,
10137                                         self._cds, timeouts)
10138         if not compat.all(disk_results):
10139           # TODO: Should the instance still be started, even if some disks
10140           # failed to import (valid for local imports, too)?
10141           self.LogWarning("Some disks for instance %s on node %s were not"
10142                           " imported successfully" % (instance, pnode_name))
10143
10144         # Run rename script on newly imported instance
10145         assert iobj.name == instance
10146         feedback_fn("Running rename script for %s" % instance)
10147         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10148                                                    self.source_instance_name,
10149                                                    self.op.debug_level)
10150         if result.fail_msg:
10151           self.LogWarning("Failed to run rename script for %s on node"
10152                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10153
10154       else:
10155         # also checked in the prereq part
10156         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10157                                      % self.op.mode)
10158
10159     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10160
10161     if self.op.start:
10162       iobj.admin_state = constants.ADMINST_UP
10163       self.cfg.Update(iobj, feedback_fn)
10164       logging.info("Starting instance %s on node %s", instance, pnode_name)
10165       feedback_fn("* starting instance...")
10166       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10167                                             False)
10168       result.Raise("Could not start instance")
10169
10170     return list(iobj.all_nodes)
10171
10172
10173 def _CheckRADOSFreeSpace():
10174   """Compute disk size requirements inside the RADOS cluster.
10175
10176   """
10177   # For the RADOS cluster we assume there is always enough space.
10178   pass
10179
10180
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This LU is a bit unusual: it does not open the console itself, it only
  returns the information (command line) needed to connect to the console
  from the master node.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request the instance lock, with share flags from L{_ShareAll}.

    """
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the (locked) instance in the configuration and runs
    L{_CheckNodeOnline} on its primary node.

    """
    name = self.op.instance_name
    self.instance = instance = self.cfg.GetInstanceInfo(name)
    assert instance is not None, "Cannot retrieve locked instance %s" % name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    @return: the console information computed by L{_GetInstanceConsole}
    @raise errors.OpExecError: if the instance is not listed as running on
        its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name in running.payload:
      logging.debug("Connecting to console of %s on %s", inst.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), inst)

    # The instance is not running; derive the state shown in the error
    # message from the configured admin state
    if inst.admin_state == constants.ADMINST_UP:
      state = constants.INSTST_ERRORDOWN
    elif inst.admin_state == constants.ADMINST_DOWN:
      state = constants.INSTST_ADMINDOWN
    else:
      state = constants.INSTST_ADMINOFFLINE

    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (inst.name, state))
10230
10231
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: used to compute the filled hypervisor and backend
      parameters of the instance
  @type instance: L{objects.Instance}
  @param instance: the instance whose console is wanted
  @rtype: dict
  @return: the console object of the instance's hypervisor, converted
      through its C{ToDict} method

  """
  hv_impl = hypervisor.GetHypervisor(instance.hypervisor)

  # The filled hvparams and beparams are handed over as separate arguments
  # instead of being written into the instance, so that the defaults never
  # end up being saved in the instance itself.
  console = hv_impl.GetInstanceConsole(instance,
                                       cluster.FillHV(instance),
                                       cluster.FillBE(instance))

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
10251
10252
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  The replacement itself is carried out by a L{TLReplaceDisks} tasklet;
  this LU validates the arguments, declares the locks and builds the hooks
  data.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the mode/remote node/iallocator combination.

    """
    opcode = self.op
    TLReplaceDisks.CheckArguments(opcode.mode, opcode.remote_node,
                                  opcode.iallocator)

  def ExpandNames(self):
    """Lock the instance, prepare the other lock levels, set up the tasklet.

    """
    self._ExpandAndLockInstance()

    for lock_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES,
                       locking.LEVEL_NODEGROUP):
      assert lock_level not in self.needed_locks

    opcode = self.op

    assert not (opcode.iallocator is not None and
                opcode.remote_node is not None), "Conflicting options"

    if opcode.remote_node is None:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if opcode.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
    else:
      opcode.remote_node = _ExpandNodeName(self.cfg, opcode.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [opcode.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, opcode.instance_name, opcode.mode,
                                   opcode.iallocator, opcode.remote_node,
                                   opcode.disks, False, opcode.early_release,
                                   opcode.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given level.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      self.needed_locks[locking.LEVEL_NODEGROUP] = instance_groups

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is None:
        self._LockInstancesNodes()
      else:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        member_nodes = []
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          for node_name in self.cfg.GetNodeGroup(group_uuid).members:
            member_nodes.append(node_name)
        self.needed_locks[locking.LEVEL_NODE] = member_nodes

    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks (the very same list object is used for both levels)
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks

  def BuildHooksEnv(self):
    """Build hooks env.

    Exports the replace mode plus the new and old secondary node, on top of
    the environment built by L{_BuildInstanceHookEnvByObject}.

    """
    instance = self.replacer.instance

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = self.op.remote_node
    env["OLD_SECONDARY"] = instance.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The list holds the master node, the primary node of the instance and,
    when one was given, the new secondary; the same list is returned for
    both elements of the result.

    """
    instance = self.replacer.instance
    hook_nodes = [self.cfg.GetMasterNode(), instance.primary_node]

    new_secondary = self.op.remote_node
    if new_secondary is not None:
      hook_nodes.append(new_secondary)

    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Re-verifies the optimistically acquired node group locks (if any) and
    then hands over to the generic L{LogicalUnit.CheckPrereq}.

    """
    assert not (not self.glm.is_owned(locking.LEVEL_NODEGROUP) and
                self.op.iallocator is not None)

    # Verify if node group locks are still correct
    group_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if group_locks:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, group_locks)

    return LogicalUnit.CheckPrereq(self)
10370
10371
10372 class TLReplaceDisks(Tasklet):
10373   """Replaces disks for an instance.
10374
10375   Note: Locking is not within the scope of this class.
10376
10377   """
10378   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10379                disks, delay_iallocator, early_release, ignore_ipolicy):
10380     """Initializes this class.
10381
10382     """
10383     Tasklet.__init__(self, lu)
10384
10385     # Parameters
10386     self.instance_name = instance_name
10387     self.mode = mode
10388     self.iallocator_name = iallocator_name
10389     self.remote_node = remote_node
10390     self.disks = disks
10391     self.delay_iallocator = delay_iallocator
10392     self.early_release = early_release
10393     self.ignore_ipolicy = ignore_ipolicy
10394
10395     # Runtime data
10396     self.instance = None
10397     self.new_node = None
10398     self.target_node = None
10399     self.other_node = None
10400     self.remote_node_info = None
10401     self.node_secondary_ip = None
10402
10403   @staticmethod
10404   def CheckArguments(mode, remote_node, iallocator):
10405     """Helper function for users of this class.
10406
10407     """
10408     # check for valid parameter combination
10409     if mode == constants.REPLACE_DISK_CHG:
10410       if remote_node is None and iallocator is None:
10411         raise errors.OpPrereqError("When changing the secondary either an"
10412                                    " iallocator script must be used or the"
10413                                    " new node given", errors.ECODE_INVAL)
10414
10415       if remote_node is not None and iallocator is not None:
10416         raise errors.OpPrereqError("Give either the iallocator or the new"
10417                                    " secondary, not both", errors.ECODE_INVAL)
10418
10419     elif remote_node is not None or iallocator is not None:
10420       # Not replacing the secondary
10421       raise errors.OpPrereqError("The iallocator and new node options can"
10422                                  " only be used when changing the"
10423                                  " secondary node", errors.ECODE_INVAL)
10424
10425   @staticmethod
10426   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10427     """Compute a new secondary node using an IAllocator.
10428
10429     """
10430     ial = IAllocator(lu.cfg, lu.rpc,
10431                      mode=constants.IALLOCATOR_MODE_RELOC,
10432                      name=instance_name,
10433                      relocate_from=list(relocate_from))
10434
10435     ial.Run(iallocator_name)
10436
10437     if not ial.success:
10438       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10439                                  " %s" % (iallocator_name, ial.info),
10440                                  errors.ECODE_NORES)
10441
10442     if len(ial.result) != ial.required_nodes:
10443       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10444                                  " of nodes (%s), required %s" %
10445                                  (iallocator_name,
10446                                   len(ial.result), ial.required_nodes),
10447                                  errors.ECODE_FAULT)
10448
10449     remote_node_name = ial.result[0]
10450
10451     lu.LogInfo("Selected new secondary for instance '%s': %s",
10452                instance_name, remote_node_name)
10453
10454     return remote_node_name
10455
10456   def _FindFaultyDisks(self, node_name):
10457     """Wrapper for L{_FindFaultyInstanceDisks}.
10458
10459     """
10460     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10461                                     node_name, True)
10462
10463   def _CheckDisksActivated(self, instance):
10464     """Checks if the instance disks are activated.
10465
10466     @param instance: The instance to check disks
10467     @return: True if they are activated, False otherwise
10468
10469     """
10470     nodes = instance.all_nodes
10471
10472     for idx, dev in enumerate(instance.disks):
10473       for node in nodes:
10474         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10475         self.cfg.SetDiskID(dev, node)
10476
10477         result = self.rpc.call_blockdev_find(node, dev)
10478
10479         if result.offline:
10480           continue
10481         elif result.fail_msg or not result.payload:
10482           return False
10483
10484     return True
10485
10486   def CheckPrereq(self):
10487     """Check prerequisites.
10488
10489     This checks that the instance is in the cluster.
10490
10491     """
10492     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10493     assert instance is not None, \
10494       "Cannot retrieve locked instance %s" % self.instance_name
10495
10496     if instance.disk_template != constants.DT_DRBD8:
10497       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10498                                  " instances", errors.ECODE_INVAL)
10499
10500     if len(instance.secondary_nodes) != 1:
10501       raise errors.OpPrereqError("The instance has a strange layout,"
10502                                  " expected one secondary but found %d" %
10503                                  len(instance.secondary_nodes),
10504                                  errors.ECODE_FAULT)
10505
10506     if not self.delay_iallocator:
10507       self._CheckPrereq2()
10508
10509   def _CheckPrereq2(self):
10510     """Check prerequisites, second part.
10511
10512     This function should always be part of CheckPrereq. It was separated and is
10513     now called from Exec because during node evacuation iallocator was only
10514     called with an unmodified cluster model, not taking planned changes into
10515     account.
10516
10517     """
10518     instance = self.instance
10519     secondary_node = instance.secondary_nodes[0]
10520
10521     if self.iallocator_name is None:
10522       remote_node = self.remote_node
10523     else:
10524       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10525                                        instance.name, instance.secondary_nodes)
10526
10527     if remote_node is None:
10528       self.remote_node_info = None
10529     else:
10530       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10531              "Remote node '%s' is not locked" % remote_node
10532
10533       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10534       assert self.remote_node_info is not None, \
10535         "Cannot retrieve locked node %s" % remote_node
10536
10537     if remote_node == self.instance.primary_node:
10538       raise errors.OpPrereqError("The specified node is the primary node of"
10539                                  " the instance", errors.ECODE_INVAL)
10540
10541     if remote_node == secondary_node:
10542       raise errors.OpPrereqError("The specified node is already the"
10543                                  " secondary node of the instance",
10544                                  errors.ECODE_INVAL)
10545
10546     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10547                                     constants.REPLACE_DISK_CHG):
10548       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10549                                  errors.ECODE_INVAL)
10550
10551     if self.mode == constants.REPLACE_DISK_AUTO:
10552       if not self._CheckDisksActivated(instance):
10553         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10554                                    " first" % self.instance_name,
10555                                    errors.ECODE_STATE)
10556       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10557       faulty_secondary = self._FindFaultyDisks(secondary_node)
10558
10559       if faulty_primary and faulty_secondary:
10560         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10561                                    " one node and can not be repaired"
10562                                    " automatically" % self.instance_name,
10563                                    errors.ECODE_STATE)
10564
10565       if faulty_primary:
10566         self.disks = faulty_primary
10567         self.target_node = instance.primary_node
10568         self.other_node = secondary_node
10569         check_nodes = [self.target_node, self.other_node]
10570       elif faulty_secondary:
10571         self.disks = faulty_secondary
10572         self.target_node = secondary_node
10573         self.other_node = instance.primary_node
10574         check_nodes = [self.target_node, self.other_node]
10575       else:
10576         self.disks = []
10577         check_nodes = []
10578
10579     else:
10580       # Non-automatic modes
10581       if self.mode == constants.REPLACE_DISK_PRI:
10582         self.target_node = instance.primary_node
10583         self.other_node = secondary_node
10584         check_nodes = [self.target_node, self.other_node]
10585
10586       elif self.mode == constants.REPLACE_DISK_SEC:
10587         self.target_node = secondary_node
10588         self.other_node = instance.primary_node
10589         check_nodes = [self.target_node, self.other_node]
10590
10591       elif self.mode == constants.REPLACE_DISK_CHG:
10592         self.new_node = remote_node
10593         self.other_node = instance.primary_node
10594         self.target_node = secondary_node
10595         check_nodes = [self.new_node, self.other_node]
10596
10597         _CheckNodeNotDrained(self.lu, remote_node)
10598         _CheckNodeVmCapable(self.lu, remote_node)
10599
10600         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10601         assert old_node_info is not None
10602         if old_node_info.offline and not self.early_release:
10603           # doesn't make sense to delay the release
10604           self.early_release = True
10605           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10606                           " early-release mode", secondary_node)
10607
10608       else:
10609         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10610                                      self.mode)
10611
10612       # If not specified all disks should be replaced
10613       if not self.disks:
10614         self.disks = range(len(self.instance.disks))
10615
10616     # TODO: This is ugly, but right now we can't distinguish between internal
10617     # submitted opcode and external one. We should fix that.
10618     if self.remote_node_info:
10619       # We change the node, lets verify it still meets instance policy
10620       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10621       ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10622                                        new_group_info)
10623       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10624                               ignore=self.ignore_ipolicy)
10625
10626     # TODO: compute disk parameters
10627     primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10628     secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10629     if primary_node_info.group != secondary_node_info.group:
10630       self.lu.LogInfo("The instance primary and secondary nodes are in two"
10631                       " different node groups; the disk parameters of the"
10632                       " primary node's group will be applied.")
10633
10634     self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10635
10636     for node in check_nodes:
10637       _CheckNodeOnline(self.lu, node)
10638
10639     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10640                                                           self.other_node,
10641                                                           self.target_node]
10642                               if node_name is not None)
10643
10644     # Release unneeded node and node resource locks
10645     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10646     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10647
10648     # Release any owned node group
10649     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10650       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10651
10652     # Check whether disks are valid
10653     for disk_idx in self.disks:
10654       instance.FindDisk(disk_idx)
10655
10656     # Get secondary node IP addresses
10657     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10658                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10659
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler:
    L{_ExecDrbd8Secondary} when a new node has been selected, otherwise
    L{_ExecDrbd8DiskOnly}. If the instance is not marked as up, its disks are
    activated before the handler runs and shut down again afterwards, even
    if the handler fails.

    @param feedback_fn: function called with progress messages
    @return: the value returned by the handler, or C{None} if there are no
        disks to replace

    """
    # With a delayed iallocator run the second half of the prerequisite
    # checks has not been executed yet
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))
      # Node and node resource locks must cover the same nodes
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
              self.lu.owned_locks(locking.LEVEL_NODE_RES))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    # An empty disk list means nothing was selected for replacement
    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    # Remember the decision so the "finally" clause below undoes exactly
    # what was done here
    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    # The handler is expected to have given up all node locks by now
    assert not self.lu.owned_locks(locking.LEVEL_NODE)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
      nodes = frozenset(self.node_secondary_ip)
      # With early release no node resource lock may be left; without it
      # only locks on the nodes involved in the operation may remain
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result
10724
10725   def _CheckVolumeGroup(self, nodes):
10726     self.lu.LogInfo("Checking volume groups")
10727
10728     vgname = self.cfg.GetVGName()
10729
10730     # Make sure volume group exists on all involved nodes
10731     results = self.rpc.call_vg_list(nodes)
10732     if not results:
10733       raise errors.OpExecError("Can't list volume groups on the nodes")
10734
10735     for node in nodes:
10736       res = results[node]
10737       res.Raise("Error checking node %s" % node)
10738       if vgname not in res.payload:
10739         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10740                                  (vgname, node))
10741
10742   def _CheckDisksExistence(self, nodes):
10743     # Check disk existence
10744     for idx, dev in enumerate(self.instance.disks):
10745       if idx not in self.disks:
10746         continue
10747
10748       for node in nodes:
10749         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10750         self.cfg.SetDiskID(dev, node)
10751
10752         result = self.rpc.call_blockdev_find(node, dev)
10753
10754         msg = result.fail_msg
10755         if msg or not result.payload:
10756           if not msg:
10757             msg = "disk not found"
10758           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10759                                    (idx, node, msg))
10760
10761   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10762     for idx, dev in enumerate(self.instance.disks):
10763       if idx not in self.disks:
10764         continue
10765
10766       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10767                       (idx, node_name))
10768
10769       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10770                                    ldisk=ldisk):
10771         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10772                                  " replace disks for instance %s" %
10773                                  (node_name, self.instance.name))
10774
10775   def _CreateNewStorage(self, node_name):
10776     """Create new storage on the primary or secondary node.
10777
10778     This is only used for same-node replaces, not for changing the
10779     secondary node, hence we don't want to modify the existing disk.
10780
10781     """
10782     iv_names = {}
10783
10784     for idx, dev in enumerate(self.instance.disks):
10785       if idx not in self.disks:
10786         continue
10787
10788       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10789
10790       self.cfg.SetDiskID(dev, node_name)
10791
10792       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10793       names = _GenerateUniqueNames(self.lu, lv_names)
10794
10795       _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10796
10797       vg_data = dev.children[0].logical_id[0]
10798       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10799                              logical_id=(vg_data, names[0]), params=data_p)
10800       vg_meta = dev.children[1].logical_id[0]
10801       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10802                              logical_id=(vg_meta, names[1]), params=meta_p)
10803
10804       new_lvs = [lv_data, lv_meta]
10805       old_lvs = [child.Copy() for child in dev.children]
10806       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10807
10808       # we pass force_create=True to force the LVM creation
10809       for new_lv in new_lvs:
10810         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10811                         _GetInstanceInfoText(self.instance), False)
10812
10813     return iv_names
10814
10815   def _CheckDevices(self, node_name, iv_names):
10816     for name, (dev, _, _) in iv_names.iteritems():
10817       self.cfg.SetDiskID(dev, node_name)
10818
10819       result = self.rpc.call_blockdev_find(node_name, dev)
10820
10821       msg = result.fail_msg
10822       if msg or not result.payload:
10823         if not msg:
10824           msg = "disk not found"
10825         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10826                                  (name, msg))
10827
10828       if result.payload.is_degraded:
10829         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10830
10831   def _RemoveOldStorage(self, node_name, iv_names):
10832     for name, (_, old_lvs, _) in iv_names.iteritems():
10833       self.lu.LogInfo("Remove logical volumes for %s" % name)
10834
10835       for lv in old_lvs:
10836         self.cfg.SetDiskID(lv, node_name)
10837
10838         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10839         if msg:
10840           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10841                              hint="remove unused LVs manually")
10842
def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
  """Replace a disk on the primary or secondary for DRBD 8.

  The algorithm for replace is quite complicated:

    1. for each disk to be replaced:

      1. create new LVs on the target node with unique names
      1. detach old LVs from the drbd device
      1. rename old LVs to name_replaced-<time_t>
      1. rename new LVs to old LVs
      1. attach the new LVs (with the old names now) to the drbd device

    1. wait for sync across all devices

    1. for each modified disk:

      1. remove old LVs (which have the name name_replaced-<time_t>)

  Failures are not very well handled.

  @param feedback_fn: unused in this method; progress is reported via
      C{self.lu}
  @raise errors.OpExecError: if the new LVs cannot be attached to the
      drbd device, or if the final device check finds a missing or
      degraded device

  """
  steps_total = 6

  def _ReplacedId(disk, suffix):
    """Return C{disk.physical_id} with "_replaced-<suffix>" on its name."""
    return (disk.physical_id[0],
            disk.physical_id[1] + "_replaced-%s" % suffix)

  # Step: check device activation
  self.lu.LogStep(1, steps_total, "Check device existence")
  self._CheckDisksExistence([self.other_node, self.target_node])
  self._CheckVolumeGroup([self.target_node, self.other_node])

  # Step: check other node consistency
  self.lu.LogStep(2, steps_total, "Check peer consistency")
  self._CheckDisksConsistency(self.other_node,
                              self.other_node == self.instance.primary_node,
                              False)

  # Step: create new storage
  self.lu.LogStep(3, steps_total, "Allocate new storage")
  iv_names = self._CreateNewStorage(self.target_node)

  # Step: for each lv, detach+rename*2+attach
  self.lu.LogStep(4, steps_total, "Changing drbd configuration")
  for dev, old_lvs, new_lvs in iv_names.itervalues():
    self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

    result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                   old_lvs)
    result.Raise("Can't detach drbd from local storage on node"
                 " %s for device %s" % (self.target_node, dev.iv_name))
    #dev.children = []
    #cfg.Update(instance)

    # ok, we created the new LVs, so now we know we have the needed
    # storage; as such, we proceed on the target node to rename
    # old_lv to _old, and new_lv to old_lv; note that we rename LVs
    # using the assumption that logical_id == physical_id (which in
    # turn is the unique_id on that node)

    # FIXME(iustin): use a better name for the replaced LVs
    # the suffix is taken once per device, so all LVs of one device
    # share the same marker
    temp_suffix = int(time.time())

    # Build the rename list based on what LVs exist on the node
    rename_old_to_new = []
    for to_ren in old_lvs:
      result = self.rpc.call_blockdev_find(self.target_node, to_ren)
      if not result.fail_msg and result.payload:
        # device exists
        rename_old_to_new.append((to_ren, _ReplacedId(to_ren, temp_suffix)))

    self.lu.LogInfo("Renaming the old LVs on the target node")
    result = self.rpc.call_blockdev_rename(self.target_node,
                                           rename_old_to_new)
    result.Raise("Can't rename old LVs on node %s" % self.target_node)

    # Now we rename the new LVs to the old LVs
    self.lu.LogInfo("Renaming the new LVs on the target node")
    rename_new_to_old = [(new, old.physical_id)
                         for old, new in zip(old_lvs, new_lvs)]
    result = self.rpc.call_blockdev_rename(self.target_node,
                                           rename_new_to_old)
    result.Raise("Can't rename new LVs on node %s" % self.target_node)

    # Intermediate steps of in memory modifications
    for old, new in zip(old_lvs, new_lvs):
      new.logical_id = old.logical_id
      self.cfg.SetDiskID(new, self.target_node)

    # We need to modify old_lvs so that removal later removes the
    # right LVs, not the newly added ones; note that old_lvs is a
    # copy here
    for disk in old_lvs:
      disk.logical_id = _ReplacedId(disk, temp_suffix)
      self.cfg.SetDiskID(disk, self.target_node)

    # Now that the new lvs have the old name, we can add them to the device
    self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
    result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                new_lvs)
    msg = result.fail_msg
    if msg:
      # best-effort rollback: try to remove every new LV before aborting
      for new_lv in new_lvs:
        msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                             new_lv).fail_msg
        if msg2:
          self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                             hint=("cleanup manually the unused logical"
                                   " volumes"))
      raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

  # steps 5 and 6 swap places depending on early_release, hence the counter
  cstep = itertools.count(5)

  if self.early_release:
    self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
    # TODO: Check if releasing locks early still makes sense
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
  else:
    # Release all resource locks except those used by the instance
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                  keep=self.node_secondary_ip.keys())

  # Release all node locks while waiting for sync
  _ReleaseLocks(self.lu, locking.LEVEL_NODE)

  # TODO: Can the instance lock be downgraded here? Take the optional disk
  # shutdown in the caller into consideration.

  # Wait for sync
  # This can fail as the old devices are degraded and _WaitForSync
  # does a combined result over all disks, so we don't check its return value
  self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
  _WaitForSync(self.lu, self.instance)

  # Check all devices manually
  self._CheckDevices(self.instance.primary_node, iv_names)

  # Step: remove old storage
  if not self.early_release:
    self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
10984
def _ExecDrbd8Secondary(self, feedback_fn):
  """Replace the secondary node for DRBD 8.

  The algorithm for replace is quite complicated:
    - for all disks of the instance:
      - create new LVs on the new node with same names
      - shutdown the drbd device on the old secondary
      - disconnect the drbd network on the primary
      - create the drbd device on the new secondary
      - network attach the drbd on the primary, using an artifice:
        the drbd code for Attach() will connect to the network if it
        finds a device which is connected to the good local disks but
        not network enabled
    - wait for sync across all devices
    - remove all disks from the old secondary

  Failures are not very well handled.

  @param feedback_fn: passed on to C{self.cfg.Update} when the instance
      configuration is saved
  @raise errors.OpExecError: if the primary's disks cannot be detached
      from the network, or if the final device check finds a missing or
      degraded device

  """
  steps_total = 6

  pnode = self.instance.primary_node

  # Step: check device activation
  self.lu.LogStep(1, steps_total, "Check device existence")
  self._CheckDisksExistence([self.instance.primary_node])
  self._CheckVolumeGroup([self.instance.primary_node])

  # Step: check other node consistency
  self.lu.LogStep(2, steps_total, "Check peer consistency")
  self._CheckDisksConsistency(self.instance.primary_node, True, True)

  # Step: create new storage
  self.lu.LogStep(3, steps_total, "Allocate new storage")
  for idx, dev in enumerate(self.instance.disks):
    self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                    (self.new_node, idx))
    # we pass force_create=True to force LVM creation
    for new_lv in dev.children:
      _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                      _GetInstanceInfoText(self.instance), False)

  # Step 4: drbd minors and drbd setups changes
  # after this, we must manually remove the drbd minors on both the
  # error and the success paths
  self.lu.LogStep(4, steps_total, "Changing drbd configuration")
  # one minor on the new node is requested per instance disk
  minors = self.cfg.AllocateDRBDMinor([self.new_node
                                       for dev in self.instance.disks],
                                      self.instance.name)
  logging.debug("Allocated minors %r", minors)

  iv_names = {}
  for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
    self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                    (self.new_node, idx))
    # create new devices on new_node; note that we create two IDs:
    # one without port, so the drbd will be activated without
    # networking information on the new node at this stage, and one
    # with network, for the later activation once the primary has been
    # detached from the old secondary
    (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
    if self.instance.primary_node == o_node1:
      p_minor = o_minor1
    else:
      assert self.instance.primary_node == o_node2, "Three-node instance?"
      p_minor = o_minor2

    new_alone_id = (self.instance.primary_node, self.new_node, None,
                    p_minor, new_minor, o_secret)
    new_net_id = (self.instance.primary_node, self.new_node, o_port,
                  p_minor, new_minor, o_secret)

    iv_names[idx] = (dev, dev.children, new_net_id)
    logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                  new_net_id)
    drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
    new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                            logical_id=new_alone_id,
                            children=dev.children,
                            size=dev.size,
                            params=drbd_params)
    try:
      _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                            _GetInstanceInfoText(self.instance), False)
    except errors.GenericError:
      # give back the minors allocated above before propagating
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise

  # We have new devices, shutdown the drbd on the old secondary
  for idx, dev in enumerate(self.instance.disks):
    self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
    self.cfg.SetDiskID(dev, self.target_node)
    msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
    if msg:
      # only a warning: the replacement continues regardless
      self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                         " node: %s" % (idx, msg),
                         hint=("Please cleanup this device manually as"
                               " soon as possible"))

  self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
  result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                             self.instance.disks)[pnode]

  msg = result.fail_msg
  if msg:
    # detaches didn't succeed (unlikely)
    self.cfg.ReleaseDRBDMinors(self.instance.name)
    raise errors.OpExecError("Can't detach the disks from the network on"
                             " old node: %s" % (msg,))

  # if we managed to detach at least one, we update all the disks of
  # the instance to point to the new secondary
  self.lu.LogInfo("Updating instance configuration")
  for dev, _, new_logical_id in iv_names.itervalues():
    dev.logical_id = new_logical_id
    self.cfg.SetDiskID(dev, self.instance.primary_node)

  self.cfg.Update(self.instance, feedback_fn)

  # Release all node locks (the configuration has been updated)
  _ReleaseLocks(self.lu, locking.LEVEL_NODE)

  # and now perform the drbd attach
  self.lu.LogInfo("Attaching primary drbds to new secondary"
                  " (standalone => connected)")
  result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                          self.new_node],
                                         self.node_secondary_ip,
                                         self.instance.disks,
                                         self.instance.name,
                                         False)
  for to_node, to_result in result.items():
    msg = to_result.fail_msg
    if msg:
      self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                         to_node, msg,
                         hint=("please do a gnt-instance info to see the"
                               " status of disks"))

  # steps 5 and 6 swap places depending on early_release, hence the counter
  cstep = itertools.count(5)

  if self.early_release:
    self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
    # TODO: Check if releasing locks early still makes sense
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
  else:
    # Release all resource locks except those used by the instance
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                  keep=self.node_secondary_ip.keys())

  # TODO: Can the instance lock be downgraded here? Take the optional disk
  # shutdown in the caller into consideration.

  # Wait for sync
  # This can fail as the old devices are degraded and _WaitForSync
  # does a combined result over all disks, so we don't check its return value
  self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
  _WaitForSync(self.lu, self.instance)

  # Check all devices manually
  self._CheckDevices(self.instance.primary_node, iv_names)

  # Step: remove old storage
  if not self.early_release:
    self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
11151
11152
11153 class LURepairNodeStorage(NoHooksLU):
11154   """Repairs the volume group on a node.
11155
11156   """
11157   REQ_BGL = False
11158
11159   def CheckArguments(self):
11160     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11161
11162     storage_type = self.op.storage_type
11163
11164     if (constants.SO_FIX_CONSISTENCY not in
11165         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11166       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11167                                  " repaired" % storage_type,
11168                                  errors.ECODE_INVAL)
11169
11170   def ExpandNames(self):
11171     self.needed_locks = {
11172       locking.LEVEL_NODE: [self.op.node_name],
11173       }
11174
11175   def _CheckFaultyDisks(self, instance, node_name):
11176     """Ensure faulty disks abort the opcode or at least warn."""
11177     try:
11178       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11179                                   node_name, True):
11180         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11181                                    " node '%s'" % (instance.name, node_name),
11182                                    errors.ECODE_STATE)
11183     except errors.OpPrereqError, err:
11184       if self.op.ignore_consistency:
11185         self.proc.LogWarning(str(err.args[0]))
11186       else:
11187         raise
11188
11189   def CheckPrereq(self):
11190     """Check prerequisites.
11191
11192     """
11193     # Check whether any instance on this node has faulty disks
11194     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11195       if inst.admin_state != constants.ADMINST_UP:
11196         continue
11197       check_nodes = set(inst.all_nodes)
11198       check_nodes.discard(self.op.node_name)
11199       for inst_node_name in check_nodes:
11200         self._CheckFaultyDisks(inst, inst_node_name)
11201
11202   def Exec(self, feedback_fn):
11203     feedback_fn("Repairing storage unit '%s' on %s ..." %
11204                 (self.op.name, self.op.node_name))
11205
11206     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11207     result = self.rpc.call_storage_execute(self.op.node_name,
11208                                            self.op.storage_type, st_args,
11209                                            self.op.name,
11210                                            constants.SO_FIX_CONSISTENCY)
11211     result.Raise("Failed to repair storage unit '%s' on %s" %
11212                  (self.op.name, self.op.node_name))
11213
11214
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  The target is either chosen by an iallocator or given explicitly as
  C{remote_node}; exactly one of the two must be set (asserted in
  L{Exec}).

  """
  REQ_BGL = False

  # Translation of the opcode's evacuation mode to the iallocator's
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    """Validate the iallocator/remote node combination.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand node names and declare the (shared) locks to acquire.

    @raise errors.OpPrereqError: if the remote node is the evacuated
        node itself, or if a remote node is given for a mode other than
        L{constants.NODE_EVAC_SEC}

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    @return: set containing the evacuated node plus either the members
        of its node group (no remote node given) or the remote node

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    @raise errors.OpPrereqError: always for L{constants.NODE_EVAC_ALL},
        which is currently rejected

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      # the assignment is currently dead because of the raise below; it
      # records which helper applies once the TODO is resolved
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    """Fill in the locks for C{level} from the optimistic node/instance sets.

    """
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    """Verify that the optimistically acquired locks are still accurate.

    Sets C{self.instances} and C{self.instance_names} for L{Exec}.

    @raise errors.OpPrereqError: if the needed nodes are no longer
        covered by the owned node locks, or if the remote node is the
        primary node of an instance to be evacuated
    @raise errors.OpExecError: if the node groups or the instances on
        the node changed since the locks were acquired

    """
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Build the jobs that perform the evacuation.

    @return: L{ResultWithJobs} wrapping a list of jobs, each job being
        a list of opcodes; the list is empty if there is nothing to
        evacuate
    @raise errors.OpPrereqError: if the iallocator run is unsuccessful

    """
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      # one single-opcode job per instance
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
11412
11413
11414 def _SetOpEarlyRelease(early_release, op):
11415   """Sets C{early_release} flag on opcodes if available.
11416
11417   """
11418   try:
11419     op.early_release = early_release
11420   except AttributeError:
11421     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11422
11423   return op
11424
11425
11426 def _NodeEvacDest(use_nodes, group, nodes):
11427   """Returns group or nodes depending on caller's choice.
11428
11429   """
11430   if use_nodes:
11431     return utils.CommaJoin(nodes)
11432   else:
11433     return group
11434
11435
11436 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11437   """Unpacks the result of change-group and node-evacuate iallocator requests.
11438
11439   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11440   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11441
11442   @type lu: L{LogicalUnit}
11443   @param lu: Logical unit instance
11444   @type alloc_result: tuple/list
11445   @param alloc_result: Result from iallocator
11446   @type early_release: bool
11447   @param early_release: Whether to release locks early if possible
11448   @type use_nodes: bool
11449   @param use_nodes: Whether to display node names instead of groups
11450
11451   """
11452   (moved, failed, jobs) = alloc_result
11453
11454   if failed:
11455     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11456                                  for (name, reason) in failed)
11457     lu.LogWarning("Unable to evacuate instances %s", failreason)
11458     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11459
11460   if moved:
11461     lu.LogInfo("Instances to be moved: %s",
11462                utils.CommaJoin("%s (to %s)" %
11463                                (name, _NodeEvacDest(use_nodes, group, nodes))
11464                                for (name, group, nodes) in moved))
11465
11466   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11467               map(opcodes.OpCode.LoadOpCode, ops))
11468           for ops in jobs]
11469
11470
11471 class LUInstanceGrowDisk(LogicalUnit):
11472   """Grow a disk of an instance.
11473
11474   """
11475   HPATH = "disk-grow"
11476   HTYPE = constants.HTYPE_INSTANCE
11477   REQ_BGL = False
11478
def ExpandNames(self):
  """Lock the instance and prepare both node lock levels.

  The node and node-resource levels start out empty and are marked
  for recalculation with L{constants.LOCKS_REPLACE}.

  """
  self._ExpandAndLockInstance()

  for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
    self.needed_locks[level] = []

  for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
    self.recalculate_locks[level] = constants.LOCKS_REPLACE
11485
def DeclareLocks(self, level):
  """Compute the node locks, then mirror them for node resources.

  @param level: locking level being declared; levels other than node
      and node-resource are left untouched

  """
  if level == locking.LEVEL_NODE:
    self._LockInstancesNodes()
    return

  if level == locking.LEVEL_NODE_RES:
    # Copy node locks
    node_locks = self.needed_locks[locking.LEVEL_NODE]
    self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]
11493
def BuildHooksEnv(self):
  """Build hooks env.

  This runs on the master, the primary and all the secondaries.

  @return: dict with the C{DISK}, C{AMOUNT} and C{ABSOLUTE} opcode
      values, updated with the instance's own hook environment

  """
  env = dict(DISK=self.op.disk,
             AMOUNT=self.op.amount,
             ABSOLUTE=self.op.absolute)

  # instance-derived entries are applied last, so they win on a clash
  instance_env = _BuildInstanceHookEnvByObject(self, self.instance)
  env.update(instance_env)

  return env
11507
def BuildHooksNodes(self):
  """Build hooks nodes.

  @return: pair holding the same list twice: the master node followed
      by all nodes of the instance

  """
  hook_nodes = [self.cfg.GetMasterNode()]
  hook_nodes.extend(self.instance.all_nodes)

  return (hook_nodes, hook_nodes)
11514
def CheckPrereq(self):
  """Check prerequisites.

  Verifies that the instance's nodes are online, that its disk
  template can be grown and that the requested size change is not
  negative. Sets C{self.instance}, C{self.disk}, C{self.delta} and
  C{self.target}.

  @raise errors.OpPrereqError: if the disk template is not growable,
      or the requested size or increment would shrink the disk

  """
  inst = self.cfg.GetInstanceInfo(self.op.instance_name)
  assert inst is not None, \
    "Cannot retrieve locked instance %s" % self.op.instance_name

  inst_nodes = list(inst.all_nodes)
  for node in inst_nodes:
    _CheckNodeOnline(self, node)

  self.instance = inst

  if inst.disk_template not in constants.DTS_GROWABLE:
    raise errors.OpPrereqError("Instance's disk layout does not support"
                               " growing", errors.ECODE_INVAL)

  self.disk = inst.FindDisk(self.op.disk)

  if not self.op.absolute:
    # the amount is an increment on top of the current size
    self.delta = self.op.amount
    self.target = self.disk.size + self.delta
    if self.delta < 0:
      raise errors.OpPrereqError("Requested increment (%s) is negative" %
                                 utils.FormatUnit(self.delta, "h"),
                                 errors.ECODE_INVAL)
  else:
    # the amount is the final size
    self.target = self.op.amount
    self.delta = self.target - self.disk.size
    if self.delta < 0:
      raise errors.OpPrereqError("Requested size (%s) is smaller than "
                                 "current disk size (%s)" %
                                 (utils.FormatUnit(self.target, "h"),
                                  utils.FormatUnit(self.disk.size, "h")),
                                 errors.ECODE_STATE)

  unchecked_templates = (constants.DT_FILE,
                         constants.DT_SHARED_FILE,
                         constants.DT_RBD)
  if inst.disk_template in unchecked_templates:
    # TODO: check the free disk space for file, when that feature will be
    # supported
    return

  _CheckNodesFreeDiskPerVG(self, inst_nodes,
                           self.disk.ComputeGrowth(self.delta))
11560
  def Exec(self, feedback_fn):
    """Execute disk grow.

    The disk is assembled first; the grow request is then sent to every
    node of the instance in dry-run mode and, once all dry runs have
    succeeded, a second time for real. Afterwards the growth is recorded
    in the disk object and the configuration is updated, the node locks
    are released and the instance lock is downgraded. If requested, the
    method finally waits for the disk to sync.

    @type feedback_fn: callable
    @param feedback_fn: function called with progress messages
    @raise errors.OpExecError: if the disk cannot be activated

    """
    instance = self.instance
    disk = self.disk

    # Sanity checks on the locks held when entering this method
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.delta, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.delta, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    # Persist the new size in the configuration
    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      # The instance is not marked as up, so the disk assembled above is
      # shut down again
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    # The node resource and instance locks must still be held at this point
    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11624
11625
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  For every wanted instance a dictionary is built from its configuration
  and, unless only static data was requested, from status information
  retrieved via RPC.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Determine the wanted instances and declare the needed locks.

    """
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not self.op.static and not self.op.use_locking:
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.use_locking and not self.op.instances:
      # The names are taken from the acquired locks later on
      self.wanted_names = None
    else:
      # Expand the instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)

    if not self.op.use_locking:
      return

    self.share_locks = _ShareAll()

    if self.wanted_names is None:
      instance_locks = locking.ALL_SET
    else:
      instance_locks = self.wanted_names
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks

    self.needed_locks[locking.LEVEL_NODEGROUP] = []
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node group and node locks, if locking is used.

    """
    if not self.op.use_locking:
      return

    if level == locking.LEVEL_NODEGROUP:
      # Lock all groups used by instances optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      group_uuids = set()
      for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
        group_uuids.update(self.cfg.GetInstanceNodeGroups(instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = frozenset(group_uuids)

    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves the wanted instance names to instance objects. With locking,
    the owned group and node locks are verified through
    L{_CheckInstancesNodeGroups}; without it, no locks may be held.

    """
    (held_instances, held_groups, held_nodes) = \
      [frozenset(self.owned_locks(lock_level))
       for lock_level in (locking.LEVEL_INSTANCE,
                          locking.LEVEL_NODEGROUP,
                          locking.LEVEL_NODE)]

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = held_instances

    by_name = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if not self.op.use_locking:
      assert not (held_instances or held_groups or held_nodes)
    else:
      _CheckInstancesNodeGroups(self.cfg, by_name, held_groups, held_nodes,
                                None)

    self.wanted_instances = by_name.values()

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    @return: None if static data was requested, no node was given, the
      node is offline or the RPC returned no payload; otherwise a tuple of
      (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
      ldisk_status)

    """
    if self.op.static:
      return None
    if not node:
      return None

    self.cfg.SetDiskID(dev, node)

    lookup = self.rpc.call_blockdev_find(node, dev)
    if lookup.offline:
      return None

    lookup.Raise("Can't compute disk status for %s" % instance_name)

    blockdev = lookup.payload
    if blockdev is None:
      return None

    fields = ("dev_path", "major", "minor", "sync_percent",
              "estimated_time", "is_degraded", "ldisk_status")
    return tuple([getattr(blockdev, name) for name in fields])

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Builds a dictionary describing C{dev}, including its status on the
    primary and secondary node, and does the same recursively for all of
    its children.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # For DRBD the secondary is the node in the logical ID which is not
      # the primary node; otherwise the node passed in is used
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      child_status = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      child_status = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": child_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    info = {}

    cluster = self.cfg.GetClusterInfo()

    all_node_names = itertools.chain(*[inst.all_nodes
                                       for inst in self.wanted_instances])
    nodes = dict(self.cfg.GetMultiNodeInfo(all_node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    def _GroupName(uuid):
      return groups[uuid].name

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if pnode.offline:
        self.LogWarning("Primary node %s is marked offline, returning static"
                        " information only for instance %s" %
                        (pnode.name, instance.name))

      if self.op.static or pnode.offline:
        remote_state = None
      else:
        reply = self.rpc.call_instance_info(instance.primary_node,
                                            instance.name,
                                            instance.hypervisor)
        reply.Raise("Error checking node %s" % instance.primary_node)
        live_info = reply.payload
        if live_info and "state" in live_info:
          remote_state = "up"
        elif instance.admin_state == constants.ADMINST_UP:
          remote_state = "down"
        else:
          remote_state = instance.admin_state

      disks = [self._ComputeDiskStatus(instance, None, disk)
               for disk in instance.disks]

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      info[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": _GroupName(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": [_GroupName(uuid)
                               for uuid in snodes_group_uuids],
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return info
11829
11830
def PrepareContainerMods(mods, private_fn):
  """Extends container modifications with a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable invoked once per modification to construct
    its private data field; if None, the field is set to None
  @rtype: list
  @return: list of (operation, index, parameters, private) tuples

  """
  prepared = []

  for (op, idx, params) in mods:
    if private_fn is None:
      private = None
    else:
      private = private_fn()

    prepared.append((op, idx, params, private))

  return prepared
11848
11849
#: Type check for the changes returned by the callbacks passed to
#: L{ApplyContainerMods}; that function passes both C{None} and a list of
#: two-item (description, value) entries through it
_TApplyContModsCbChanges = ht.TMaybeListOf(
  ht.TAnd(ht.TIsLength(2),
          ht.TItems([ht.TNonEmptyString, ht.TAny])))
11857
11858
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List to which descriptions of the applied changes are
    appended; nothing is recorded if None
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list; if None, the parameters themselves are used as the new item
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}
  @raise IndexError: if an index is negative (other than -1) or does not
    refer to a usable position in the container
  @raise errors.ProgrammerError: if an operation is not known

  """
  for (op, idx, params, private) in mods:
    size = len(container)

    # -1 is the only accepted negative index; it denotes the last item or,
    # when adding, the end of the container
    if idx != -1 and idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    if idx > size:
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, size))

    if idx == -1:
      absidx = size - 1
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      at_end = (idx == -1)

      # Position the new item is going to have
      if at_end:
        addidx = size
      else:
        addidx = idx

      if create_fn is not None:
        (item, changes) = create_fn(addidx, params, private)
      else:
        item = params

      if at_end:
        container.append(item)
      else:
        assert idx >= 0
        assert idx <= len(container)
        # list.insert places the item before the given index
        container.insert(idx, item)
    else:
      # All other operations work on an item which must already exist
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if chgdesc is not None and changes is not None:
      chgdesc.extend(changes)
11946
11947
def _UpdateIvNames(base_index, disks):
  """Updates the C{iv_name} attribute of disks.

  Each disk is named C{disk/N}, with N starting at C{base_index} for the
  first disk and increasing by one for every following disk.

  @param base_index: number used for the first disk
  @type disks: list of L{objects.Disk}
  @param disks: disks whose C{iv_name} is overwritten

  """
  offset = 0
  for disk in disks:
    disk.iv_name = "disk/%s" % (base_index + offset, )
    offset += 1
11956
11957
class _InstNicModPrivate:
  """Private data attached to a network interface modification.

  Used by L{LUInstanceSetParams}; both attributes start out as None.

  """
  def __init__(self):
    self.params = self.filled = None
11967
11968
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
11976
11977   @staticmethod
11978   def _UpgradeDiskNicMods(kind, mods, verify_fn):
11979     assert ht.TList(mods)
11980     assert not mods or len(mods[0]) in (2, 3)
11981
11982     if mods and len(mods[0]) == 2:
11983       result = []
11984
11985       addremove = 0
11986       for op, params in mods:
11987         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
11988           result.append((op, -1, params))
11989           addremove += 1
11990
11991           if addremove > 1:
11992             raise errors.OpPrereqError("Only one %s add or remove operation is"
11993                                        " supported at a time" % kind,
11994                                        errors.ECODE_INVAL)
11995         else:
11996           result.append((constants.DDM_MODIFY, op, params))
11997
11998       assert verify_fn(result)
11999     else:
12000       result = mods
12001
12002     return result
12003
12004   @staticmethod
12005   def _CheckMods(kind, mods, key_types, item_fn):
12006     """Ensures requested disk/NIC modifications are valid.
12007
12008     """
12009     for (op, _, params) in mods:
12010       assert ht.TDict(params)
12011
12012       utils.ForceDictType(params, key_types)
12013
12014       if op == constants.DDM_REMOVE:
12015         if params:
12016           raise errors.OpPrereqError("No settings should be passed when"
12017                                      " removing a %s" % kind,
12018                                      errors.ECODE_INVAL)
12019       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12020         item_fn(op, params)
12021       else:
12022         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12023
12024   @staticmethod
12025   def _VerifyDiskModification(op, params):
12026     """Verifies a disk modification.
12027
12028     """
12029     if op == constants.DDM_ADD:
12030       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12031       if mode not in constants.DISK_ACCESS_SET:
12032         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12033                                    errors.ECODE_INVAL)
12034
12035       size = params.get(constants.IDISK_SIZE, None)
12036       if size is None:
12037         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12038                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
12039
12040       try:
12041         size = int(size)
12042       except (TypeError, ValueError), err:
12043         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12044                                    errors.ECODE_INVAL)
12045
12046       params[constants.IDISK_SIZE] = size
12047
12048     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12049       raise errors.OpPrereqError("Disk size change not possible, use"
12050                                  " grow-disk", errors.ECODE_INVAL)
12051
12052   @staticmethod
12053   def _VerifyNicModification(op, params):
12054     """Verifies a network interface modification.
12055
12056     """
12057     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12058       ip = params.get(constants.INIC_IP, None)
12059       if ip is None:
12060         pass
12061       elif ip.lower() == constants.VALUE_NONE:
12062         params[constants.INIC_IP] = None
12063       elif not netutils.IPAddress.IsValid(ip):
12064         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12065                                    errors.ECODE_INVAL)
12066
12067       bridge = params.get("bridge", None)
12068       link = params.get(constants.INIC_LINK, None)
12069       if bridge and link:
12070         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12071                                    " at the same time", errors.ECODE_INVAL)
12072       elif bridge and bridge.lower() == constants.VALUE_NONE:
12073         params["bridge"] = None
12074       elif link and link.lower() == constants.VALUE_NONE:
12075         params[constants.INIC_LINK] = None
12076
12077       if op == constants.DDM_ADD:
12078         macaddr = params.get(constants.INIC_MAC, None)
12079         if macaddr is None:
12080           params[constants.INIC_MAC] = constants.VALUE_AUTO
12081
12082       if constants.INIC_MAC in params:
12083         macaddr = params[constants.INIC_MAC]
12084         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12085           macaddr = utils.NormalizeAndValidateMac(macaddr)
12086
12087         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12088           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12089                                      " modifying an existing NIC",
12090                                      errors.ECODE_INVAL)
12091
12092   def CheckArguments(self):
12093     if not (self.op.nics or self.op.disks or self.op.disk_template or
12094             self.op.hvparams or self.op.beparams or self.op.os_name or
12095             self.op.offline is not None or self.op.runtime_mem):
12096       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12097
12098     if self.op.hvparams:
12099       _CheckGlobalHvParams(self.op.hvparams)
12100
12101     self.op.disks = \
12102       self._UpgradeDiskNicMods("disk", self.op.disks,
12103         opcodes.OpInstanceSetParams.TestDiskModifications)
12104     self.op.nics = \
12105       self._UpgradeDiskNicMods("NIC", self.op.nics,
12106         opcodes.OpInstanceSetParams.TestNicModifications)
12107
12108     # Check disk modifications
12109     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12110                     self._VerifyDiskModification)
12111
12112     if self.op.disks and self.op.disk_template is not None:
12113       raise errors.OpPrereqError("Disk template conversion and other disk"
12114                                  " changes not supported at the same time",
12115                                  errors.ECODE_INVAL)
12116
12117     if (self.op.disk_template and
12118         self.op.disk_template in constants.DTS_INT_MIRROR and
12119         self.op.remote_node is None):
12120       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12121                                  " one requires specifying a secondary node",
12122                                  errors.ECODE_INVAL)
12123
12124     # Check NIC modifications
12125     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12126                     self._VerifyNicModification)
12127
12128   def ExpandNames(self):
12129     self._ExpandAndLockInstance()
12130     # Can't even acquire node locks in shared mode as upcoming changes in
12131     # Ganeti 2.6 will start to modify the node object on disk conversion
12132     self.needed_locks[locking.LEVEL_NODE] = []
12133     self.needed_locks[locking.LEVEL_NODE_RES] = []
12134     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12135
12136   def DeclareLocks(self, level):
12137     # TODO: Acquire group lock in shared mode (disk parameters)
12138     if level == locking.LEVEL_NODE:
12139       self._LockInstancesNodes()
12140       if self.op.disk_template and self.op.remote_node:
12141         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12142         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12143     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12144       # Copy node locks
12145       self.needed_locks[locking.LEVEL_NODE_RES] = \
12146         self.needed_locks[locking.LEVEL_NODE][:]
12147
12148   def BuildHooksEnv(self):
12149     """Build hooks env.
12150
12151     This runs on the master, primary and secondaries.
12152
12153     """
12154     args = dict()
12155     if constants.BE_MINMEM in self.be_new:
12156       args["minmem"] = self.be_new[constants.BE_MINMEM]
12157     if constants.BE_MAXMEM in self.be_new:
12158       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12159     if constants.BE_VCPUS in self.be_new:
12160       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12161     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12162     # information at all.
12163
12164     if self._new_nics is not None:
12165       nics = []
12166
12167       for nic in self._new_nics:
12168         nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12169         mode = nicparams[constants.NIC_MODE]
12170         link = nicparams[constants.NIC_LINK]
12171         nics.append((nic.ip, nic.mac, mode, link))
12172
12173       args["nics"] = nics
12174
12175     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12176     if self.op.disk_template:
12177       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12178     if self.op.runtime_mem:
12179       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12180
12181     return env
12182
12183   def BuildHooksNodes(self):
12184     """Build hooks nodes.
12185
12186     """
12187     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12188     return (nl, nl)
12189
12190   def _PrepareNicModification(self, params, private, old_ip, old_params,
12191                               cluster, pnode):
12192     update_params_dict = dict([(key, params[key])
12193                                for key in constants.NICS_PARAMETERS
12194                                if key in params])
12195
12196     if "bridge" in params:
12197       update_params_dict[constants.NIC_LINK] = params["bridge"]
12198
12199     new_params = _GetUpdatedParams(old_params, update_params_dict)
12200     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12201
12202     new_filled_params = cluster.SimpleFillNIC(new_params)
12203     objects.NIC.CheckParameterSyntax(new_filled_params)
12204
12205     new_mode = new_filled_params[constants.NIC_MODE]
12206     if new_mode == constants.NIC_MODE_BRIDGED:
12207       bridge = new_filled_params[constants.NIC_LINK]
12208       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12209       if msg:
12210         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12211         if self.op.force:
12212           self.warn.append(msg)
12213         else:
12214           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12215
12216     elif new_mode == constants.NIC_MODE_ROUTED:
12217       ip = params.get(constants.INIC_IP, old_ip)
12218       if ip is None:
12219         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12220                                    " on a routed NIC", errors.ECODE_INVAL)
12221
12222     if constants.INIC_MAC in params:
12223       mac = params[constants.INIC_MAC]
12224       if mac is None:
12225         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12226                                    errors.ECODE_INVAL)
12227       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12228         # otherwise generate the MAC address
12229         params[constants.INIC_MAC] = \
12230           self.cfg.GenerateMAC(self.proc.GetECId())
12231       else:
12232         # or validate/reserve the current one
12233         try:
12234           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12235         except errors.ReservationError:
12236           raise errors.OpPrereqError("MAC address '%s' already in use"
12237                                      " in cluster" % mac,
12238                                      errors.ECODE_NOTUNIQUE)
12239
12240     private.params = new_params
12241     private.filled = new_filled_params
12242
12243     return (None, None)
12244
12245   def CheckPrereq(self):
12246     """Check prerequisites.
12247
12248     This only checks the instance list against the existing names.
12249
12250     """
12251     # checking the new params on the primary/secondary nodes
12252
12253     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12254     cluster = self.cluster = self.cfg.GetClusterInfo()
12255     assert self.instance is not None, \
12256       "Cannot retrieve locked instance %s" % self.op.instance_name
12257     pnode = instance.primary_node
12258     nodelist = list(instance.all_nodes)
12259     pnode_info = self.cfg.GetNodeInfo(pnode)
12260     self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12261
12262     # Prepare disk/NIC modifications
12263     self.diskmod = PrepareContainerMods(self.op.disks, None)
12264     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12265
12266     # OS change
12267     if self.op.os_name and not self.op.force:
12268       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12269                       self.op.force_variant)
12270       instance_os = self.op.os_name
12271     else:
12272       instance_os = instance.os
12273
12274     assert not (self.op.disk_template and self.op.disks), \
12275       "Can't modify disk template and apply disk changes at the same time"
12276
12277     if self.op.disk_template:
12278       if instance.disk_template == self.op.disk_template:
12279         raise errors.OpPrereqError("Instance already has disk template %s" %
12280                                    instance.disk_template, errors.ECODE_INVAL)
12281
12282       if (instance.disk_template,
12283           self.op.disk_template) not in self._DISK_CONVERSIONS:
12284         raise errors.OpPrereqError("Unsupported disk template conversion from"
12285                                    " %s to %s" % (instance.disk_template,
12286                                                   self.op.disk_template),
12287                                    errors.ECODE_INVAL)
12288       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12289                           msg="cannot change disk template")
12290       if self.op.disk_template in constants.DTS_INT_MIRROR:
12291         if self.op.remote_node == pnode:
12292           raise errors.OpPrereqError("Given new secondary node %s is the same"
12293                                      " as the primary node of the instance" %
12294                                      self.op.remote_node, errors.ECODE_STATE)
12295         _CheckNodeOnline(self, self.op.remote_node)
12296         _CheckNodeNotDrained(self, self.op.remote_node)
12297         # FIXME: here we assume that the old instance type is DT_PLAIN
12298         assert instance.disk_template == constants.DT_PLAIN
12299         disks = [{constants.IDISK_SIZE: d.size,
12300                   constants.IDISK_VG: d.logical_id[0]}
12301                  for d in instance.disks]
12302         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12303         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12304
12305         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12306         snode_group = self.cfg.GetNodeGroup(snode_info.group)
12307         ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12308         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12309                                 ignore=self.op.ignore_ipolicy)
12310         if pnode_info.group != snode_info.group:
12311           self.LogWarning("The primary and secondary nodes are in two"
12312                           " different node groups; the disk parameters"
12313                           " from the first disk's node group will be"
12314                           " used")
12315
12316     # hvparams processing
12317     if self.op.hvparams:
12318       hv_type = instance.hypervisor
12319       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12320       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12321       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12322
12323       # local check
12324       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12325       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12326       self.hv_proposed = self.hv_new = hv_new # the new actual values
12327       self.hv_inst = i_hvdict # the new dict (without defaults)
12328     else:
12329       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12330                                               instance.hvparams)
12331       self.hv_new = self.hv_inst = {}
12332
12333     # beparams processing
12334     if self.op.beparams:
12335       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12336                                    use_none=True)
12337       objects.UpgradeBeParams(i_bedict)
12338       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12339       be_new = cluster.SimpleFillBE(i_bedict)
12340       self.be_proposed = self.be_new = be_new # the new actual values
12341       self.be_inst = i_bedict # the new dict (without defaults)
12342     else:
12343       self.be_new = self.be_inst = {}
12344       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12345     be_old = cluster.FillBE(instance)
12346
12347     # CPU param validation -- checking every time a paramtere is
12348     # changed to cover all cases where either CPU mask or vcpus have
12349     # changed
12350     if (constants.BE_VCPUS in self.be_proposed and
12351         constants.HV_CPU_MASK in self.hv_proposed):
12352       cpu_list = \
12353         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12354       # Verify mask is consistent with number of vCPUs. Can skip this
12355       # test if only 1 entry in the CPU mask, which means same mask
12356       # is applied to all vCPUs.
12357       if (len(cpu_list) > 1 and
12358           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12359         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12360                                    " CPU mask [%s]" %
12361                                    (self.be_proposed[constants.BE_VCPUS],
12362                                     self.hv_proposed[constants.HV_CPU_MASK]),
12363                                    errors.ECODE_INVAL)
12364
12365       # Only perform this test if a new CPU mask is given
12366       if constants.HV_CPU_MASK in self.hv_new:
12367         # Calculate the largest CPU number requested
12368         max_requested_cpu = max(map(max, cpu_list))
12369         # Check that all of the instance's nodes have enough physical CPUs to
12370         # satisfy the requested CPU mask
12371         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12372                                 max_requested_cpu + 1, instance.hypervisor)
12373
12374     # osparams processing
12375     if self.op.osparams:
12376       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12377       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12378       self.os_inst = i_osdict # the new dict (without defaults)
12379     else:
12380       self.os_inst = {}
12381
12382     self.warn = []
12383
12384     #TODO(dynmem): do the appropriate check involving MINMEM
12385     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12386         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12387       mem_check_list = [pnode]
12388       if be_new[constants.BE_AUTO_BALANCE]:
12389         # either we changed auto_balance to yes or it was from before
12390         mem_check_list.extend(instance.secondary_nodes)
12391       instance_info = self.rpc.call_instance_info(pnode, instance.name,
12392                                                   instance.hypervisor)
12393       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12394                                          [instance.hypervisor])
12395       pninfo = nodeinfo[pnode]
12396       msg = pninfo.fail_msg
12397       if msg:
12398         # Assume the primary node is unreachable and go ahead
12399         self.warn.append("Can't get info from primary node %s: %s" %
12400                          (pnode, msg))
12401       else:
12402         (_, _, (pnhvinfo, )) = pninfo.payload
12403         if not isinstance(pnhvinfo.get("memory_free", None), int):
12404           self.warn.append("Node data from primary node %s doesn't contain"
12405                            " free memory information" % pnode)
12406         elif instance_info.fail_msg:
12407           self.warn.append("Can't get instance runtime information: %s" %
12408                           instance_info.fail_msg)
12409         else:
12410           if instance_info.payload:
12411             current_mem = int(instance_info.payload["memory"])
12412           else:
12413             # Assume instance not running
12414             # (there is a slight race condition here, but it's not very
12415             # probable, and we have no other way to check)
12416             # TODO: Describe race condition
12417             current_mem = 0
12418           #TODO(dynmem): do the appropriate check involving MINMEM
12419           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12420                       pnhvinfo["memory_free"])
12421           if miss_mem > 0:
12422             raise errors.OpPrereqError("This change will prevent the instance"
12423                                        " from starting, due to %d MB of memory"
12424                                        " missing on its primary node" %
12425                                        miss_mem,
12426                                        errors.ECODE_NORES)
12427
12428       if be_new[constants.BE_AUTO_BALANCE]:
12429         for node, nres in nodeinfo.items():
12430           if node not in instance.secondary_nodes:
12431             continue
12432           nres.Raise("Can't get info from secondary node %s" % node,
12433                      prereq=True, ecode=errors.ECODE_STATE)
12434           (_, _, (nhvinfo, )) = nres.payload
12435           if not isinstance(nhvinfo.get("memory_free", None), int):
12436             raise errors.OpPrereqError("Secondary node %s didn't return free"
12437                                        " memory information" % node,
12438                                        errors.ECODE_STATE)
12439           #TODO(dynmem): do the appropriate check involving MINMEM
12440           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12441             raise errors.OpPrereqError("This change will prevent the instance"
12442                                        " from failover to its secondary node"
12443                                        " %s, due to not enough memory" % node,
12444                                        errors.ECODE_STATE)
12445
12446     if self.op.runtime_mem:
12447       remote_info = self.rpc.call_instance_info(instance.primary_node,
12448                                                 instance.name,
12449                                                 instance.hypervisor)
12450       remote_info.Raise("Error checking node %s" % instance.primary_node)
12451       if not remote_info.payload: # not running already
12452         raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12453                                    errors.ECODE_STATE)
12454
12455       current_memory = remote_info.payload["memory"]
12456       if (not self.op.force and
12457            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12458             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12459         raise errors.OpPrereqError("Instance %s must have memory between %d"
12460                                    " and %d MB of memory unless --force is"
12461                                    " given" % (instance.name,
12462                                     self.be_proposed[constants.BE_MINMEM],
12463                                     self.be_proposed[constants.BE_MAXMEM]),
12464                                    errors.ECODE_INVAL)
12465
12466       if self.op.runtime_mem > current_memory:
12467         _CheckNodeFreeMemory(self, instance.primary_node,
12468                              "ballooning memory for instance %s" %
12469                              instance.name,
12470                              self.op.memory - current_memory,
12471                              instance.hypervisor)
12472
12473     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12474       raise errors.OpPrereqError("Disk operations not supported for"
12475                                  " diskless instances",
12476                                  errors.ECODE_INVAL)
12477
12478     def _PrepareNicCreate(_, params, private):
12479       return self._PrepareNicModification(params, private, None, {},
12480                                           cluster, pnode)
12481
12482     def _PrepareNicMod(_, nic, params, private):
12483       return self._PrepareNicModification(params, private, nic.ip,
12484                                           nic.nicparams, cluster, pnode)
12485
12486     # Verify NIC changes (operating on copy)
12487     nics = instance.nics[:]
12488     ApplyContainerMods("NIC", nics, None, self.nicmod,
12489                        _PrepareNicCreate, _PrepareNicMod, None)
12490     if len(nics) > constants.MAX_NICS:
12491       raise errors.OpPrereqError("Instance has too many network interfaces"
12492                                  " (%d), cannot add more" % constants.MAX_NICS,
12493                                  errors.ECODE_STATE)
12494
12495     # Verify disk changes (operating on a copy)
12496     disks = instance.disks[:]
12497     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12498     if len(disks) > constants.MAX_DISKS:
12499       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12500                                  " more" % constants.MAX_DISKS,
12501                                  errors.ECODE_STATE)
12502
12503     if self.op.offline is not None:
12504       if self.op.offline:
12505         msg = "can't change to offline"
12506       else:
12507         msg = "can't change to online"
12508       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12509
12510     # Pre-compute NIC changes (necessary to use result in hooks)
12511     self._nic_chgdesc = []
12512     if self.nicmod:
12513       # Operate on copies as this is still in prereq
12514       nics = [nic.Copy() for nic in instance.nics]
12515       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12516                          self._CreateNewNic, self._ApplyNicMods, None)
12517       self._new_nics = nics
12518     else:
12519       self._new_nics = None
12520
12521   def _ConvertPlainToDrbd(self, feedback_fn):
12522     """Converts an instance from plain to drbd.
12523
12524     """
12525     feedback_fn("Converting template to drbd")
12526     instance = self.instance
12527     pnode = instance.primary_node
12528     snode = self.op.remote_node
12529
12530     assert instance.disk_template == constants.DT_PLAIN
12531
12532     # create a fake disk info for _GenerateDiskTemplate
12533     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12534                   constants.IDISK_VG: d.logical_id[0]}
12535                  for d in instance.disks]
12536     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12537                                       instance.name, pnode, [snode],
12538                                       disk_info, None, None, 0, feedback_fn,
12539                                       self.diskparams)
12540     info = _GetInstanceInfoText(instance)
12541     feedback_fn("Creating additional volumes...")
12542     # first, create the missing data and meta devices
12543     for disk in new_disks:
12544       # unfortunately this is... not too nice
12545       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12546                             info, True)
12547       for child in disk.children:
12548         _CreateSingleBlockDev(self, snode, instance, child, info, True)
12549     # at this stage, all new LVs have been created, we can rename the
12550     # old ones
12551     feedback_fn("Renaming original volumes...")
12552     rename_list = [(o, n.children[0].logical_id)
12553                    for (o, n) in zip(instance.disks, new_disks)]
12554     result = self.rpc.call_blockdev_rename(pnode, rename_list)
12555     result.Raise("Failed to rename original LVs")
12556
12557     feedback_fn("Initializing DRBD devices...")
12558     # all child devices are in place, we can now create the DRBD devices
12559     for disk in new_disks:
12560       for node in [pnode, snode]:
12561         f_create = node == pnode
12562         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12563
12564     # at this point, the instance has been modified
12565     instance.disk_template = constants.DT_DRBD8
12566     instance.disks = new_disks
12567     self.cfg.Update(instance, feedback_fn)
12568
12569     # Release node locks while waiting for sync
12570     _ReleaseLocks(self, locking.LEVEL_NODE)
12571
12572     # disks are created, waiting for sync
12573     disk_abort = not _WaitForSync(self, instance,
12574                                   oneshot=not self.op.wait_for_sync)
12575     if disk_abort:
12576       raise errors.OpExecError("There are some degraded disks for"
12577                                " this instance, please cleanup manually")
12578
12579     # Node resource locks will be released by caller
12580
12581   def _ConvertDrbdToPlain(self, feedback_fn):
12582     """Converts an instance from drbd to plain.
12583
12584     """
12585     instance = self.instance
12586
12587     assert len(instance.secondary_nodes) == 1
12588     assert instance.disk_template == constants.DT_DRBD8
12589
12590     pnode = instance.primary_node
12591     snode = instance.secondary_nodes[0]
12592     feedback_fn("Converting template to plain")
12593
12594     old_disks = instance.disks
12595     new_disks = [d.children[0] for d in old_disks]
12596
12597     # copy over size and mode
12598     for parent, child in zip(old_disks, new_disks):
12599       child.size = parent.size
12600       child.mode = parent.mode
12601
12602     # this is a DRBD disk, return its port to the pool
12603     # NOTE: this must be done right before the call to cfg.Update!
12604     for disk in old_disks:
12605       tcp_port = disk.logical_id[2]
12606       self.cfg.AddTcpUdpPort(tcp_port)
12607
12608     # update instance structure
12609     instance.disks = new_disks
12610     instance.disk_template = constants.DT_PLAIN
12611     self.cfg.Update(instance, feedback_fn)
12612
12613     # Release locks in case removing disks takes a while
12614     _ReleaseLocks(self, locking.LEVEL_NODE)
12615
12616     feedback_fn("Removing volumes on the secondary node...")
12617     for disk in old_disks:
12618       self.cfg.SetDiskID(disk, snode)
12619       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12620       if msg:
12621         self.LogWarning("Could not remove block device %s on node %s,"
12622                         " continuing anyway: %s", disk.iv_name, snode, msg)
12623
12624     feedback_fn("Removing unneeded volumes on the primary node...")
12625     for idx, disk in enumerate(old_disks):
12626       meta = disk.children[1]
12627       self.cfg.SetDiskID(meta, pnode)
12628       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12629       if msg:
12630         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12631                         " continuing anyway: %s", idx, pnode, msg)
12632
12633   def _CreateNewDisk(self, idx, params, _):
12634     """Creates a new disk.
12635
12636     """
12637     instance = self.instance
12638
12639     # add a new disk
12640     if instance.disk_template in constants.DTS_FILEBASED:
12641       (file_driver, file_path) = instance.disks[0].logical_id
12642       file_path = os.path.dirname(file_path)
12643     else:
12644       file_driver = file_path = None
12645
12646     disk = \
12647       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12648                             instance.primary_node, instance.secondary_nodes,
12649                             [params], file_path, file_driver, idx,
12650                             self.Log, self.diskparams)[0]
12651
12652     info = _GetInstanceInfoText(instance)
12653
12654     logging.info("Creating volume %s for instance %s",
12655                  disk.iv_name, instance.name)
12656     # Note: this needs to be kept in sync with _CreateDisks
12657     #HARDCODE
12658     for node in instance.all_nodes:
12659       f_create = (node == instance.primary_node)
12660       try:
12661         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12662       except errors.OpExecError, err:
12663         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12664                         disk.iv_name, disk, node, err)
12665
12666     return (disk, [
12667       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12668       ])
12669
12670   @staticmethod
12671   def _ModifyDisk(idx, disk, params, _):
12672     """Modifies a disk.
12673
12674     """
12675     disk.mode = params[constants.IDISK_MODE]
12676
12677     return [
12678       ("disk.mode/%d" % idx, disk.mode),
12679       ]
12680
12681   def _RemoveDisk(self, idx, root, _):
12682     """Removes a disk.
12683
12684     """
12685     for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12686       self.cfg.SetDiskID(disk, node)
12687       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12688       if msg:
12689         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12690                         " continuing anyway", idx, node, msg)
12691
12692     # if this is a DRBD disk, return its port to the pool
12693     if root.dev_type in constants.LDS_DRBD:
12694       self.cfg.AddTcpUdpPort(root.logical_id[2])
12695
12696   @staticmethod
12697   def _CreateNewNic(idx, params, private):
12698     """Creates data structure for a new network interface.
12699
12700     """
12701     mac = params[constants.INIC_MAC]
12702     ip = params.get(constants.INIC_IP, None)
12703     nicparams = private.params
12704
12705     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12706       ("nic.%d" % idx,
12707        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12708        (mac, ip, private.filled[constants.NIC_MODE],
12709        private.filled[constants.NIC_LINK])),
12710       ])
12711
12712   @staticmethod
12713   def _ApplyNicMods(idx, nic, params, private):
12714     """Modifies a network interface.
12715
12716     """
12717     changes = []
12718
12719     for key in [constants.INIC_MAC, constants.INIC_IP]:
12720       if key in params:
12721         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12722         setattr(nic, key, params[key])
12723
12724     if private.params:
12725       nic.nicparams = private.params
12726
12727       for (key, val) in params.items():
12728         changes.append(("nic.%s/%d" % (key, idx), val))
12729
12730     return changes
12731
  def Exec(self, feedback_fn):
    """Modifies an instance.

    Applies, in this order: the runtime memory change (sent to the primary
    node right away via RPC), disk modifications, an optional disk template
    conversion, NIC changes, hypervisor/backend/OS parameter changes and
    the offline/online state. The instance object is written back to the
    configuration at the end.

    @param feedback_fn: function used to report warnings and progress
    @return: list of C{(name, value)} tuples describing the changes made
    @raise errors.OpExecError: if the instance disks cannot be shut down
        before a disk template conversion

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    # Node resource locks have to be owned exactly when a disk template
    # conversion was requested
    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    # Accumulates the (name, value) descriptions returned to the caller
    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      # Sanity check (skipped when assertions are disabled): locks for all
      # nodes of the instance and for the new secondary node, if any, must
      # be owned at both node lock levels
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      # The table holds plain functions, hence "self" is passed explicitly
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # On any failure release the DRBD minors of the instance and let
        # the original exception propagate
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    # self.op.offline has three states: None (no change requested), true
    # (mark offline) and false (mark online, but stopped)
    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    # Write the modified instance object back to the configuration
    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result
12842
  # Supported disk template conversions: maps a tuple of (current template,
  # requested template) to the function implementing the conversion. The
  # values are the plain functions defined in this class body (not bound
  # methods), which is why Exec passes "self" explicitly when calling them.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
12847
12848
class LUInstanceChangeGroup(LogicalUnit):
  """Logical unit for changing the node group(s) of an instance.

  The placement is computed by an iallocator run in "change group" mode;
  this LU returns the jobs built from the iallocator's result.

  """
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declares the initial locks and looks up the requested groups.

    """
    self.share_locks = _ShareAll()

    # both levels start out empty and are filled in by DeclareLocks
    wanted_locks = {}
    wanted_locks[locking.LEVEL_NODEGROUP] = []
    wanted_locks[locking.LEVEL_NODE] = []
    self.needed_locks = wanted_locks

    self._ExpandAndLockInstance()

    requested = self.op.target_groups
    if not requested:
      self.req_target_uuids = None
    else:
      lookup_fn = self.cfg.LookupNodeGroup
      self.req_target_uuids = [lookup_fn(name) for name in requested]

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    """Computes the node group and node locks needed at the given level.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if not self.req_target_uuids:
        # No target groups, need to lock all of them
        group_locks = locking.ALL_SET
      else:
        group_locks = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        group_locks.update(
          self.cfg.GetInstanceNodeGroups(self.op.instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = group_locks
      return

    if level != locking.LEVEL_NODE:
      return

    if not self.req_target_uuids:
      # Lock all nodes as all groups are potential targets
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      return

    # Lock all nodes used by instances
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    self._LockInstancesNodes()

    # Lock all nodes in all potential target groups
    held_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    candidate_groups = (held_groups -
                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
    extra_nodes = []
    for group_uuid in candidate_groups:
      extra_nodes.extend(self.cfg.GetNodeGroup(group_uuid).members)
    self.needed_locks[locking.LEVEL_NODE].extend(extra_nodes)

  def CheckPrereq(self):
    """Verifies the owned locks and determines the target groups.

    @raise errors.OpPrereqError: if a target group is used by the instance
        or if there is no possible target group

    """
    instance_name = self.op.instance_name

    held_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    held_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    held_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            held_groups.issuperset(self.req_target_uuids))
    assert held_instances == set([instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(instance_name)

    # Check if node groups for locked instance are still correct
    assert held_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       instance_name)

    current_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                              held_groups)

    if not self.req_target_uuids:
      # All groups except those used by the instance are potential targets
      self.target_uuids = held_groups - current_groups
    else:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)

    overlap = self.target_uuids & current_groups
    if overlap:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(overlap), instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary with the target groups and the instance's hook
        environment

    """
    assert self.target_uuids

    hook_env = {}
    hook_env["TARGET_GROUPS"] = " ".join(self.target_uuids)
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two lists, each containing only the master node

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Runs the iallocator and returns the jobs changing the group.

    @raise errors.OpPrereqError: if the iallocator reports no success

    """
    locked_instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert locked_instances == [self.op.instance_name], "Instance not locked"

    allocator = IAllocator(self.cfg, self.rpc,
                           constants.IALLOCATOR_MODE_CHG_GROUP,
                           instances=locked_instances,
                           target_groups=list(self.target_uuids))

    allocator.Run(self.op.iallocator)

    if not allocator.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  allocator.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, allocator.result, self.op.early_release,
                               False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
12988
12989
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  All the work is delegated to an L{_ExportQuery} object; its result is
  converted into the old-style dictionary here.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the query object for the requested nodes.

    """
    node_filter = qlang.MakeSimpleFilter("node", self.op.nodes)
    fields = ["node", "export"]
    self.expq = _ExportQuery(node_filter, fields, self.op.use_locking)

  def ExpandNames(self):
    """Lets the query object set up the locking.

    """
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Lets the query object declare the locks for the given level.

    """
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Collects the exports of each node.

    @return: dictionary keyed by node; the value is C{False} for a node
        whose export name was reported as C{None}, otherwise the list of
        its export names

    """
    exports = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is not None:
        exports.setdefault(node, []).append(expname)
        continue
      # a missing export name marks a node which could not be queried
      exports[node] = False

    return exports
13016
13017
class _ExportQuery(_QueryBase):
  """Query implementation listing the exports found on nodes.

  """
  FIELDS = query.EXPORT_FIELDS

  #: The node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    """Determines the wanted nodes and the locks to request.

    @param lu: the logical unit on whose behalf the query runs

    """
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = self.use_locking

    if not self.do_locking:
      return

    lu.share_locks = _ShareAll()
    node_locks = {}
    node_locks[locking.LEVEL_NODE] = self.wanted
    lu.needed_locks = node_locks

  def DeclareLocks(self, lu, level):
    """Does nothing, no further locks are declared.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    @param lu: the logical unit on whose behalf the query runs
    @return: list of C{(node, export name)} tuples; a node whose RPC call
        failed is represented by one tuple with C{None} as export name

    """
    # Locking is not used
    # TODO
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    node_names = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    found = []

    export_lists = lu.rpc.call_export_list(node_names)
    for (node, nres) in export_lists.items():
      if not nres.fail_msg:
        found.extend((node, expname) for expname in nres.payload)
        continue
      found.append((node, None))

    return found
13066
13067
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expands the instance name and locks the instance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance, verifies that its primary node is online and reads
    the cluster domain secret.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @rtype: dict or None
    @return: for remote exports a dictionary with the keys C{handshake},
      C{x509_key_name} and C{x509_ca}; C{None} for any other mode

    """
    inst = self.instance

    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      # Only remote exports need any preparation
      return None

    primary = inst.primary_node
    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % primary)
    create_res = self.rpc.call_x509_cert_create(primary,
                                                constants.RIE_CERT_VALIDITY)
    create_res.Raise("Can't create X509 key and certificate on %s" %
                     create_res.node)

    (key_name, cert_pem) = create_res.payload
    pem_format = OpenSSL.crypto.FILETYPE_PEM
    cert = OpenSSL.crypto.load_certificate(pem_format, cert_pem)

    # Key name and certificate are authenticated using the cluster domain
    # secret and the salt generated above
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, key_name, salt=salt)
    signed_cert = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_name_hmac, salt),
      "x509_ca": signed_cert,
      }
13117
13118
13119 class LUBackupExport(LogicalUnit):
13120   """Export an instance to an image in the cluster.
13121
13122   """
13123   HPATH = "instance-export"
13124   HTYPE = constants.HTYPE_INSTANCE
13125   REQ_BGL = False
13126
13127   def CheckArguments(self):
13128     """Check the arguments.
13129
13130     """
13131     self.x509_key_name = self.op.x509_key_name
13132     self.dest_x509_ca_pem = self.op.destination_x509_ca
13133
13134     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13135       if not self.x509_key_name:
13136         raise errors.OpPrereqError("Missing X509 key name for encryption",
13137                                    errors.ECODE_INVAL)
13138
13139       if not self.dest_x509_ca_pem:
13140         raise errors.OpPrereqError("Missing destination X509 CA",
13141                                    errors.ECODE_INVAL)
13142
13143   def ExpandNames(self):
13144     self._ExpandAndLockInstance()
13145
13146     # Lock all nodes for local exports
13147     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13148       # FIXME: lock only instance primary and destination node
13149       #
13150       # Sad but true, for now we have do lock all nodes, as we don't know where
13151       # the previous export might be, and in this LU we search for it and
13152       # remove it from its current node. In the future we could fix this by:
13153       #  - making a tasklet to search (share-lock all), then create the
13154       #    new one, then one to remove, after
13155       #  - removing the removal operation altogether
13156       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13157
13158   def DeclareLocks(self, level):
13159     """Last minute lock declaration."""
13160     # All nodes are locked anyway, so nothing to do here.
13161
13162   def BuildHooksEnv(self):
13163     """Build hooks env.
13164
13165     This will run on the master, primary node and target node.
13166
13167     """
13168     env = {
13169       "EXPORT_MODE": self.op.mode,
13170       "EXPORT_NODE": self.op.target_node,
13171       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13172       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13173       # TODO: Generic function for boolean env variables
13174       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13175       }
13176
13177     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13178
13179     return env
13180
13181   def BuildHooksNodes(self):
13182     """Build hooks nodes.
13183
13184     """
13185     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13186
13187     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13188       nl.append(self.op.target_node)
13189
13190     return (nl, nl)
13191
13192   def CheckPrereq(self):
13193     """Check prerequisites.
13194
13195     This checks that the instance and node names are valid.
13196
13197     """
13198     instance_name = self.op.instance_name
13199
13200     self.instance = self.cfg.GetInstanceInfo(instance_name)
13201     assert self.instance is not None, \
13202           "Cannot retrieve locked instance %s" % self.op.instance_name
13203     _CheckNodeOnline(self, self.instance.primary_node)
13204
13205     if (self.op.remove_instance and
13206         self.instance.admin_state == constants.ADMINST_UP and
13207         not self.op.shutdown):
13208       raise errors.OpPrereqError("Can not remove instance without shutting it"
13209                                  " down before")
13210
13211     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13212       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13213       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13214       assert self.dst_node is not None
13215
13216       _CheckNodeOnline(self, self.dst_node.name)
13217       _CheckNodeNotDrained(self, self.dst_node.name)
13218
13219       self._cds = None
13220       self.dest_disk_info = None
13221       self.dest_x509_ca = None
13222
13223     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13224       self.dst_node = None
13225
13226       if len(self.op.target_node) != len(self.instance.disks):
13227         raise errors.OpPrereqError(("Received destination information for %s"
13228                                     " disks, but instance %s has %s disks") %
13229                                    (len(self.op.target_node), instance_name,
13230                                     len(self.instance.disks)),
13231                                    errors.ECODE_INVAL)
13232
13233       cds = _GetClusterDomainSecret()
13234
13235       # Check X509 key name
13236       try:
13237         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13238       except (TypeError, ValueError), err:
13239         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13240
13241       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13242         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13243                                    errors.ECODE_INVAL)
13244
13245       # Load and verify CA
13246       try:
13247         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13248       except OpenSSL.crypto.Error, err:
13249         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13250                                    (err, ), errors.ECODE_INVAL)
13251
13252       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13253       if errcode is not None:
13254         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13255                                    (msg, ), errors.ECODE_INVAL)
13256
13257       self.dest_x509_ca = cert
13258
13259       # Verify target information
13260       disk_info = []
13261       for idx, disk_data in enumerate(self.op.target_node):
13262         try:
13263           (host, port, magic) = \
13264             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13265         except errors.GenericError, err:
13266           raise errors.OpPrereqError("Target info for disk %s: %s" %
13267                                      (idx, err), errors.ECODE_INVAL)
13268
13269         disk_info.append((host, port, magic))
13270
13271       assert len(disk_info) == len(self.op.target_node)
13272       self.dest_disk_info = disk_info
13273
13274     else:
13275       raise errors.ProgrammerError("Unhandled export mode %r" %
13276                                    self.op.mode)
13277
13278     # instance disk type verification
13279     # TODO: Implement export support for file-based disks
13280     for disk in self.instance.disks:
13281       if disk.dev_type == constants.LD_FILE:
13282         raise errors.OpPrereqError("Export not supported for instances with"
13283                                    " file-based disks", errors.ECODE_INVAL)
13284
13285   def _CleanupExports(self, feedback_fn):
13286     """Removes exports of current instance from all other nodes.
13287
13288     If an instance in a cluster with nodes A..D was exported to node C, its
13289     exports will be removed from the nodes A, B and D.
13290
13291     """
13292     assert self.op.mode != constants.EXPORT_MODE_REMOTE
13293
13294     nodelist = self.cfg.GetNodeList()
13295     nodelist.remove(self.dst_node.name)
13296
13297     # on one-node clusters nodelist will be empty after the removal
13298     # if we proceed the backup would be removed because OpBackupQuery
13299     # substitutes an empty list with the full cluster node list.
13300     iname = self.instance.name
13301     if nodelist:
13302       feedback_fn("Removing old exports for instance %s" % iname)
13303       exportlist = self.rpc.call_export_list(nodelist)
13304       for node in exportlist:
13305         if exportlist[node].fail_msg:
13306           continue
13307         if iname in exportlist[node].payload:
13308           msg = self.rpc.call_export_remove(node, iname).fail_msg
13309           if msg:
13310             self.LogWarning("Could not remove older export for instance %s"
13311                             " on node %s: %s", iname, node, msg)
13312
13313   def Exec(self, feedback_fn):
13314     """Export an instance to an image in the cluster.
13315
13316     """
13317     assert self.op.mode in constants.EXPORT_MODES
13318
13319     instance = self.instance
13320     src_node = instance.primary_node
13321
13322     if self.op.shutdown:
13323       # shutdown the instance, but not the disks
13324       feedback_fn("Shutting down instance %s" % instance.name)
13325       result = self.rpc.call_instance_shutdown(src_node, instance,
13326                                                self.op.shutdown_timeout)
13327       # TODO: Maybe ignore failures if ignore_remove_failures is set
13328       result.Raise("Could not shutdown instance %s on"
13329                    " node %s" % (instance.name, src_node))
13330
13331     # set the disks ID correctly since call_instance_start needs the
13332     # correct drbd minor to create the symlinks
13333     for disk in instance.disks:
13334       self.cfg.SetDiskID(disk, src_node)
13335
13336     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13337
13338     if activate_disks:
13339       # Activate the instance disks if we'exporting a stopped instance
13340       feedback_fn("Activating disks for %s" % instance.name)
13341       _StartInstanceDisks(self, instance, None)
13342
13343     try:
13344       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13345                                                      instance)
13346
13347       helper.CreateSnapshots()
13348       try:
13349         if (self.op.shutdown and
13350             instance.admin_state == constants.ADMINST_UP and
13351             not self.op.remove_instance):
13352           assert not activate_disks
13353           feedback_fn("Starting instance %s" % instance.name)
13354           result = self.rpc.call_instance_start(src_node,
13355                                                 (instance, None, None), False)
13356           msg = result.fail_msg
13357           if msg:
13358             feedback_fn("Failed to start instance: %s" % msg)
13359             _ShutdownInstanceDisks(self, instance)
13360             raise errors.OpExecError("Could not start instance: %s" % msg)
13361
13362         if self.op.mode == constants.EXPORT_MODE_LOCAL:
13363           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13364         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13365           connect_timeout = constants.RIE_CONNECT_TIMEOUT
13366           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13367
13368           (key_name, _, _) = self.x509_key_name
13369
13370           dest_ca_pem = \
13371             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13372                                             self.dest_x509_ca)
13373
13374           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13375                                                      key_name, dest_ca_pem,
13376                                                      timeouts)
13377       finally:
13378         helper.Cleanup()
13379
13380       # Check for backwards compatibility
13381       assert len(dresults) == len(instance.disks)
13382       assert compat.all(isinstance(i, bool) for i in dresults), \
13383              "Not all results are boolean: %r" % dresults
13384
13385     finally:
13386       if activate_disks:
13387         feedback_fn("Deactivating disks for %s" % instance.name)
13388         _ShutdownInstanceDisks(self, instance)
13389
13390     if not (compat.all(dresults) and fin_resu):
13391       failures = []
13392       if not fin_resu:
13393         failures.append("export finalization")
13394       if not compat.all(dresults):
13395         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13396                                if not dsk)
13397         failures.append("disk export: disk(s) %s" % fdsk)
13398
13399       raise errors.OpExecError("Export failed, errors in %s" %
13400                                utils.CommaJoin(failures))
13401
13402     # At this point, the export was successful, we can cleanup/finish
13403
13404     # Remove instance if requested
13405     if self.op.remove_instance:
13406       feedback_fn("Removing instance %s" % instance.name)
13407       _RemoveInstance(self, feedback_fn, instance,
13408                       self.op.ignore_remove_failures)
13409
13410     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13411       self._CleanupExports(feedback_fn)
13412
13413     return fin_resu, dresults
13414
13415
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Requests locks on all nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove any export.

    All locked nodes are asked for their exports and every export carrying
    the instance's name is removed. Nodes which can't be queried are skipped
    with a warning; removal failures are logged.

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)

    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    name_unknown = not expanded_name
    if name_unknown:
      export_name = self.op.instance_name
    else:
      export_name = expanded_name

    node_names = self.owned_locks(locking.LEVEL_NODE)
    exports = self.rpc.call_export_list(node_names)
    removed_any = False

    for (node, nres) in exports.items():
      query_err = nres.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_err)
        continue

      if export_name not in nres.payload:
        continue

      removed_any = True
      remove_err = self.rpc.call_export_remove(node, export_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", export_name, node, remove_err)

    if name_unknown and not removed_any:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
13461
13462
13463 class LUGroupAdd(LogicalUnit):
13464   """Logical unit for creating node groups.
13465
13466   """
13467   HPATH = "group-add"
13468   HTYPE = constants.HTYPE_GROUP
13469   REQ_BGL = False
13470
13471   def ExpandNames(self):
13472     # We need the new group's UUID here so that we can create and acquire the
13473     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13474     # that it should not check whether the UUID exists in the configuration.
13475     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13476     self.needed_locks = {}
13477     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13478
13479   def CheckPrereq(self):
13480     """Check prerequisites.
13481
13482     This checks that the given group name is not an existing node group
13483     already.
13484
13485     """
13486     try:
13487       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13488     except errors.OpPrereqError:
13489       pass
13490     else:
13491       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13492                                  " node group (UUID: %s)" %
13493                                  (self.op.group_name, existing_uuid),
13494                                  errors.ECODE_EXISTS)
13495
13496     if self.op.ndparams:
13497       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13498
13499     if self.op.hv_state:
13500       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13501     else:
13502       self.new_hv_state = None
13503
13504     if self.op.disk_state:
13505       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13506     else:
13507       self.new_disk_state = None
13508
13509     if self.op.diskparams:
13510       for templ in constants.DISK_TEMPLATES:
13511         if templ not in self.op.diskparams:
13512           self.op.diskparams[templ] = {}
13513         utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13514     else:
13515       self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13516
13517     if self.op.ipolicy:
13518       cluster = self.cfg.GetClusterInfo()
13519       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13520       try:
13521         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13522       except errors.ConfigurationError, err:
13523         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13524                                    errors.ECODE_INVAL)
13525
13526   def BuildHooksEnv(self):
13527     """Build hooks env.
13528
13529     """
13530     return {
13531       "GROUP_NAME": self.op.group_name,
13532       }
13533
13534   def BuildHooksNodes(self):
13535     """Build hooks nodes.
13536
13537     """
13538     mn = self.cfg.GetMasterNode()
13539     return ([mn], [mn])
13540
13541   def Exec(self, feedback_fn):
13542     """Add the node group to the cluster.
13543
13544     """
13545     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13546                                   uuid=self.group_uuid,
13547                                   alloc_policy=self.op.alloc_policy,
13548                                   ndparams=self.op.ndparams,
13549                                   diskparams=self.op.diskparams,
13550                                   ipolicy=self.op.ipolicy,
13551                                   hv_state_static=self.new_hv_state,
13552                                   disk_state_static=self.new_disk_state)
13553
13554     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13555     del self.remove_locks[locking.LEVEL_NODEGROUP]
13556
13557
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the group and node names and requests their locks.

    """
    # These raise errors.OpPrereqError on their own:
    target_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.group_uuid = target_uuid
    node_names = _GetWantedNodes(self, self.op.nodes)
    self.op.nodes = node_names

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    wanted_locks = {}
    wanted_locks[locking.LEVEL_NODEGROUP] = set([target_uuid])
    wanted_locks[locking.LEVEL_NODE] = node_names
    self.needed_locks = wanted_locks

  def DeclareLocks(self, level):
    """Adds the nodes' current groups to the node group locks.

    """
    if level != locking.LEVEL_NODEGROUP:
      return

    group_locks = self.needed_locks[locking.LEVEL_NODEGROUP]
    assert len(group_locks) == 1

    # Try to get all affected nodes' groups without having the group or node
    # lock yet. Needs verification later in the code flow.
    source_groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
    group_locks.update(source_groups)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the locked groups still match the nodes' groups and
    checks whether the assignment would split instances across groups.

    @raise errors.OpExecError: if nodes changed groups since the locks were
      acquired, the target group can't be retrieved, or instances would get
      split without the force option

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      current_groups = utils.CommaJoin(expected_locks)
      locked_groups = utils.CommaJoin(actual_locks)
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (current_groups, locked_groups))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = []
    for node in self.op.nodes:
      assignments.append((node, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            instance_data)

    if not new_splits:
      return

    fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               fmt_new_splits)

    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    if previous_splits:
      fmt_old_splits = utils.CommaJoin(utils.NiceSort(previous_splits))
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s", fmt_old_splits)

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    target_uuid = self.group_uuid
    mods = []
    for node_name in self.op.nodes:
      mods.append((node_name, target_uuid))

    self.cfg.AssignGroupNodes(mods)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    The given node assignments are treated as one atomic operation. An
    instance is split if its nodes do not all belong to the same group.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Only assignments actually moving a node to another group matter
    new_group_of = {}
    for (node, group) in changes:
      if node_data[node].group != group:
        new_group_of[node] = group

    split_before = set()
    split_after = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      inst_nodes = [inst.primary_node]
      inst_nodes.extend(inst.secondary_nodes)

      groups_before = set()
      groups_after = set()
      for node in inst_nodes:
        current_group = node_data[node].group
        groups_before.add(current_group)
        groups_after.add(new_group_of.get(node, current_group))

      if len(groups_before) > 1:
        split_before.add(inst.name)

      if len(groups_after) > 1:
        split_after.add(inst.name)

    newly_split = split_after - split_before
    still_split = split_before & split_after

    return (list(newly_split), list(still_split))
13689
13690
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Computes the UUIDs of the wanted node groups.

    No locks are requested. Without requested names all groups are wanted,
    sorted by name; otherwise each requested entry may be either a group
    UUID or a group name.

    @param lu: the logical unit on whose behalf the query is run
    @raise errors.OpPrereqError: if requested groups do not exist

    """
    lu.needed_locks = {}

    groups = lu.cfg.GetAllNodeGroupsInfo()
    self._all_groups = groups
    self._cluster = lu.cfg.GetClusterInfo()

    uuid_by_name = {}
    for grp in groups.values():
      uuid_by_name[grp.name] = grp.uuid

    if self.names:
      # Accept names to be either names or UUIDs.
      unknown = []
      wanted = self.wanted = []
      known_uuids = frozenset(groups.keys())

      for ident in self.names:
        if ident in known_uuids:
          wanted.append(ident)
        elif ident in uuid_by_name:
          wanted.append(uuid_by_name[ident])
        else:
          unknown.append(ident)

      if unknown:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(unknown),
                                   errors.ECODE_NOENT)
    else:
      sorted_uuids = []
      for name in utils.NiceSort(uuid_by_name.keys()):
        sorted_uuids.append(uuid_by_name[name])
      self.wanted = sorted_uuids

  def DeclareLocks(self, lu, level):
    """Does nothing; this query requests no locks.

    """

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    @return: a L{query.GroupQueryData} object built from the cluster, the
      wanted groups and, if requested, the group to nodes and group to
      instances mappings (C{None} otherwise)

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    nodes_by_group = None
    instances_by_group = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if want_nodes or want_instances:
      node_infos = lu.cfg.GetAllNodesInfo()

      nodes_by_group = {}
      for uuid in self.wanted:
        nodes_by_group[uuid] = []
      group_of_node = {}

      for node in node_infos.values():
        if node.group not in nodes_by_group:
          continue
        nodes_by_group[node.group].append(node.name)
        group_of_node[node.name] = node.group

    if want_instances:
      instance_infos = lu.cfg.GetAllInstancesInfo()

      instances_by_group = {}
      for uuid in self.wanted:
        instances_by_group[uuid] = []

      # Instances are assigned to the group of their primary node
      for inst in instance_infos.values():
        pnode = inst.primary_node
        if pnode in group_of_node:
          instances_by_group[group_of_node[pnode]].append(inst.name)

      if not want_nodes:
        # Do not pass on node information if it was not requested.
        nodes_by_group = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]

    return query.GroupQueryData(self._cluster, wanted_groups,
                                nodes_by_group, instances_by_group)
13768
13769
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the group query object used by the other methods.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Hands name expansion over to the group query.

    """
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Hands lock declaration over to the group query.

    """
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the group query.

    @return: the result of the query's C{OldStyleQuery} method

    """
    query_result = self.gq.OldStyleQuery(self)
    return query_result
13788
13789
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    """Ensures that at least one modification was requested.

    @raise errors.OpPrereqError: if every modifiable opcode parameter is None

    """
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.hv_state,
      self.op.disk_state,
      self.op.ipolicy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Resolves the group name and prepares the lock declarations.

    The group lock is requested here; the instance locks are requested in
    shared mode and filled in by L{DeclareLocks}.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    """Requests locks for the instances currently in the group.

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically locked instances, loads the group object and
    computes the new values (C{self.new_*}) for every requested change. For
    instance policy changes a warning is logged for instances reported as
    violating the new policy.

    @raise errors.OpExecError: if the group object can't be retrieved

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    cluster = self.cfg.GetClusterInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      # Enforce the types on the merged result (as done for the disk
      # parameters below): this is the dictionary that gets stored, while
      # the opcode dictionary only holds the requested changes
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.diskparams:
      self.new_diskparams = dict()
      for templ in constants.DISK_TEMPLATES:
        # Templates not mentioned in the opcode are treated as "no changes"
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
                                             self.op.diskparams[templ])
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
        self.new_diskparams[templ] = new_templ_params

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.group.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.group.disk_state_static)

    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
                                            self.op.ipolicy,
                                            group_policy=True)

      # Compare the currently effective group policy with the new one, filled
      # with the cluster-level values, for all instances we hold locks on
      new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
      inst_filter = lambda inst: inst.name in owned_instances
      instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
      violations = \
          _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
                                                               self.group),
                                        new_ipolicy, instances)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(violations))

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the group name and the requested allocation policy

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: a pair of node lists, both containing only the master node

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    Applies the values computed in L{CheckPrereq} to the group object and
    writes it back to the configuration.

    @rtype: list
    @return: (parameter name, new value as string) pairs for the changed
        node and disk parameters

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    if self.op.hv_state:
      self.group.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      self.group.disk_state_static = self.new_disk_state

    if self.op.ipolicy:
      self.group.ipolicy = self.new_ipolicy

    self.cfg.Update(self.group, feedback_fn)
    return result
13934
13935
class LUGroupRemove(LogicalUnit):
  """Logical unit removing a node group from the configuration.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the group name and requests the lock on that group.

    """
    # The lookup raises errors.OpPrereqError on its own
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    wanted = {}
    wanted[locking.LEVEL_NODEGROUP] = [self.group_uuid]
    self.needed_locks = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    The group must not contain any node and it must not be the only node
    group known to the configuration.

    """
    # Collect the names of all nodes still assigned to the group
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        members.append(node.name)

    if members:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(members))),
                                 errors.ECODE_STATE)

    # Removing the only group would leave the cluster without any group
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Both returned lists contain only the master node.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    A configuration error while removing is reported as execution error;
    afterwards the group's lock is marked for removal.

    """
    uuid = self.group_uuid

    try:
      self.cfg.RemoveNodeGroup(uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = uuid
14001
14002
class LUGroupRename(LogicalUnit):
  """Logical unit giving a node group a new name.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the current group name and requests the group's lock.

    """
    # The lookup raises errors.OpPrereqError on its own
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    wanted = {}
    wanted[locking.LEVEL_NODEGROUP] = [self.group_uuid]
    self.needed_locks = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    The new name must not resolve to an existing node group.

    """
    try:
      clash_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The lookup failed, hence the new name is not in use
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (self.op.new_name, clash_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["OLD_NAME"] = self.op.group_name
    env["NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The list starts with the master node, followed by the group's member
    nodes (the master is not listed a second time); the same list is
    returned for both elements of the result.

    """
    master = self.cfg.GetMasterNode()

    node_map = self.cfg.GetAllNodesInfo()
    node_map.pop(master, None)

    members = [ninfo.name for ninfo in node_map.values()
               if ninfo.group == self.group_uuid]
    hook_nodes = [master] + members

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @return: the new group name

    """
    nodegroup = self.cfg.GetNodeGroup(self.group_uuid)

    if nodegroup is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    nodegroup.name = self.op.new_name
    self.cfg.Update(nodegroup, feedback_fn)

    return self.op.new_name
14070
14071
class LUGroupEvacuate(LogicalUnit):
  """Logical unit computing the evacuation of a node group.

  The instances of the group (as reported by the configuration) are handed
  to an iallocator run in change-group mode; the moves it proposes are
  returned as jobs.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves group names and prepares the lock declarations.

    Looks up the group to be evacuated and the requested target groups (if
    any), refuses the evacuated group as a target, determines the iallocator
    to use and declares empty lock lists which are filled in by
    L{DeclareLocks}.

    @raise errors.OpPrereqError: if the group to be evacuated is also listed
        as target group

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    """Fills in the locks needed at the given level.

    Instance and group locks are computed from the current configuration
    before the corresponding locks are held; L{CheckPrereq} verifies them
    afterwards.

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically acquired instance and group locks against
    the configuration, loads the instance objects and determines the target
    groups (C{self.target_uuids}).

    @raise errors.OpPrereqError: if no target groups were requested and no
        owned group other than the evacuated one is available

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the group name and the space-separated target group UUIDs

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: a pair holding the same list twice: the master node followed by
        the members of the group to be evacuated

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Computes the evacuation using the iallocator.

    @return: a L{ResultWithJobs} wrapping the jobs derived from the
        iallocator's result
    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    # The iallocator is asked about all instances we hold locks on
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
14218
14219
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  Abstract base class shared by all tag-related logical units; it resolves
  the tagged object and the lock protecting it.

  """
  def ExpandNames(self):
    """Expands the object name and declares the lock for the tagged object.

    Node and instance names are expanded in C{self.op.name}, node groups are
    resolved to their UUID (C{self.group_uuid}). Other kinds get no lock.

    """
    self.group_uuid = None
    self.needed_locks = {}

    kind = self.op.kind

    if kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      (lock_level, lock_name) = (locking.LEVEL_NODE, self.op.name)
    elif kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      (lock_level, lock_name) = (locking.LEVEL_INSTANCE, self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      (lock_level, lock_name) = (locking.LEVEL_NODEGROUP, self.group_uuid)
    else:
      (lock_level, lock_name) = (None, None)

    # Opcodes without a "use_locking" attribute are treated as locking ones
    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the tagged object from the configuration into C{self.target}.

    @raise errors.OpPrereqError: for an unknown kind of tagged object

    """
    kind = self.op.kind
    cfg = self.cfg

    if kind == constants.TAG_CLUSTER:
      target = cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = cfg.GetInstanceInfo(self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      target = cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)

    self.target = target
14267
14268
class LUTagsGet(TagsLU):
  """Logical unit returning the tags of one object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares the locks as done by the base class.

    """
    TagsLU.ExpandNames(self)

    # Tags are only read here, hence the lock sharing is set via _ShareAll
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tags of the target object as a list.

    """
    tags = self.target.GetTags()
    return list(tags)
14286
14287
14288 class LUTagsSearch(NoHooksLU):
14289   """Searches the tags for a given pattern.
14290
14291   """
14292   REQ_BGL = False
14293
14294   def ExpandNames(self):
14295     self.needed_locks = {}
14296
14297   def CheckPrereq(self):
14298     """Check prerequisites.
14299
14300     This checks the pattern passed for validity by compiling it.
14301
14302     """
14303     try:
14304       self.re = re.compile(self.op.pattern)
14305     except re.error, err:
14306       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14307                                  (self.op.pattern, err), errors.ECODE_INVAL)
14308
14309   def Exec(self, feedback_fn):
14310     """Returns the tag list.
14311
14312     """
14313     cfg = self.cfg
14314     tgts = [("/cluster", cfg.GetClusterInfo())]
14315     ilist = cfg.GetAllInstancesInfo().values()
14316     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14317     nlist = cfg.GetAllNodesInfo().values()
14318     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14319     tgts.extend(("/nodegroup/%s" % n.name, n)
14320                 for n in cfg.GetAllNodeGroupsInfo().values())
14321     results = []
14322     for path, target in tgts:
14323       for tag in target.GetTags():
14324         if self.re.search(tag):
14325           results.append((path, tag))
14326     return results
14327
14328
14329 class LUTagsSet(TagsLU):
14330   """Sets a tag on a given object.
14331
14332   """
14333   REQ_BGL = False
14334
14335   def CheckPrereq(self):
14336     """Check prerequisites.
14337
14338     This checks the type and length of the tag name and value.
14339
14340     """
14341     TagsLU.CheckPrereq(self)
14342     for tag in self.op.tags:
14343       objects.TaggableObject.ValidateTag(tag)
14344
14345   def Exec(self, feedback_fn):
14346     """Sets the tag.
14347
14348     """
14349     try:
14350       for tag in self.op.tags:
14351         self.target.AddTag(tag)
14352     except errors.TagError, err:
14353       raise errors.OpExecError("Error while setting tag: %s" % str(err))
14354     self.cfg.Update(self.target, feedback_fn)
14355
14356
class LUTagsDel(TagsLU):
  """Logical unit removing a list of tags from one object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    All tags must be valid and must currently be set on the object.

    """
    TagsLU.CheckPrereq(self)

    for old_tag in self.op.tags:
      objects.TaggableObject.ValidateTag(old_tag)

    requested = frozenset(self.op.tags)
    existing = self.target.GetTags()

    missing = requested - existing
    if not missing:
      return

    quoted = ["'%s'" % name for name in sorted(missing)]
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Removes the tags from the target object and saves it.

    """
    target = self.target

    for old_tag in self.op.tags:
      target.RemoveTag(old_tag)

    self.cfg.Update(target, feedback_fn)
14389
14390
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  Depending on the opcode the delay is run on the master, on a list of
  nodes, or on both.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    If nodes were given, their names are expanded and locks on them are
    requested.

    """
    self.needed_locks = {}

    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    @raise errors.OpExecError: if the delay on the master failed

    """
    opc = self.op

    if opc.on_master and not utils.TestDelay(opc.duration):
      raise errors.OpExecError("Error during master delay test")

    if not opc.on_nodes:
      return

    answers = self.rpc.call_test_delay(opc.on_nodes, opc.duration)
    for (node_name, answer) in answers.items():
      answer.Raise("Failure during rpc call to node %s" % node_name)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero runs the delay exactly once, without logging
    iterations.

    """
    repeat = self.op.repeat

    if repeat == 0:
      self._TestDelay()
      return

    last = repeat - 1
    for iteration in range(repeat):
      self.LogInfo("Test delay iteration %d/%d" % (iteration, last))
      self._TestDelay()
14437
14438
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  Depending on the opcode parameters the client is notified (through a
  temporary Unix socket) at several points of the LU's life cycle, log
  messages are sent and/or a failure is raised.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  # Timeout used while waiting for data from the connected client
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    After the client connected, the listening socket and its temporary
    directory are removed and a receive call on the connection is used to
    wait for the client.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    # Failures are reported with the error class matching the phase
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    """Counts the calls of this method and resets the ExpandNames counter.

    """
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    """Verifies the call order, optionally notifies the client, sets locks.

    @raise errors.ProgrammerError: if L{CheckArguments} was not called before

    """
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    """Runs the requested notifications, log messages and failure.

    @rtype: bool
    @return: True, unless a failure was requested
    @raise errors.ProgrammerError: if L{ExpandNames} was not called before
    @raise errors.OpExecError: if the opcode requested a failure

    """
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      # Announce the number of messages before sending them
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
14573
14574
14575 class IAllocator(object):
14576   """IAllocator framework.
14577
  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all keys listed for the mode in the _MODE_DATA class
      attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage
14586
14587   """
14588   # pylint: disable=R0902
14589   # lots of instance attributes
14590
14591   def __init__(self, cfg, rpc_runner, mode, **kwargs):
14592     self.cfg = cfg
14593     self.rpc = rpc_runner
14594     # init buffer variables
14595     self.in_text = self.out_text = self.in_data = self.out_data = None
14596     # init all input fields so that pylint is happy
14597     self.mode = mode
14598     self.memory = self.disks = self.disk_template = self.spindle_use = None
14599     self.os = self.tags = self.nics = self.vcpus = None
14600     self.hypervisor = None
14601     self.relocate_from = None
14602     self.name = None
14603     self.instances = None
14604     self.evac_mode = None
14605     self.target_groups = []
14606     # computed fields
14607     self.required_nodes = None
14608     # init result fields
14609     self.success = self.info = self.result = None
14610
14611     try:
14612       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14613     except KeyError:
14614       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14615                                    " IAllocator" % self.mode)
14616
14617     keyset = [n for (n, _) in keydata]
14618
14619     for key in kwargs:
14620       if key not in keyset:
14621         raise errors.ProgrammerError("Invalid input parameter '%s' to"
14622                                      " IAllocator" % key)
14623       setattr(self, key, kwargs[key])
14624
14625     for key in keyset:
14626       if key not in kwargs:
14627         raise errors.ProgrammerError("Missing input parameter '%s' to"
14628                                      " IAllocator" % key)
14629     self._BuildInputData(compat.partial(fn, self), keydata)
14630
14631   def _ComputeClusterData(self):
14632     """Compute the generic allocator input data.
14633
14634     This is the data that is independent of the actual operation.
14635
14636     """
14637     cfg = self.cfg
14638     cluster_info = cfg.GetClusterInfo()
14639     # cluster data
14640     data = {
14641       "version": constants.IALLOCATOR_VERSION,
14642       "cluster_name": cfg.GetClusterName(),
14643       "cluster_tags": list(cluster_info.GetTags()),
14644       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14645       "ipolicy": cluster_info.ipolicy,
14646       }
14647     ninfo = cfg.GetAllNodesInfo()
14648     iinfo = cfg.GetAllInstancesInfo().values()
14649     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14650
14651     # node data
14652     node_list = [n.name for n in ninfo.values() if n.vm_capable]
14653
14654     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14655       hypervisor_name = self.hypervisor
14656     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14657       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14658     else:
14659       hypervisor_name = cluster_info.primary_hypervisor
14660
14661     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14662                                         [hypervisor_name])
14663     node_iinfo = \
14664       self.rpc.call_all_instances_info(node_list,
14665                                        cluster_info.enabled_hypervisors)
14666
14667     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14668
14669     config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14670     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14671                                                  i_list, config_ndata)
14672     assert len(data["nodes"]) == len(ninfo), \
14673         "Incomplete node data computed"
14674
14675     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14676
14677     self.in_data = data
14678
14679   @staticmethod
14680   def _ComputeNodeGroupData(cfg):
14681     """Compute node groups data.
14682
14683     """
14684     cluster = cfg.GetClusterInfo()
14685     ng = dict((guuid, {
14686       "name": gdata.name,
14687       "alloc_policy": gdata.alloc_policy,
14688       "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14689       })
14690       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14691
14692     return ng
14693
14694   @staticmethod
14695   def _ComputeBasicNodeData(cfg, node_cfg):
14696     """Compute global node data.
14697
14698     @rtype: dict
14699     @returns: a dict of name: (node dict, node config)
14700
14701     """
14702     # fill in static (config-based) values
14703     node_results = dict((ninfo.name, {
14704       "tags": list(ninfo.GetTags()),
14705       "primary_ip": ninfo.primary_ip,
14706       "secondary_ip": ninfo.secondary_ip,
14707       "offline": ninfo.offline,
14708       "drained": ninfo.drained,
14709       "master_candidate": ninfo.master_candidate,
14710       "group": ninfo.group,
14711       "master_capable": ninfo.master_capable,
14712       "vm_capable": ninfo.vm_capable,
14713       "ndparams": cfg.GetNdParams(ninfo),
14714       })
14715       for ninfo in node_cfg.values())
14716
14717     return node_results
14718
14719   @staticmethod
14720   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14721                               node_results):
14722     """Compute global node data.
14723
14724     @param node_results: the basic node structures as filled from the config
14725
14726     """
14727     #TODO(dynmem): compute the right data on MAX and MIN memory
14728     # make a copy of the current dict
14729     node_results = dict(node_results)
14730     for nname, nresult in node_data.items():
14731       assert nname in node_results, "Missing basic data for node %s" % nname
14732       ninfo = node_cfg[nname]
14733
14734       if not (ninfo.offline or ninfo.drained):
14735         nresult.Raise("Can't get data for node %s" % nname)
14736         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14737                                 nname)
14738         remote_info = _MakeLegacyNodeInfo(nresult.payload)
14739
14740         for attr in ["memory_total", "memory_free", "memory_dom0",
14741                      "vg_size", "vg_free", "cpu_total"]:
14742           if attr not in remote_info:
14743             raise errors.OpExecError("Node '%s' didn't return attribute"
14744                                      " '%s'" % (nname, attr))
14745           if not isinstance(remote_info[attr], int):
14746             raise errors.OpExecError("Node '%s' returned invalid value"
14747                                      " for '%s': %s" %
14748                                      (nname, attr, remote_info[attr]))
14749         # compute memory used by primary instances
14750         i_p_mem = i_p_up_mem = 0
14751         for iinfo, beinfo in i_list:
14752           if iinfo.primary_node == nname:
14753             i_p_mem += beinfo[constants.BE_MAXMEM]
14754             if iinfo.name not in node_iinfo[nname].payload:
14755               i_used_mem = 0
14756             else:
14757               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14758             i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14759             remote_info["memory_free"] -= max(0, i_mem_diff)
14760
14761             if iinfo.admin_state == constants.ADMINST_UP:
14762               i_p_up_mem += beinfo[constants.BE_MAXMEM]
14763
14764         # compute memory used by instances
14765         pnr_dyn = {
14766           "total_memory": remote_info["memory_total"],
14767           "reserved_memory": remote_info["memory_dom0"],
14768           "free_memory": remote_info["memory_free"],
14769           "total_disk": remote_info["vg_size"],
14770           "free_disk": remote_info["vg_free"],
14771           "total_cpus": remote_info["cpu_total"],
14772           "i_pri_memory": i_p_mem,
14773           "i_pri_up_memory": i_p_up_mem,
14774           }
14775         pnr_dyn.update(node_results[nname])
14776         node_results[nname] = pnr_dyn
14777
14778     return node_results
14779
14780   @staticmethod
14781   def _ComputeInstanceData(cluster_info, i_list):
14782     """Compute global instance data.
14783
14784     """
14785     instance_data = {}
14786     for iinfo, beinfo in i_list:
14787       nic_data = []
14788       for nic in iinfo.nics:
14789         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14790         nic_dict = {
14791           "mac": nic.mac,
14792           "ip": nic.ip,
14793           "mode": filled_params[constants.NIC_MODE],
14794           "link": filled_params[constants.NIC_LINK],
14795           }
14796         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14797           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14798         nic_data.append(nic_dict)
14799       pir = {
14800         "tags": list(iinfo.GetTags()),
14801         "admin_state": iinfo.admin_state,
14802         "vcpus": beinfo[constants.BE_VCPUS],
14803         "memory": beinfo[constants.BE_MAXMEM],
14804         "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14805         "os": iinfo.os,
14806         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14807         "nics": nic_data,
14808         "disks": [{constants.IDISK_SIZE: dsk.size,
14809                    constants.IDISK_MODE: dsk.mode}
14810                   for dsk in iinfo.disks],
14811         "disk_template": iinfo.disk_template,
14812         "hypervisor": iinfo.hypervisor,
14813         }
14814       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14815                                                  pir["disks"])
14816       instance_data[iinfo.name] = pir
14817
14818     return instance_data
14819
14820   def _AddNewInstance(self):
14821     """Add new instance data to allocator structure.
14822
14823     This in combination with _AllocatorGetClusterData will create the
14824     correct structure needed as input for the allocator.
14825
14826     The checks for the completeness of the opcode must have already been
14827     done.
14828
14829     """
14830     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14831
14832     if self.disk_template in constants.DTS_INT_MIRROR:
14833       self.required_nodes = 2
14834     else:
14835       self.required_nodes = 1
14836
14837     request = {
14838       "name": self.name,
14839       "disk_template": self.disk_template,
14840       "tags": self.tags,
14841       "os": self.os,
14842       "vcpus": self.vcpus,
14843       "memory": self.memory,
14844       "spindle_use": self.spindle_use,
14845       "disks": self.disks,
14846       "disk_space_total": disk_space,
14847       "nics": self.nics,
14848       "required_nodes": self.required_nodes,
14849       "hypervisor": self.hypervisor,
14850       }
14851
14852     return request
14853
14854   def _AddRelocateInstance(self):
14855     """Add relocate instance data to allocator structure.
14856
14857     This in combination with _IAllocatorGetClusterData will create the
14858     correct structure needed as input for the allocator.
14859
14860     The checks for the completeness of the opcode must have already been
14861     done.
14862
14863     """
14864     instance = self.cfg.GetInstanceInfo(self.name)
14865     if instance is None:
14866       raise errors.ProgrammerError("Unknown instance '%s' passed to"
14867                                    " IAllocator" % self.name)
14868
14869     if instance.disk_template not in constants.DTS_MIRRORED:
14870       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14871                                  errors.ECODE_INVAL)
14872
14873     if instance.disk_template in constants.DTS_INT_MIRROR and \
14874         len(instance.secondary_nodes) != 1:
14875       raise errors.OpPrereqError("Instance has not exactly one secondary node",
14876                                  errors.ECODE_STATE)
14877
14878     self.required_nodes = 1
14879     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14880     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14881
14882     request = {
14883       "name": self.name,
14884       "disk_space_total": disk_space,
14885       "required_nodes": self.required_nodes,
14886       "relocate_from": self.relocate_from,
14887       }
14888     return request
14889
14890   def _AddNodeEvacuate(self):
14891     """Get data for node-evacuate requests.
14892
14893     """
14894     return {
14895       "instances": self.instances,
14896       "evac_mode": self.evac_mode,
14897       }
14898
14899   def _AddChangeGroup(self):
14900     """Get data for node-evacuate requests.
14901
14902     """
14903     return {
14904       "instances": self.instances,
14905       "target_groups": self.target_groups,
14906       }
14907
14908   def _BuildInputData(self, fn, keydata):
14909     """Build input data structures.
14910
14911     """
14912     self._ComputeClusterData()
14913
14914     request = fn()
14915     request["type"] = self.mode
14916     for keyname, keytype in keydata:
14917       if keyname not in request:
14918         raise errors.ProgrammerError("Request parameter %s is missing" %
14919                                      keyname)
14920       val = request[keyname]
14921       if not keytype(val):
14922         raise errors.ProgrammerError("Request parameter %s doesn't pass"
14923                                      " validation, value %s, expected"
14924                                      " type %s" % (keyname, val, keytype))
14925     self.in_data["request"] = request
14926
14927     self.in_text = serializer.Dump(self.in_data)
14928
  # Check for a list of strings; used below for several request parameters
  _STRING_LIST = ht.TListOf(ht.TString)
  # Check for a list of jobs, each job being a list of dicts whose "OP_ID"
  # entry must be the ID of the failover, migrate or replace-disks opcode
  # NOTE(review): the meaning of the two boolean arguments passed to
  # TStrictDict is not visible here -- see the ht module
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  # Check for a list of three-item entries: two non-empty strings followed
  # by a list of non-empty strings (presumably instance name, target group
  # and new nodes -- TODO confirm against the iallocator documentation)
  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  # Check for a list of two-item entries: a non-empty string followed by an
  # optional string (presumably instance name and failure reason -- TODO
  # confirm)
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  # Check for the three-item result used by the node-evacuate and
  # change-group modes: moved entries, failed entries and the job list
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  # Per-mode data; each value is a tuple of:
  #   - the method building the mode-specific request
  #   - a list of (request key, check function) tuples
  #   - the check function for the allocator's result
  # NOTE(review): presumably unpacked by the constructor into the arguments
  # of _BuildInputData and into self._result_check -- confirm there
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("spindle_use", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
     constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
     constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
14982
14983   def Run(self, name, validate=True, call_fn=None):
14984     """Run an instance allocator and return the results.
14985
14986     """
14987     if call_fn is None:
14988       call_fn = self.rpc.call_iallocator_runner
14989
14990     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14991     result.Raise("Failure while running the iallocator script")
14992
14993     self.out_text = result.payload
14994     if validate:
14995       self._ValidateResult()
14996
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    The text in C{self.out_text} is deserialized and must yield a dict
    with the keys "success", "info" and "result"; their values are also
    stored as attributes of the same names on this object.

    @raise errors.OpExecError: if the output can't be parsed, is not a
        dict, lacks one of the required keys, doesn't pass the result check
        or (in relocation mode, for successful runs) returns nodes from
        groups other than the ones of the request

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    # (a "nodes" key is accepted in place of the "result" key)
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    # Note that the attributes for the keys preceding a missing one have
    # already been set when the error is raised
    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    # NOTE(review): an error code is passed here, while the other
    # OpExecError instances raised in this method get only a message --
    # confirm this is intended
    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      # Map node names to the group recorded in the input data
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      # The group of the primary node is part of both sets, hence only the
      # groups of the other nodes can make a difference
      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      # Only successful results are checked for their groups
      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
15053
15054   @staticmethod
15055   def _NodesToGroups(node2group, groups, nodes):
15056     """Returns a list of unique group names for a list of nodes.
15057
15058     @type node2group: dict
15059     @param node2group: Map from node name to group UUID
15060     @type groups: dict
15061     @param groups: Group information
15062     @type nodes: list
15063     @param nodes: Node names
15064
15065     """
15066     result = set()
15067
15068     for node in nodes:
15069       try:
15070         group_uuid = node2group[node]
15071       except KeyError:
15072         # Ignore unknown node
15073         pass
15074       else:
15075         try:
15076           group = groups[group_uuid]
15077         except KeyError:
15078           # Can't find group, let's use UUID
15079           group_name = group_uuid
15080         else:
15081           group_name = group["name"]
15082
15083         result.add(group_name)
15084
15085     return sorted(result)
15086
15087
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  Depending on the direction requested in the opcode, this LU either
  returns the input text generated for the allocator or runs the named
  allocator and returns its (unvalidated) output text.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    For the relocation mode the instance name is expanded and the
    instance's secondary nodes are saved in C{self.relocate_from}; for the
    change-group and node-evacuate modes the instance names are expanded.

    @raise errors.OpPrereqError: if a parameter is missing or invalid, if
        the instance to allocate already exists, or if the mode or the
        direction is unknown

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # An allocation test must use a name not yet in use
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # Each disk must be a dict with an integer size and a valid access mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # The allocator name is only needed if the allocator is actually run
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function; not used by this LU
    @return: the allocator's input text if the direction is
        L{constants.IALLOCATOR_DIR_IN}, otherwise the output text of the
        allocator run (which is not validated)
    @raise errors.ProgrammerError: if the mode is not handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      # NOTE(review): "spindle_use" is among the request keys listed for
      # the allocation mode in _MODE_DATA, but it's not passed here --
      # confirm against IAllocator.__init__ and the opcode definition
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      # The mode must be interpolated into the message; passing it as a
      # second argument would leave the "%s" placeholder unformatted
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # The raw output is returned, hence no validation is requested
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
15191
15192
#: Query type implementations
#: (maps each query resource constant to the object implementing it)
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

# The table must list exactly the resources in constants.QR_VIA_OP: none
# may be missing and no other one may be included
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15204
15205
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the entry registered for C{name} in L{_QUERY_IMPL}
  @raise errors.OpPrereqError: if no implementation is registered for
      C{name}

  """
  if name not in _QUERY_IMPL:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

  return _QUERY_IMPL[name]