code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62 from ganeti import rpc
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141     # Dicts used to declare locking needs to mcpu
 142     self.needed_locks = None
 143     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 144     self.add_locks = {}
 145     self.remove_locks = {}
 146     # Used to force good behavior when calling helper functions
 147     self.recalculate_locks = {}
 148     # logging
 149     self.Log = processor.Log # pylint: disable=C0103
 150     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 151     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 152     self.LogStep = processor.LogStep # pylint: disable=C0103
 153     # support for dry-run
 154     self.dry_run_result = None
 155     # support for generic debug attribute
 156     if (not hasattr(self.op, "debug_level") or
 157         not isinstance(self.op.debug_level, int)):
 158       self.op.debug_level = 0
 159
 160     # Tasklets
 161     self.tasklets = None
 162
 163     # Validate opcode parameters and set defaults
 164     self.op.Validate(True)
 165
 166     self.CheckArguments()
 167
 168   def CheckArguments(self):
 169     """Check syntactic validity for the opcode arguments.
 170
 171     This method is for doing a simple syntactic check and ensure
 172     validity of opcode parameters, without any cluster-related
 173     checks. While the same can be accomplished in ExpandNames and/or
 174     CheckPrereq, doing these separate is better because:
 175
 176       - ExpandNames is left as as purely a lock-related function
 177       - CheckPrereq is run after we have acquired locks (and possible
 178         waited for them)
 179
 180     The function is allowed to change the self.op attribute so that
 181     later methods can no longer worry about missing parameters.
 182
 183     """
 184     pass
 185
 186   def ExpandNames(self):
 187     """Expand names for this LU.
 188
 189     This method is called before starting to execute the opcode, and it should
 190     update all the parameters of the opcode to their canonical form (e.g. a
 191     short node name must be fully expanded after this method has successfully
 192     completed). This way locking, hooks, logging, etc. can work correctly.
 193
 194     LUs which implement this method must also populate the self.needed_locks
 195     member, as a dict with lock levels as keys, and a list of needed lock names
 196     as values. Rules:
 197
 198       - use an empty dict if you don't need any lock
 199       - if you don't need any lock at a particular level omit that
 200         level (note that in this case C{DeclareLocks} won't be called
 201         at all for that level)
 202       - if you need locks at a level, but you can't calculate it in
 203         this function, initialise that level with an empty list and do
 204         further processing in L{LogicalUnit.DeclareLocks} (see that
 205         function's docstring)
 206       - don't put anything for the BGL level
 207       - if you want all locks at a level use L{locking.ALL_SET} as a value
 208
 209     If you need to share locks (rather than acquire them exclusively) at one
 210     level you can modify self.share_locks, setting a true value (usually 1) for
 211     that level. By default locks are not shared.
 212
 213     This function can also define a list of tasklets, which then will be
 214     executed in order instead of the usual LU-level CheckPrereq and Exec
 215     functions, if those are not defined by the LU.
 216
 217     Examples::
 218
 219       # Acquire all nodes and one instance
 220       self.needed_locks = {
 221         locking.LEVEL_NODE: locking.ALL_SET,
 222         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 223       }
 224       # Acquire just two nodes
 225       self.needed_locks = {
 226         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 227       }
 228       # Acquire no locks
 229       self.needed_locks = {} # No, you can't leave it to the default value None
 230
 231     """
 232     # The implementation of this method is mandatory only if the new LU is
 233     # concurrent, so that old LUs don't need to be changed all at the same
 234     # time.
 235     if self.REQ_BGL:
 236       self.needed_locks = {} # Exclusive LUs don't need locks.
 237     else:
 238       raise NotImplementedError
 239
 240   def DeclareLocks(self, level):
 241     """Declare LU locking needs for a level
 242
 243     While most LUs can just declare their locking needs at ExpandNames time,
 244     sometimes there's the need to calculate some locks after having acquired
 245     the ones before. This function is called just before acquiring locks at a
 246     particular level, but after acquiring the ones at lower levels, and permits
 247     such calculations. It can be used to modify self.needed_locks, and by
 248     default it does nothing.
 249
 250     This function is only called if you have something already set in
 251     self.needed_locks for the level.
 252
 253     @param level: Locking level which is going to be locked
 254     @type level: member of L{ganeti.locking.LEVELS}
 255
 256     """
 257
 258   def CheckPrereq(self):
 259     """Check prerequisites for this LU.
 260
 261     This method should check that the prerequisites for the execution
 262     of this LU are fulfilled. It can do internode communication, but
 263     it should be idempotent - no cluster or system changes are
 264     allowed.
 265
 266     The method should raise errors.OpPrereqError in case something is
 267     not fulfilled. Its return value is ignored.
 268
 269     This method should also update all the parameters of the opcode to
 270     their canonical form if it hasn't been done by ExpandNames before.
 271
 272     """
 273     if self.tasklets is not None:
 274       for (idx, tl) in enumerate(self.tasklets):
 275         logging.debug("Checking prerequisites for tasklet %s/%s",
 276                       idx + 1, len(self.tasklets))
 277         tl.CheckPrereq()
 278     else:
 279       pass
 280
 281   def Exec(self, feedback_fn):
 282     """Execute the LU.
 283
 284     This method should implement the actual work. It should raise
 285     errors.OpExecError for failures that are somewhat dealt with in
 286     code, or expected.
 287
 288     """
 289     if self.tasklets is not None:
 290       for (idx, tl) in enumerate(self.tasklets):
 291         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 292         tl.Exec(feedback_fn)
 293     else:
 294       raise NotImplementedError
 295
 296   def BuildHooksEnv(self):
 297     """Build hooks environment for this LU.
 298
 299     @rtype: dict
 300     @return: Dictionary containing the environment that will be used for
 301       running the hooks for this LU. The keys of the dict must not be prefixed
 302       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 303       will extend the environment with additional variables. If no environment
 304       should be defined, an empty dictionary should be returned (not C{None}).
 305     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 306       will not be called.
 307
 308     """
 309     raise NotImplementedError
 310
 311   def BuildHooksNodes(self):
 312     """Build list of nodes to run LU's hooks.
 313
 314     @rtype: tuple; (list, list)
 315     @return: Tuple containing a list of node names on which the hook
 316       should run before the execution and a list of node names on which the
 317       hook should run after the execution. No nodes should be returned as an
 318       empty list (and not None).
 319     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 320       will not be called.
 321
 322     """
 323     raise NotImplementedError
 324
 325   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 326     """Notify the LU about the results of its hooks.
 327
 328     This method is called every time a hooks phase is executed, and notifies
 329     the Logical Unit about the hooks' result. The LU can then use it to alter
 330     its result based on the hooks.  By default the method does nothing and the
 331     previous result is passed back unchanged but any LU can define it if it
 332     wants to use the local cluster hook-scripts somehow.
 333
 334     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 335         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 336     @param hook_results: the results of the multi-node hooks rpc call
 337     @param feedback_fn: function used send feedback back to the caller
 338     @param lu_result: the previous Exec result this LU had, or None
 339         in the PRE phase
 340     @return: the new Exec result, based on the previous result
 341         and hook results
 342
 343     """
 344     # API must be kept, thus we ignore the unused argument and could
 345     # be a function warnings
 346     # pylint: disable=W0613,R0201
 347     return lu_result
 348
 349   def _ExpandAndLockInstance(self):
 350     """Helper function to expand and lock an instance.
 351
 352     Many LUs that work on an instance take its name in self.op.instance_name
 353     and need to expand it and then declare the expanded name for locking. This
 354     function does it, and then updates self.op.instance_name to the expanded
 355     name. It also initializes needed_locks as a dict, if this hasn't been done
 356     before.
 357
 358     """
 359     if self.needed_locks is None:
 360       self.needed_locks = {}
 361     else:
 362       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 363         "_ExpandAndLockInstance called with instance-level locks set"
 364     self.op.instance_name = _ExpandInstanceName(self.cfg,
 365                                                 self.op.instance_name)
 366     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 367
 368   def _LockInstancesNodes(self, primary_only=False,
 369                           level=locking.LEVEL_NODE):
 370     """Helper function to declare instances' nodes for locking.
 371
 372     This function should be called after locking one or more instances to lock
 373     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 374     with all primary or secondary nodes for instances already locked and
 375     present in self.needed_locks[locking.LEVEL_INSTANCE].
 376
 377     It should be called from DeclareLocks, and for safety only works if
 378     self.recalculate_locks[locking.LEVEL_NODE] is set.
 379
 380     In the future it may grow parameters to just lock some instance's nodes, or
 381     to just lock primaries or secondary nodes, if needed.
 382
 383     If should be called in DeclareLocks in a way similar to::
 384
 385       if level == locking.LEVEL_NODE:
 386         self._LockInstancesNodes()
 387
 388     @type primary_only: boolean
 389     @param primary_only: only lock primary nodes of locked instances
 390     @param level: Which lock level to use for locking nodes
 391
 392     """
 393     assert level in self.recalculate_locks, \
 394       "_LockInstancesNodes helper function called with no nodes to recalculate"
 395
 396     # TODO: check if we're really been called with the instance locks held
 397
 398     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 399     # future we might want to have different behaviors depending on the value
 400     # of self.recalculate_locks[locking.LEVEL_NODE]
 401     wanted_nodes = []
 402     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 403     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 404       wanted_nodes.append(instance.primary_node)
 405       if not primary_only:
 406         wanted_nodes.extend(instance.secondary_nodes)
 407
 408     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 409       self.needed_locks[level] = wanted_nodes
 410     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 411       self.needed_locks[level].extend(wanted_nodes)
 412     else:
 413       raise errors.ProgrammerError("Unknown recalculation mode")
 414
 415     del self.recalculate_locks[level]
 416
 417
 418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 419   """Simple LU which runs no hooks.
 420
 421   This LU is intended as a parent for other LogicalUnits which will
 422   run no hooks, in order to reduce duplicate code.
 423
 424   """
 425   HPATH = None
 426   HTYPE = None
 427
 428   def BuildHooksEnv(self):
 429     """Empty BuildHooksEnv for NoHooksLu.
 430
 431     This just raises an error.
 432
 433     """
 434     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 435
 436   def BuildHooksNodes(self):
 437     """Empty BuildHooksNodes for NoHooksLU.
 438
 439     """
 440     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 441
 442
 443 class Tasklet:
 444   """Tasklet base class.
 445
 446   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 447   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 448   tasklets know nothing about locks.
 449
 450   Subclasses must follow these rules:
 451     - Implement CheckPrereq
 452     - Implement Exec
 453
 454   """
 455   def __init__(self, lu):
 456     self.lu = lu
 457
 458     # Shortcuts
 459     self.cfg = lu.cfg
 460     self.rpc = lu.rpc
 461
 462   def CheckPrereq(self):
 463     """Check prerequisites for this tasklets.
 464
 465     This method should check whether the prerequisites for the execution of
 466     this tasklet are fulfilled. It can do internode communication, but it
 467     should be idempotent - no cluster or system changes are allowed.
 468
 469     The method should raise errors.OpPrereqError in case something is not
 470     fulfilled. Its return value is ignored.
 471
 472     This method should also update all parameters to their canonical form if it
 473     hasn't been done before.
 474
 475     """
 476     pass
 477
 478   def Exec(self, feedback_fn):
 479     """Execute the tasklet.
 480
 481     This method should implement the actual work. It should raise
 482     errors.OpExecError for failures that are somewhat dealt with in code, or
 483     expected.
 484
 485     """
 486     raise NotImplementedError
 487
 488
 489 class _QueryBase:
 490   """Base for query utility classes.
 491
 492   """
 493   #: Attribute holding field definitions
 494   FIELDS = None
 495
 496   def __init__(self, qfilter, fields, use_locking):
 497     """Initializes this class.
 498
 499     """
 500     self.use_locking = use_locking
 501
 502     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 503                              namefield="name")
 504     self.requested_data = self.query.RequestedData()
 505     self.names = self.query.RequestedNames()
 506
 507     # Sort only if no names were requested
 508     self.sort_by_name = not self.names
 509
 510     self.do_locking = None
 511     self.wanted = None
 512
 513   def _GetNames(self, lu, all_names, lock_level):
 514     """Helper function to determine names asked for in the query.
 515
 516     """
 517     if self.do_locking:
 518       names = lu.owned_locks(lock_level)
 519     else:
 520       names = all_names
 521
 522     if self.wanted == locking.ALL_SET:
 523       assert not self.names
 524       # caller didn't specify names, so ordering is not important
 525       return utils.NiceSort(names)
 526
 527     # caller specified names and we must keep the same order
 528     assert self.names
 529     assert not self.do_locking or lu.glm.is_owned(lock_level)
 530
 531     missing = set(self.wanted).difference(names)
 532     if missing:
 533       raise errors.OpExecError("Some items were removed before retrieving"
 534                                " their data: %s" % missing)
 535
 536     # Return expanded names
 537     return self.wanted
 538
 539   def ExpandNames(self, lu):
 540     """Expand names for this query.
 541
 542     See L{LogicalUnit.ExpandNames}.
 543
 544     """
 545     raise NotImplementedError()
 546
 547   def DeclareLocks(self, lu, level):
 548     """Declare locks for this query.
 549
 550     See L{LogicalUnit.DeclareLocks}.
 551
 552     """
 553     raise NotImplementedError()
 554
 555   def _GetQueryData(self, lu):
 556     """Collects all data for this query.
 557
 558     @return: Query data object
 559
 560     """
 561     raise NotImplementedError()
 562
 563   def NewStyleQuery(self, lu):
 564     """Collect data and execute query.
 565
 566     """
 567     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 568                                   sort_by_name=self.sort_by_name)
 569
 570   def OldStyleQuery(self, lu):
 571     """Collect data and execute query.
 572
 573     """
 574     return self.query.OldStyleQuery(self._GetQueryData(lu),
 575                                     sort_by_name=self.sort_by_name)
 576
 577
 578 def _ShareAll():
 579   """Returns a dict declaring all lock levels shared.
 580
 581   """
 582   return dict.fromkeys(locking.LEVELS, 1)
 583
 584
 585 def _MakeLegacyNodeInfo(data):
 586   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 587
 588   Converts the data into a single dictionary. This is fine for most use cases,
 589   but some require information from more than one volume group or hypervisor.
 590
 591   """
 592   (bootid, (vg_info, ), (hv_info, )) = data
 593
 594   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 595     "bootid": bootid,
 596     })
 597
 598
 599 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 600   """Checks if the owned node groups are still correct for an instance.
 601
 602   @type cfg: L{config.ConfigWriter}
 603   @param cfg: The cluster configuration
 604   @type instance_name: string
 605   @param instance_name: Instance name
 606   @type owned_groups: set or frozenset
 607   @param owned_groups: List of currently owned node groups
 608
 609   """
 610   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 611
 612   if not owned_groups.issuperset(inst_groups):
 613     raise errors.OpPrereqError("Instance %s's node groups changed since"
 614                                " locks were acquired, current groups are"
 615                                " are '%s', owning groups '%s'; retry the"
 616                                " operation" %
 617                                (instance_name,
 618                                 utils.CommaJoin(inst_groups),
 619                                 utils.CommaJoin(owned_groups)),
 620                                errors.ECODE_STATE)
 621
 622   return inst_groups
 623
 624
 625 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 626   """Checks if the instances in a node group are still correct.
 627
 628   @type cfg: L{config.ConfigWriter}
 629   @param cfg: The cluster configuration
 630   @type group_uuid: string
 631   @param group_uuid: Node group UUID
 632   @type owned_instances: set or frozenset
 633   @param owned_instances: List of currently owned instances
 634
 635   """
 636   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 637   if owned_instances != wanted_instances:
 638     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 639                                " locks were acquired, wanted '%s', have '%s';"
 640                                " retry the operation" %
 641                                (group_uuid,
 642                                 utils.CommaJoin(wanted_instances),
 643                                 utils.CommaJoin(owned_instances)),
 644                                errors.ECODE_STATE)
 645
 646   return wanted_instances
 647
 648
 649 def _SupportsOob(cfg, node):
 650   """Tells if node supports OOB.
 651
 652   @type cfg: L{config.ConfigWriter}
 653   @param cfg: The cluster configuration
 654   @type node: L{objects.Node}
 655   @param node: The node
 656   @return: The OOB script if supported or an empty string otherwise
 657
 658   """
 659   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 660
 661
 662 def _GetWantedNodes(lu, nodes):
 663   """Returns list of checked and expanded node names.
 664
 665   @type lu: L{LogicalUnit}
 666   @param lu: the logical unit on whose behalf we execute
 667   @type nodes: list
 668   @param nodes: list of node names or None for all nodes
 669   @rtype: list
 670   @return: the list of nodes, sorted
 671   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 672
 673   """
 674   if nodes:
 675     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 676
 677   return utils.NiceSort(lu.cfg.GetNodeList())
 678
 679
 680 def _GetWantedInstances(lu, instances):
 681   """Returns list of checked and expanded instance names.
 682
 683   @type lu: L{LogicalUnit}
 684   @param lu: the logical unit on whose behalf we execute
 685   @type instances: list
 686   @param instances: list of instance names or None for all instances
 687   @rtype: list
 688   @return: the list of instances, sorted
 689   @raise errors.OpPrereqError: if the instances parameter is wrong type
 690   @raise errors.OpPrereqError: if any of the passed instances is not found
 691
 692   """
 693   if instances:
 694     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 695   else:
 696     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 697   return wanted
 698
 699
 700 def _GetUpdatedParams(old_params, update_dict,
 701                       use_default=True, use_none=False):
 702   """Return the new version of a parameter dictionary.
 703
 704   @type old_params: dict
 705   @param old_params: old parameters
 706   @type update_dict: dict
 707   @param update_dict: dict containing new parameter values, or
 708       constants.VALUE_DEFAULT to reset the parameter to its default
 709       value
 710   @param use_default: boolean
 711   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 712       values as 'to be deleted' values
 713   @param use_none: boolean
 714   @type use_none: whether to recognise C{None} values as 'to be
 715       deleted' values
 716   @rtype: dict
 717   @return: the new parameter dictionary
 718
 719   """
 720   params_copy = copy.deepcopy(old_params)
 721   for key, val in update_dict.iteritems():
 722     if ((use_default and val == constants.VALUE_DEFAULT) or
 723         (use_none and val is None)):
 724       try:
 725         del params_copy[key]
 726       except KeyError:
 727         pass
 728     else:
 729       params_copy[key] = val
 730   return params_copy
 731
 732
 733 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 734   """Return the new version of a instance policy.
 735
 736   @param group_policy: whether this policy applies to a group and thus
 737     we should support removal of policy entries
 738
 739   """
 740   use_none = use_default = group_policy
 741   ipolicy = copy.deepcopy(old_ipolicy)
 742   for key, value in new_ipolicy.items():
 743     if key not in constants.IPOLICY_ALL_KEYS:
 744       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 745                                  errors.ECODE_INVAL)
 746     if key in constants.IPOLICY_ISPECS:
 747       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 748       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 749                                        use_none=use_none,
 750                                        use_default=use_default)
 751     else:
 752       if not value or value == [constants.VALUE_DEFAULT]:
 753         if group_policy:
 754           del ipolicy[key]
 755         else:
 756           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 757                                      " on the cluster'" % key,
 758                                      errors.ECODE_INVAL)
 759       else:
 760         if key in constants.IPOLICY_PARAMETERS:
 761           # FIXME: we assume all such values are float
 762           try:
 763             ipolicy[key] = float(value)
 764           except (TypeError, ValueError), err:
 765             raise errors.OpPrereqError("Invalid value for attribute"
 766                                        " '%s': '%s', error: %s" %
 767                                        (key, value, err), errors.ECODE_INVAL)
 768         else:
 769           # FIXME: we assume all others are lists; this should be redone
 770           # in a nicer way
 771           ipolicy[key] = list(value)
 772   try:
 773     objects.InstancePolicy.CheckParameterSyntax(ipolicy)
 774   except errors.ConfigurationError, err:
 775     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 776                                errors.ECODE_INVAL)
 777   return ipolicy
 778
 779
 780 def _UpdateAndVerifySubDict(base, updates, type_check):
 781   """Updates and verifies a dict with sub dicts of the same type.
 782
 783   @param base: The dict with the old data
 784   @param updates: The dict with the new data
 785   @param type_check: Dict suitable to ForceDictType to verify correct types
 786   @returns: A new dict with updated and verified values
 787
 788   """
 789   def fn(old, value):
 790     new = _GetUpdatedParams(old, value)
 791     utils.ForceDictType(new, type_check)
 792     return new
 793
 794   ret = copy.deepcopy(base)
 795   ret.update(dict((key, fn(base.get(key, {}), value))
 796                   for key, value in updates.items()))
 797   return ret
 798
 799
 800 def _MergeAndVerifyHvState(op_input, obj_input):
 801   """Combines the hv state from an opcode with the one of the object
 802
 803   @param op_input: The input dict from the opcode
 804   @param obj_input: The input dict from the objects
 805   @return: The verified and updated dict
 806
 807   """
 808   if op_input:
 809     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 810     if invalid_hvs:
 811       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 812                                  " %s" % utils.CommaJoin(invalid_hvs),
 813                                  errors.ECODE_INVAL)
 814     if obj_input is None:
 815       obj_input = {}
 816     type_check = constants.HVSTS_PARAMETER_TYPES
 817     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 818
 819   return None
 820
 821
 822 def _MergeAndVerifyDiskState(op_input, obj_input):
 823   """Combines the disk state from an opcode with the one of the object
 824
 825   @param op_input: The input dict from the opcode
 826   @param obj_input: The input dict from the objects
 827   @return: The verified and updated dict
 828   """
 829   if op_input:
 830     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 831     if invalid_dst:
 832       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 833                                  utils.CommaJoin(invalid_dst),
 834                                  errors.ECODE_INVAL)
 835     type_check = constants.DSS_PARAMETER_TYPES
 836     if obj_input is None:
 837       obj_input = {}
 838     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 839                                               type_check))
 840                 for key, value in op_input.items())
 841
 842   return None
 843
 844
 845 def _ReleaseLocks(lu, level, names=None, keep=None):
 846   """Releases locks owned by an LU.
 847
 848   @type lu: L{LogicalUnit}
 849   @param level: Lock level
 850   @type names: list or None
 851   @param names: Names of locks to release
 852   @type keep: list or None
 853   @param keep: Names of locks to retain
 854
 855   """
 856   assert not (keep is not None and names is not None), \
 857          "Only one of the 'names' and the 'keep' parameters can be given"
 858
 859   if names is not None:
 860     should_release = names.__contains__
 861   elif keep:
 862     should_release = lambda name: name not in keep
 863   else:
 864     should_release = None
 865
 866   owned = lu.owned_locks(level)
 867   if not owned:
 868     # Not owning any lock at this level, do nothing
 869     pass
 870
 871   elif should_release:
 872     retain = []
 873     release = []
 874
 875     # Determine which locks to release
 876     for name in owned:
 877       if should_release(name):
 878         release.append(name)
 879       else:
 880         retain.append(name)
 881
 882     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 883
 884     # Release just some locks
 885     lu.glm.release(level, names=release)
 886
 887     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 888   else:
 889     # Release everything
 890     lu.glm.release(level)
 891
 892     assert not lu.glm.is_owned(level), "No locks should be owned"
 893
 894
 895 def _MapInstanceDisksToNodes(instances):
 896   """Creates a map from (node, volume) to instance name.
 897
 898   @type instances: list of L{objects.Instance}
 899   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 900
 901   """
 902   return dict(((node, vol), inst.name)
 903               for inst in instances
 904               for (node, vols) in inst.MapLVsByNode().items()
 905               for vol in vols)
 906
 907
 908 def _RunPostHook(lu, node_name):
 909   """Runs the post-hook for an opcode on a single node.
 910
 911   """
 912   hm = lu.proc.BuildHooksManager(lu)
 913   try:
 914     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 915   except:
 916     # pylint: disable=W0702
 917     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 918
 919
 920 def _CheckOutputFields(static, dynamic, selected):
 921   """Checks whether all selected fields are valid.
 922
 923   @type static: L{utils.FieldSet}
 924   @param static: static fields set
 925   @type dynamic: L{utils.FieldSet}
 926   @param dynamic: dynamic fields set
 927
 928   """
 929   f = utils.FieldSet()
 930   f.Extend(static)
 931   f.Extend(dynamic)
 932
 933   delta = f.NonMatching(selected)
 934   if delta:
 935     raise errors.OpPrereqError("Unknown output fields selected: %s"
 936                                % ",".join(delta), errors.ECODE_INVAL)
 937
 938
 939 def _CheckGlobalHvParams(params):
 940   """Validates that given hypervisor params are not global ones.
 941
 942   This will ensure that instances don't get customised versions of
 943   global params.
 944
 945   """
 946   used_globals = constants.HVC_GLOBALS.intersection(params)
 947   if used_globals:
 948     msg = ("The following hypervisor parameters are global and cannot"
 949            " be customized at instance level, please modify them at"
 950            " cluster level: %s" % utils.CommaJoin(used_globals))
 951     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 952
 953
 954 def _CheckNodeOnline(lu, node, msg=None):
 955   """Ensure that a given node is online.
 956
 957   @param lu: the LU on behalf of which we make the check
 958   @param node: the node to check
 959   @param msg: if passed, should be a message to replace the default one
 960   @raise errors.OpPrereqError: if the node is offline
 961
 962   """
 963   if msg is None:
 964     msg = "Can't use offline node"
 965   if lu.cfg.GetNodeInfo(node).offline:
 966     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 967
 968
 969 def _CheckNodeNotDrained(lu, node):
 970   """Ensure that a given node is not drained.
 971
 972   @param lu: the LU on behalf of which we make the check
 973   @param node: the node to check
 974   @raise errors.OpPrereqError: if the node is drained
 975
 976   """
 977   if lu.cfg.GetNodeInfo(node).drained:
 978     raise errors.OpPrereqError("Can't use drained node %s" % node,
 979                                errors.ECODE_STATE)
 980
 981
 982 def _CheckNodeVmCapable(lu, node):
 983   """Ensure that a given node is vm capable.
 984
 985   @param lu: the LU on behalf of which we make the check
 986   @param node: the node to check
 987   @raise errors.OpPrereqError: if the node is not vm capable
 988
 989   """
 990   if not lu.cfg.GetNodeInfo(node).vm_capable:
 991     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 992                                errors.ECODE_STATE)
 993
 994
 995 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 996   """Ensure that a node supports a given OS.
 997
 998   @param lu: the LU on behalf of which we make the check
 999   @param node: the node to check
1000   @param os_name: the OS to query about
1001   @param force_variant: whether to ignore variant errors
1002   @raise errors.OpPrereqError: if the node is not supporting the OS
1003
1004   """
1005   result = lu.rpc.call_os_get(node, os_name)
1006   result.Raise("OS '%s' not in supported OS list for node %s" %
1007                (os_name, node),
1008                prereq=True, ecode=errors.ECODE_INVAL)
1009   if not force_variant:
1010     _CheckOSVariant(result.payload, os_name)
1011
1012
1013 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1014   """Ensure that a node has the given secondary ip.
1015
1016   @type lu: L{LogicalUnit}
1017   @param lu: the LU on behalf of which we make the check
1018   @type node: string
1019   @param node: the node to check
1020   @type secondary_ip: string
1021   @param secondary_ip: the ip to check
1022   @type prereq: boolean
1023   @param prereq: whether to throw a prerequisite or an execute error
1024   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1025   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1026
1027   """
1028   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1029   result.Raise("Failure checking secondary ip on node %s" % node,
1030                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1031   if not result.payload:
1032     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1033            " please fix and re-run this command" % secondary_ip)
1034     if prereq:
1035       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1036     else:
1037       raise errors.OpExecError(msg)
1038
1039
1040 def _GetClusterDomainSecret():
1041   """Reads the cluster domain secret.
1042
1043   """
1044   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1045                                strict=True)
1046
1047
1048 def _CheckInstanceState(lu, instance, req_states, msg=None):
1049   """Ensure that an instance is in one of the required states.
1050
1051   @param lu: the LU on behalf of which we make the check
1052   @param instance: the instance to check
1053   @param msg: if passed, should be a message to replace the default one
1054   @raise errors.OpPrereqError: if the instance is not in the required state
1055
1056   """
1057   if msg is None:
1058     msg = "can't use instance from outside %s states" % ", ".join(req_states)
1059   if instance.admin_state not in req_states:
1060     raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1061                                (instance.name, instance.admin_state, msg),
1062                                errors.ECODE_STATE)
1063
1064   if constants.ADMINST_UP not in req_states:
1065     pnode = instance.primary_node
1066     ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1067     ins_l.Raise("Can't contact node %s for instance information" % pnode,
1068                 prereq=True, ecode=errors.ECODE_ENVIRON)
1069
1070     if instance.name in ins_l.payload:
1071       raise errors.OpPrereqError("Instance %s is running, %s" %
1072                                  (instance.name, msg), errors.ECODE_STATE)
1073
1074
1075 def _ComputeMinMaxSpec(name, ipolicy, value):
1076   """Computes if value is in the desired range.
1077
1078   @param name: name of the parameter for which we perform the check
1079   @param ipolicy: dictionary containing min, max and std values
1080   @param value: actual value that we want to use
1081   @return: None or element not meeting the criteria
1082
1083
1084   """
1085   if value in [None, constants.VALUE_AUTO]:
1086     return None
1087   max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1088   min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1089   if value > max_v or min_v > value:
1090     return ("%s value %s is not in range [%s, %s]" %
1091             (name, value, min_v, max_v))
1092   return None
1093
1094
1095 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1096                                  nic_count, disk_sizes,
1097                                  _compute_fn=_ComputeMinMaxSpec):
1098   """Verifies ipolicy against provided specs.
1099
1100   @type ipolicy: dict
1101   @param ipolicy: The ipolicy
1102   @type mem_size: int
1103   @param mem_size: The memory size
1104   @type cpu_count: int
1105   @param cpu_count: Used cpu cores
1106   @type disk_count: int
1107   @param disk_count: Number of disks used
1108   @type nic_count: int
1109   @param nic_count: Number of nics used
1110   @type disk_sizes: list of ints
1111   @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1112   @param _compute_fn: The compute function (unittest only)
1113   @return: A list of violations, or an empty list of no violations are found
1114
1115   """
1116   assert disk_count == len(disk_sizes)
1117
1118   test_settings = [
1119     (constants.ISPEC_MEM_SIZE, mem_size),
1120     (constants.ISPEC_CPU_COUNT, cpu_count),
1121     (constants.ISPEC_DISK_COUNT, disk_count),
1122     (constants.ISPEC_NIC_COUNT, nic_count),
1123     ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1124
1125   return filter(None,
1126                 (_compute_fn(name, ipolicy, value)
1127                  for (name, value) in test_settings))
1128
1129
1130 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1131                                      _compute_fn=_ComputeIPolicySpecViolation):
1132   """Compute if instance meets the specs of ipolicy.
1133
1134   @type ipolicy: dict
1135   @param ipolicy: The ipolicy to verify against
1136   @type instance: L{objects.Instance}
1137   @param instance: The instance to verify
1138   @param _compute_fn: The function to verify ipolicy (unittest only)
1139   @see: L{_ComputeIPolicySpecViolation}
1140
1141   """
1142   mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1143   cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1144   disk_count = len(instance.disks)
1145   disk_sizes = [disk.size for disk in instance.disks]
1146   nic_count = len(instance.nics)
1147
1148   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1149                      disk_sizes)
1150
1151
1152 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1153     _compute_fn=_ComputeIPolicySpecViolation):
1154   """Compute if instance specs meets the specs of ipolicy.
1155
1156   @type ipolicy: dict
1157   @param ipolicy: The ipolicy to verify against
1158   @param instance_spec: dict
1159   @param instance_spec: The instance spec to verify
1160   @param _compute_fn: The function to verify ipolicy (unittest only)
1161   @see: L{_ComputeIPolicySpecViolation}
1162
1163   """
1164   mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1165   cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1166   disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1167   disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1168   nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1169
1170   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1171                      disk_sizes)
1172
1173
1174 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1175                                  target_group,
1176                                  _compute_fn=_ComputeIPolicyInstanceViolation):
1177   """Compute if instance meets the specs of the new target group.
1178
1179   @param ipolicy: The ipolicy to verify
1180   @param instance: The instance object to verify
1181   @param current_group: The current group of the instance
1182   @param target_group: The new group of the instance
1183   @param _compute_fn: The function to verify ipolicy (unittest only)
1184   @see: L{_ComputeIPolicySpecViolation}
1185
1186   """
1187   if current_group == target_group:
1188     return []
1189   else:
1190     return _compute_fn(ipolicy, instance)
1191
1192
1193 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1194                             _compute_fn=_ComputeIPolicyNodeViolation):
1195   """Checks that the target node is correct in terms of instance policy.
1196
1197   @param ipolicy: The ipolicy to verify
1198   @param instance: The instance object to verify
1199   @param node: The new node to relocate
1200   @param ignore: Ignore violations of the ipolicy
1201   @param _compute_fn: The function to verify ipolicy (unittest only)
1202   @see: L{_ComputeIPolicySpecViolation}
1203
1204   """
1205   primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1206   res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1207
1208   if res:
1209     msg = ("Instance does not meet target node group's (%s) instance"
1210            " policy: %s") % (node.group, utils.CommaJoin(res))
1211     if ignore:
1212       lu.LogWarning(msg)
1213     else:
1214       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1215
1216
1217 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1218   """Computes a set of any instances that would violate the new ipolicy.
1219
1220   @param old_ipolicy: The current (still in-place) ipolicy
1221   @param new_ipolicy: The new (to become) ipolicy
1222   @param instances: List of instances to verify
1223   @return: A list of instances which violates the new ipolicy but did not before
1224
1225   """
1226   return (_ComputeViolatingInstances(old_ipolicy, instances) -
1227           _ComputeViolatingInstances(new_ipolicy, instances))
1228
1229
1230 def _ExpandItemName(fn, name, kind):
1231   """Expand an item name.
1232
1233   @param fn: the function to use for expansion
1234   @param name: requested item name
1235   @param kind: text description ('Node' or 'Instance')
1236   @return: the resolved (full) name
1237   @raise errors.OpPrereqError: if the item is not found
1238
1239   """
1240   full_name = fn(name)
1241   if full_name is None:
1242     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1243                                errors.ECODE_NOENT)
1244   return full_name
1245
1246
1247 def _ExpandNodeName(cfg, name):
1248   """Wrapper over L{_ExpandItemName} for nodes."""
1249   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1250
1251
1252 def _ExpandInstanceName(cfg, name):
1253   """Wrapper over L{_ExpandItemName} for instance."""
1254   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1255
1256
1257 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1258                           minmem, maxmem, vcpus, nics, disk_template, disks,
1259                           bep, hvp, hypervisor_name, tags):
1260   """Builds instance related env variables for hooks
1261
1262   This builds the hook environment from individual variables.
1263
1264   @type name: string
1265   @param name: the name of the instance
1266   @type primary_node: string
1267   @param primary_node: the name of the instance's primary node
1268   @type secondary_nodes: list
1269   @param secondary_nodes: list of secondary nodes as strings
1270   @type os_type: string
1271   @param os_type: the name of the instance's OS
1272   @type status: string
1273   @param status: the desired status of the instance
1274   @type minmem: string
1275   @param minmem: the minimum memory size of the instance
1276   @type maxmem: string
1277   @param maxmem: the maximum memory size of the instance
1278   @type vcpus: string
1279   @param vcpus: the count of VCPUs the instance has
1280   @type nics: list
1281   @param nics: list of tuples (ip, mac, mode, link) representing
1282       the NICs the instance has
1283   @type disk_template: string
1284   @param disk_template: the disk template of the instance
1285   @type disks: list
1286   @param disks: the list of (size, mode) pairs
1287   @type bep: dict
1288   @param bep: the backend parameters for the instance
1289   @type hvp: dict
1290   @param hvp: the hypervisor parameters for the instance
1291   @type hypervisor_name: string
1292   @param hypervisor_name: the hypervisor for the instance
1293   @type tags: list
1294   @param tags: list of instance tags as strings
1295   @rtype: dict
1296   @return: the hook environment for this instance
1297
1298   """
1299   env = {
1300     "OP_TARGET": name,
1301     "INSTANCE_NAME": name,
1302     "INSTANCE_PRIMARY": primary_node,
1303     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1304     "INSTANCE_OS_TYPE": os_type,
1305     "INSTANCE_STATUS": status,
1306     "INSTANCE_MINMEM": minmem,
1307     "INSTANCE_MAXMEM": maxmem,
1308     # TODO(2.7) remove deprecated "memory" value
1309     "INSTANCE_MEMORY": maxmem,
1310     "INSTANCE_VCPUS": vcpus,
1311     "INSTANCE_DISK_TEMPLATE": disk_template,
1312     "INSTANCE_HYPERVISOR": hypervisor_name,
1313   }
1314   if nics:
1315     nic_count = len(nics)
1316     for idx, (ip, mac, mode, link) in enumerate(nics):
1317       if ip is None:
1318         ip = ""
1319       env["INSTANCE_NIC%d_IP" % idx] = ip
1320       env["INSTANCE_NIC%d_MAC" % idx] = mac
1321       env["INSTANCE_NIC%d_MODE" % idx] = mode
1322       env["INSTANCE_NIC%d_LINK" % idx] = link
1323       if mode == constants.NIC_MODE_BRIDGED:
1324         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1325   else:
1326     nic_count = 0
1327
1328   env["INSTANCE_NIC_COUNT"] = nic_count
1329
1330   if disks:
1331     disk_count = len(disks)
1332     for idx, (size, mode) in enumerate(disks):
1333       env["INSTANCE_DISK%d_SIZE" % idx] = size
1334       env["INSTANCE_DISK%d_MODE" % idx] = mode
1335   else:
1336     disk_count = 0
1337
1338   env["INSTANCE_DISK_COUNT"] = disk_count
1339
1340   if not tags:
1341     tags = []
1342
1343   env["INSTANCE_TAGS"] = " ".join(tags)
1344
1345   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1346     for key, value in source.items():
1347       env["INSTANCE_%s_%s" % (kind, key)] = value
1348
1349   return env
1350
1351
1352 def _NICListToTuple(lu, nics):
1353   """Build a list of nic information tuples.
1354
1355   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1356   value in LUInstanceQueryData.
1357
1358   @type lu:  L{LogicalUnit}
1359   @param lu: the logical unit on whose behalf we execute
1360   @type nics: list of L{objects.NIC}
1361   @param nics: list of nics to convert to hooks tuples
1362
1363   """
1364   hooks_nics = []
1365   cluster = lu.cfg.GetClusterInfo()
1366   for nic in nics:
1367     ip = nic.ip
1368     mac = nic.mac
1369     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1370     mode = filled_params[constants.NIC_MODE]
1371     link = filled_params[constants.NIC_LINK]
1372     hooks_nics.append((ip, mac, mode, link))
1373   return hooks_nics
1374
1375
1376 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1377   """Builds instance related env variables for hooks from an object.
1378
1379   @type lu: L{LogicalUnit}
1380   @param lu: the logical unit on whose behalf we execute
1381   @type instance: L{objects.Instance}
1382   @param instance: the instance for which we should build the
1383       environment
1384   @type override: dict
1385   @param override: dictionary with key/values that will override
1386       our values
1387   @rtype: dict
1388   @return: the hook environment dictionary
1389
1390   """
1391   cluster = lu.cfg.GetClusterInfo()
1392   bep = cluster.FillBE(instance)
1393   hvp = cluster.FillHV(instance)
1394   args = {
1395     "name": instance.name,
1396     "primary_node": instance.primary_node,
1397     "secondary_nodes": instance.secondary_nodes,
1398     "os_type": instance.os,
1399     "status": instance.admin_state,
1400     "maxmem": bep[constants.BE_MAXMEM],
1401     "minmem": bep[constants.BE_MINMEM],
1402     "vcpus": bep[constants.BE_VCPUS],
1403     "nics": _NICListToTuple(lu, instance.nics),
1404     "disk_template": instance.disk_template,
1405     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1406     "bep": bep,
1407     "hvp": hvp,
1408     "hypervisor_name": instance.hypervisor,
1409     "tags": instance.tags,
1410   }
1411   if override:
1412     args.update(override)
1413   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1414
1415
1416 def _AdjustCandidatePool(lu, exceptions):
1417   """Adjust the candidate pool after node operations.
1418
1419   """
1420   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1421   if mod_list:
1422     lu.LogInfo("Promoted nodes to master candidate role: %s",
1423                utils.CommaJoin(node.name for node in mod_list))
1424     for name in mod_list:
1425       lu.context.ReaddNode(name)
1426   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1427   if mc_now > mc_max:
1428     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1429                (mc_now, mc_max))
1430
1431
1432 def _DecideSelfPromotion(lu, exceptions=None):
1433   """Decide whether I should promote myself as a master candidate.
1434
1435   """
1436   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1437   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1438   # the new node will increase mc_max with one, so:
1439   mc_should = min(mc_should + 1, cp_size)
1440   return mc_now < mc_should
1441
1442
1443 def _CalculateGroupIPolicy(cluster, group):
1444   """Calculate instance policy for group.
1445
1446   """
1447   return cluster.SimpleFillIPolicy(group.ipolicy)
1448
1449
1450 def _ComputeViolatingInstances(ipolicy, instances):
1451   """Computes a set of instances who violates given ipolicy.
1452
1453   @param ipolicy: The ipolicy to verify
1454   @type instances: object.Instance
1455   @param instances: List of instances to verify
1456   @return: A frozenset of instance names violating the ipolicy
1457
1458   """
1459   return frozenset([inst.name for inst in instances
1460                     if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1461
1462
1463 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1464   """Check that the brigdes needed by a list of nics exist.
1465
1466   """
1467   cluster = lu.cfg.GetClusterInfo()
1468   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1469   brlist = [params[constants.NIC_LINK] for params in paramslist
1470             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1471   if brlist:
1472     result = lu.rpc.call_bridges_exist(target_node, brlist)
1473     result.Raise("Error checking bridges on destination node '%s'" %
1474                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1475
1476
1477 def _CheckInstanceBridgesExist(lu, instance, node=None):
1478   """Check that the brigdes needed by an instance exist.
1479
1480   """
1481   if node is None:
1482     node = instance.primary_node
1483   _CheckNicsBridgesExist(lu, instance.nics, node)
1484
1485
1486 def _CheckOSVariant(os_obj, name):
1487   """Check whether an OS name conforms to the os variants specification.
1488
1489   @type os_obj: L{objects.OS}
1490   @param os_obj: OS object to check
1491   @type name: string
1492   @param name: OS name passed by the user, to check for validity
1493
1494   """
1495   variant = objects.OS.GetVariant(name)
1496   if not os_obj.supported_variants:
1497     if variant:
1498       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1499                                  " passed)" % (os_obj.name, variant),
1500                                  errors.ECODE_INVAL)
1501     return
1502   if not variant:
1503     raise errors.OpPrereqError("OS name must include a variant",
1504                                errors.ECODE_INVAL)
1505
1506   if variant not in os_obj.supported_variants:
1507     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1508
1509
1510 def _GetNodeInstancesInner(cfg, fn):
1511   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1512
1513
1514 def _GetNodeInstances(cfg, node_name):
1515   """Returns a list of all primary and secondary instances on a node.
1516
1517   """
1518
1519   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1520
1521
1522 def _GetNodePrimaryInstances(cfg, node_name):
1523   """Returns primary instances on a node.
1524
1525   """
1526   return _GetNodeInstancesInner(cfg,
1527                                 lambda inst: node_name == inst.primary_node)
1528
1529
1530 def _GetNodeSecondaryInstances(cfg, node_name):
1531   """Returns secondary instances on a node.
1532
1533   """
1534   return _GetNodeInstancesInner(cfg,
1535                                 lambda inst: node_name in inst.secondary_nodes)
1536
1537
1538 def _GetStorageTypeArgs(cfg, storage_type):
1539   """Returns the arguments for a storage type.
1540
1541   """
1542   # Special case for file storage
1543   if storage_type == constants.ST_FILE:
1544     # storage.FileStorage wants a list of storage directories
1545     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1546
1547   return []
1548
1549
1550 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1551   faulty = []
1552
1553   for dev in instance.disks:
1554     cfg.SetDiskID(dev, node_name)
1555
1556   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1557   result.Raise("Failed to get disk status from node %s" % node_name,
1558                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1559
1560   for idx, bdev_status in enumerate(result.payload):
1561     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1562       faulty.append(idx)
1563
1564   return faulty
1565
1566
1567 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1568   """Check the sanity of iallocator and node arguments and use the
1569   cluster-wide iallocator if appropriate.
1570
1571   Check that at most one of (iallocator, node) is specified. If none is
1572   specified, then the LU's opcode's iallocator slot is filled with the
1573   cluster-wide default iallocator.
1574
1575   @type iallocator_slot: string
1576   @param iallocator_slot: the name of the opcode iallocator slot
1577   @type node_slot: string
1578   @param node_slot: the name of the opcode target node slot
1579
1580   """
1581   node = getattr(lu.op, node_slot, None)
1582   iallocator = getattr(lu.op, iallocator_slot, None)
1583
1584   if node is not None and iallocator is not None:
1585     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1586                                errors.ECODE_INVAL)
1587   elif node is None and iallocator is None:
1588     default_iallocator = lu.cfg.GetDefaultIAllocator()
1589     if default_iallocator:
1590       setattr(lu.op, iallocator_slot, default_iallocator)
1591     else:
1592       raise errors.OpPrereqError("No iallocator or node given and no"
1593                                  " cluster-wide default iallocator found;"
1594                                  " please specify either an iallocator or a"
1595                                  " node, or set a cluster-wide default"
1596                                  " iallocator")
1597
1598
1599 def _GetDefaultIAllocator(cfg, iallocator):
1600   """Decides on which iallocator to use.
1601
1602   @type cfg: L{config.ConfigWriter}
1603   @param cfg: Cluster configuration object
1604   @type iallocator: string or None
1605   @param iallocator: Iallocator specified in opcode
1606   @rtype: string
1607   @return: Iallocator name
1608
1609   """
1610   if not iallocator:
1611     # Use default iallocator
1612     iallocator = cfg.GetDefaultIAllocator()
1613
1614   if not iallocator:
1615     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1616                                " opcode nor as a cluster-wide default",
1617                                errors.ECODE_INVAL)
1618
1619   return iallocator
1620
1621
1622 class LUClusterPostInit(LogicalUnit):
1623   """Logical unit for running hooks after cluster initialization.
1624
1625   """
1626   HPATH = "cluster-init"
1627   HTYPE = constants.HTYPE_CLUSTER
1628
1629   def BuildHooksEnv(self):
1630     """Build hooks env.
1631
1632     """
1633     return {
1634       "OP_TARGET": self.cfg.GetClusterName(),
1635       }
1636
1637   def BuildHooksNodes(self):
1638     """Build hooks nodes.
1639
1640     """
1641     return ([], [self.cfg.GetMasterNode()])
1642
1643   def Exec(self, feedback_fn):
1644     """Nothing to do.
1645
1646     """
1647     return True
1648
1649
1650 class LUClusterDestroy(LogicalUnit):
1651   """Logical unit for destroying the cluster.
1652
1653   """
1654   HPATH = "cluster-destroy"
1655   HTYPE = constants.HTYPE_CLUSTER
1656
1657   def BuildHooksEnv(self):
1658     """Build hooks env.
1659
1660     """
1661     return {
1662       "OP_TARGET": self.cfg.GetClusterName(),
1663       }
1664
1665   def BuildHooksNodes(self):
1666     """Build hooks nodes.
1667
1668     """
1669     return ([], [])
1670
1671   def CheckPrereq(self):
1672     """Check prerequisites.
1673
1674     This checks whether the cluster is empty.
1675
1676     Any errors are signaled by raising errors.OpPrereqError.
1677
1678     """
1679     master = self.cfg.GetMasterNode()
1680
1681     nodelist = self.cfg.GetNodeList()
1682     if len(nodelist) != 1 or nodelist[0] != master:
1683       raise errors.OpPrereqError("There are still %d node(s) in"
1684                                  " this cluster." % (len(nodelist) - 1),
1685                                  errors.ECODE_INVAL)
1686     instancelist = self.cfg.GetInstanceList()
1687     if instancelist:
1688       raise errors.OpPrereqError("There are still %d instance(s) in"
1689                                  " this cluster." % len(instancelist),
1690                                  errors.ECODE_INVAL)
1691
1692   def Exec(self, feedback_fn):
1693     """Destroys the cluster.
1694
1695     """
1696     master_params = self.cfg.GetMasterNetworkParameters()
1697
1698     # Run post hooks on master node before it's removed
1699     _RunPostHook(self, master_params.name)
1700
1701     ems = self.cfg.GetUseExternalMipScript()
1702     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1703                                                      master_params, ems)
1704     if result.fail_msg:
1705       self.LogWarning("Error disabling the master IP address: %s",
1706                       result.fail_msg)
1707
1708     return master_params.name
1709
1710
1711 def _VerifyCertificate(filename):
1712   """Verifies a certificate for L{LUClusterVerifyConfig}.
1713
1714   @type filename: string
1715   @param filename: Path to PEM file
1716
1717   """
1718   try:
1719     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1720                                            utils.ReadFile(filename))
1721   except Exception, err: # pylint: disable=W0703
1722     return (LUClusterVerifyConfig.ETYPE_ERROR,
1723             "Failed to load X509 certificate %s: %s" % (filename, err))
1724
1725   (errcode, msg) = \
1726     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1727                                 constants.SSL_CERT_EXPIRATION_ERROR)
1728
1729   if msg:
1730     fnamemsg = "While verifying %s: %s" % (filename, msg)
1731   else:
1732     fnamemsg = None
1733
1734   if errcode is None:
1735     return (None, fnamemsg)
1736   elif errcode == utils.CERT_WARNING:
1737     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1738   elif errcode == utils.CERT_ERROR:
1739     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1740
1741   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1742
1743
1744 def _GetAllHypervisorParameters(cluster, instances):
1745   """Compute the set of all hypervisor parameters.
1746
1747   @type cluster: L{objects.Cluster}
1748   @param cluster: the cluster object
1749   @param instances: list of L{objects.Instance}
1750   @param instances: additional instances from which to obtain parameters
1751   @rtype: list of (origin, hypervisor, parameters)
1752   @return: a list with all parameters found, indicating the hypervisor they
1753        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1754
1755   """
1756   hvp_data = []
1757
1758   for hv_name in cluster.enabled_hypervisors:
1759     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1760
1761   for os_name, os_hvp in cluster.os_hvp.items():
1762     for hv_name, hv_params in os_hvp.items():
1763       if hv_params:
1764         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1765         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1766
1767   # TODO: collapse identical parameter values in a single one
1768   for instance in instances:
1769     if instance.hvparams:
1770       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1771                        cluster.FillHV(instance)))
1772
1773   return hvp_data
1774
1775
1776 class _VerifyErrors(object):
1777   """Mix-in for cluster/group verify LUs.
1778
1779   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1780   self.op and self._feedback_fn to be available.)
1781
1782   """
1783
1784   ETYPE_FIELD = "code"
1785   ETYPE_ERROR = "ERROR"
1786   ETYPE_WARNING = "WARNING"
1787
1788   def _Error(self, ecode, item, msg, *args, **kwargs):
1789     """Format an error message.
1790
1791     Based on the opcode's error_codes parameter, either format a
1792     parseable error code, or a simpler error string.
1793
1794     This must be called only from Exec and functions called from Exec.
1795
1796     """
1797     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1798     itype, etxt, _ = ecode
1799     # first complete the msg
1800     if args:
1801       msg = msg % args
1802     # then format the whole message
1803     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1804       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1805     else:
1806       if item:
1807         item = " " + item
1808       else:
1809         item = ""
1810       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1811     # and finally report it via the feedback_fn
1812     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1813
1814   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1815     """Log an error message if the passed condition is True.
1816
1817     """
1818     cond = (bool(cond)
1819             or self.op.debug_simulate_errors) # pylint: disable=E1101
1820
1821     # If the error code is in the list of ignored errors, demote the error to a
1822     # warning
1823     (_, etxt, _) = ecode
1824     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1825       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1826
1827     if cond:
1828       self._Error(ecode, *args, **kwargs)
1829
1830     # do not mark the operation as failed for WARN cases only
1831     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1832       self.bad = self.bad or cond
1833
1834
1835 class LUClusterVerify(NoHooksLU):
1836   """Submits all jobs necessary to verify the cluster.
1837
1838   """
1839   REQ_BGL = False
1840
1841   def ExpandNames(self):
1842     self.needed_locks = {}
1843
1844   def Exec(self, feedback_fn):
1845     jobs = []
1846
1847     if self.op.group_name:
1848       groups = [self.op.group_name]
1849       depends_fn = lambda: None
1850     else:
1851       groups = self.cfg.GetNodeGroupList()
1852
1853       # Verify global configuration
1854       jobs.append([
1855         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1856         ])
1857
1858       # Always depend on global verification
1859       depends_fn = lambda: [(-len(jobs), [])]
1860
1861     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1862                                             ignore_errors=self.op.ignore_errors,
1863                                             depends=depends_fn())]
1864                 for group in groups)
1865
1866     # Fix up all parameters
1867     for op in itertools.chain(*jobs): # pylint: disable=W0142
1868       op.debug_simulate_errors = self.op.debug_simulate_errors
1869       op.verbose = self.op.verbose
1870       op.error_codes = self.op.error_codes
1871       try:
1872         op.skip_checks = self.op.skip_checks
1873       except AttributeError:
1874         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1875
1876     return ResultWithJobs(jobs)
1877
1878
1879 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1880   """Verifies the cluster config.
1881
1882   """
1883   REQ_BGL = True
1884
1885   def _VerifyHVP(self, hvp_data):
1886     """Verifies locally the syntax of the hypervisor parameters.
1887
1888     """
1889     for item, hv_name, hv_params in hvp_data:
1890       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1891              (item, hv_name))
1892       try:
1893         hv_class = hypervisor.GetHypervisor(hv_name)
1894         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1895         hv_class.CheckParameterSyntax(hv_params)
1896       except errors.GenericError, err:
1897         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1898
1899   def ExpandNames(self):
1900     # Information can be safely retrieved as the BGL is acquired in exclusive
1901     # mode
1902     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1903     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1904     self.all_node_info = self.cfg.GetAllNodesInfo()
1905     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1906     self.needed_locks = {}
1907
1908   def Exec(self, feedback_fn):
1909     """Verify integrity of cluster, performing various test on nodes.
1910
1911     """
1912     self.bad = False
1913     self._feedback_fn = feedback_fn
1914
1915     feedback_fn("* Verifying cluster config")
1916
1917     for msg in self.cfg.VerifyConfig():
1918       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1919
1920     feedback_fn("* Verifying cluster certificate files")
1921
1922     for cert_filename in constants.ALL_CERT_FILES:
1923       (errcode, msg) = _VerifyCertificate(cert_filename)
1924       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1925
1926     feedback_fn("* Verifying hypervisor parameters")
1927
1928     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1929                                                 self.all_inst_info.values()))
1930
1931     feedback_fn("* Verifying all nodes belong to an existing group")
1932
1933     # We do this verification here because, should this bogus circumstance
1934     # occur, it would never be caught by VerifyGroup, which only acts on
1935     # nodes/instances reachable from existing node groups.
1936
1937     dangling_nodes = set(node.name for node in self.all_node_info.values()
1938                          if node.group not in self.all_group_info)
1939
1940     dangling_instances = {}
1941     no_node_instances = []
1942
1943     for inst in self.all_inst_info.values():
1944       if inst.primary_node in dangling_nodes:
1945         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1946       elif inst.primary_node not in self.all_node_info:
1947         no_node_instances.append(inst.name)
1948
1949     pretty_dangling = [
1950         "%s (%s)" %
1951         (node.name,
1952          utils.CommaJoin(dangling_instances.get(node.name,
1953                                                 ["no instances"])))
1954         for node in dangling_nodes]
1955
1956     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1957                   None,
1958                   "the following nodes (and their instances) belong to a non"
1959                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1960
1961     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1962                   None,
1963                   "the following instances have a non-existing primary-node:"
1964                   " %s", utils.CommaJoin(no_node_instances))
1965
1966     return not self.bad
1967
1968
1969 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1970   """Verifies the status of a node group.
1971
1972   """
1973   HPATH = "cluster-verify"
1974   HTYPE = constants.HTYPE_CLUSTER
1975   REQ_BGL = False
1976
1977   _HOOKS_INDENT_RE = re.compile("^", re.M)
1978
1979   class NodeImage(object):
1980     """A class representing the logical and physical status of a node.
1981
1982     @type name: string
1983     @ivar name: the node name to which this object refers
1984     @ivar volumes: a structure as returned from
1985         L{ganeti.backend.GetVolumeList} (runtime)
1986     @ivar instances: a list of running instances (runtime)
1987     @ivar pinst: list of configured primary instances (config)
1988     @ivar sinst: list of configured secondary instances (config)
1989     @ivar sbp: dictionary of {primary-node: list of instances} for all
1990         instances for which this node is secondary (config)
1991     @ivar mfree: free memory, as reported by hypervisor (runtime)
1992     @ivar dfree: free disk, as reported by the node (runtime)
1993     @ivar offline: the offline status (config)
1994     @type rpc_fail: boolean
1995     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1996         not whether the individual keys were correct) (runtime)
1997     @type lvm_fail: boolean
1998     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1999     @type hyp_fail: boolean
2000     @ivar hyp_fail: whether the RPC call didn't return the instance list
2001     @type ghost: boolean
2002     @ivar ghost: whether this is a known node or not (config)
2003     @type os_fail: boolean
2004     @ivar os_fail: whether the RPC call didn't return valid OS data
2005     @type oslist: list
2006     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2007     @type vm_capable: boolean
2008     @ivar vm_capable: whether the node can host instances
2009
2010     """
2011     def __init__(self, offline=False, name=None, vm_capable=True):
2012       self.name = name
2013       self.volumes = {}
2014       self.instances = []
2015       self.pinst = []
2016       self.sinst = []
2017       self.sbp = {}
2018       self.mfree = 0
2019       self.dfree = 0
2020       self.offline = offline
2021       self.vm_capable = vm_capable
2022       self.rpc_fail = False
2023       self.lvm_fail = False
2024       self.hyp_fail = False
2025       self.ghost = False
2026       self.os_fail = False
2027       self.oslist = {}
2028
2029   def ExpandNames(self):
2030     # This raises errors.OpPrereqError on its own:
2031     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2032
2033     # Get instances in node group; this is unsafe and needs verification later
2034     inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
2035
2036     self.needed_locks = {
2037       locking.LEVEL_INSTANCE: inst_names,
2038       locking.LEVEL_NODEGROUP: [self.group_uuid],
2039       locking.LEVEL_NODE: [],
2040       }
2041
2042     self.share_locks = _ShareAll()
2043
2044   def DeclareLocks(self, level):
2045     if level == locking.LEVEL_NODE:
2046       # Get members of node group; this is unsafe and needs verification later
2047       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2048
2049       all_inst_info = self.cfg.GetAllInstancesInfo()
2050
2051       # In Exec(), we warn about mirrored instances that have primary and
2052       # secondary living in separate node groups. To fully verify that
2053       # volumes for these instances are healthy, we will need to do an
2054       # extra call to their secondaries. We ensure here those nodes will
2055       # be locked.
2056       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2057         # Important: access only the instances whose lock is owned
2058         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2059           nodes.update(all_inst_info[inst].secondary_nodes)
2060
2061       self.needed_locks[locking.LEVEL_NODE] = nodes
2062
2063   def CheckPrereq(self):
2064     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2065     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2066
2067     group_nodes = set(self.group_info.members)
2068     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2069
2070     unlocked_nodes = \
2071         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2072
2073     unlocked_instances = \
2074         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2075
2076     if unlocked_nodes:
2077       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2078                                  utils.CommaJoin(unlocked_nodes))
2079
2080     if unlocked_instances:
2081       raise errors.OpPrereqError("Missing lock for instances: %s" %
2082                                  utils.CommaJoin(unlocked_instances))
2083
2084     self.all_node_info = self.cfg.GetAllNodesInfo()
2085     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2086
2087     self.my_node_names = utils.NiceSort(group_nodes)
2088     self.my_inst_names = utils.NiceSort(group_instances)
2089
2090     self.my_node_info = dict((name, self.all_node_info[name])
2091                              for name in self.my_node_names)
2092
2093     self.my_inst_info = dict((name, self.all_inst_info[name])
2094                              for name in self.my_inst_names)
2095
2096     # We detect here the nodes that will need the extra RPC calls for verifying
2097     # split LV volumes; they should be locked.
2098     extra_lv_nodes = set()
2099
2100     for inst in self.my_inst_info.values():
2101       if inst.disk_template in constants.DTS_INT_MIRROR:
2102         group = self.my_node_info[inst.primary_node].group
2103         for nname in inst.secondary_nodes:
2104           if self.all_node_info[nname].group != group:
2105             extra_lv_nodes.add(nname)
2106
2107     unlocked_lv_nodes = \
2108         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2109
2110     if unlocked_lv_nodes:
2111       raise errors.OpPrereqError("these nodes could be locked: %s" %
2112                                  utils.CommaJoin(unlocked_lv_nodes))
2113     self.extra_lv_nodes = list(extra_lv_nodes)
2114
2115   def _VerifyNode(self, ninfo, nresult):
2116     """Perform some basic validation on data returned from a node.
2117
2118       - check the result data structure is well formed and has all the
2119         mandatory fields
2120       - check ganeti version
2121
2122     @type ninfo: L{objects.Node}
2123     @param ninfo: the node to check
2124     @param nresult: the results from the node
2125     @rtype: boolean
2126     @return: whether overall this call was successful (and we can expect
2127          reasonable values in the respose)
2128
2129     """
2130     node = ninfo.name
2131     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2132
2133     # main result, nresult should be a non-empty dict
2134     test = not nresult or not isinstance(nresult, dict)
2135     _ErrorIf(test, constants.CV_ENODERPC, node,
2136                   "unable to verify node: no data returned")
2137     if test:
2138       return False
2139
2140     # compares ganeti version
2141     local_version = constants.PROTOCOL_VERSION
2142     remote_version = nresult.get("version", None)
2143     test = not (remote_version and
2144                 isinstance(remote_version, (list, tuple)) and
2145                 len(remote_version) == 2)
2146     _ErrorIf(test, constants.CV_ENODERPC, node,
2147              "connection to node returned invalid data")
2148     if test:
2149       return False
2150
2151     test = local_version != remote_version[0]
2152     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2153              "incompatible protocol versions: master %s,"
2154              " node %s", local_version, remote_version[0])
2155     if test:
2156       return False
2157
2158     # node seems compatible, we can actually try to look into its results
2159
2160     # full package version
2161     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2162                   constants.CV_ENODEVERSION, node,
2163                   "software version mismatch: master %s, node %s",
2164                   constants.RELEASE_VERSION, remote_version[1],
2165                   code=self.ETYPE_WARNING)
2166
2167     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2168     if ninfo.vm_capable and isinstance(hyp_result, dict):
2169       for hv_name, hv_result in hyp_result.iteritems():
2170         test = hv_result is not None
2171         _ErrorIf(test, constants.CV_ENODEHV, node,
2172                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2173
2174     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2175     if ninfo.vm_capable and isinstance(hvp_result, list):
2176       for item, hv_name, hv_result in hvp_result:
2177         _ErrorIf(True, constants.CV_ENODEHV, node,
2178                  "hypervisor %s parameter verify failure (source %s): %s",
2179                  hv_name, item, hv_result)
2180
2181     test = nresult.get(constants.NV_NODESETUP,
2182                        ["Missing NODESETUP results"])
2183     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2184              "; ".join(test))
2185
2186     return True
2187
2188   def _VerifyNodeTime(self, ninfo, nresult,
2189                       nvinfo_starttime, nvinfo_endtime):
2190     """Check the node time.
2191
2192     @type ninfo: L{objects.Node}
2193     @param ninfo: the node to check
2194     @param nresult: the remote results for the node
2195     @param nvinfo_starttime: the start time of the RPC call
2196     @param nvinfo_endtime: the end time of the RPC call
2197
2198     """
2199     node = ninfo.name
2200     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2201
2202     ntime = nresult.get(constants.NV_TIME, None)
2203     try:
2204       ntime_merged = utils.MergeTime(ntime)
2205     except (ValueError, TypeError):
2206       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2207       return
2208
2209     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2210       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2211     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2212       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2213     else:
2214       ntime_diff = None
2215
2216     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2217              "Node time diverges by at least %s from master node time",
2218              ntime_diff)
2219
2220   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2221     """Check the node LVM results.
2222
2223     @type ninfo: L{objects.Node}
2224     @param ninfo: the node to check
2225     @param nresult: the remote results for the node
2226     @param vg_name: the configured VG name
2227
2228     """
2229     if vg_name is None:
2230       return
2231
2232     node = ninfo.name
2233     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2234
2235     # checks vg existence and size > 20G
2236     vglist = nresult.get(constants.NV_VGLIST, None)
2237     test = not vglist
2238     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2239     if not test:
2240       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2241                                             constants.MIN_VG_SIZE)
2242       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2243
2244     # check pv names
2245     pvlist = nresult.get(constants.NV_PVLIST, None)
2246     test = pvlist is None
2247     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2248     if not test:
2249       # check that ':' is not present in PV names, since it's a
2250       # special character for lvcreate (denotes the range of PEs to
2251       # use on the PV)
2252       for _, pvname, owner_vg in pvlist:
2253         test = ":" in pvname
2254         _ErrorIf(test, constants.CV_ENODELVM, node,
2255                  "Invalid character ':' in PV '%s' of VG '%s'",
2256                  pvname, owner_vg)
2257
2258   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2259     """Check the node bridges.
2260
2261     @type ninfo: L{objects.Node}
2262     @param ninfo: the node to check
2263     @param nresult: the remote results for the node
2264     @param bridges: the expected list of bridges
2265
2266     """
2267     if not bridges:
2268       return
2269
2270     node = ninfo.name
2271     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2272
2273     missing = nresult.get(constants.NV_BRIDGES, None)
2274     test = not isinstance(missing, list)
2275     _ErrorIf(test, constants.CV_ENODENET, node,
2276              "did not return valid bridge information")
2277     if not test:
2278       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2279                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2280
2281   def _VerifyNodeUserScripts(self, ninfo, nresult):
2282     """Check the results of user scripts presence and executability on the node
2283
2284     @type ninfo: L{objects.Node}
2285     @param ninfo: the node to check
2286     @param nresult: the remote results for the node
2287
2288     """
2289     node = ninfo.name
2290
2291     test = not constants.NV_USERSCRIPTS in nresult
2292     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2293                   "did not return user scripts information")
2294
2295     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2296     if not test:
2297       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2298                     "user scripts not present or not executable: %s" %
2299                     utils.CommaJoin(sorted(broken_scripts)))
2300
2301   def _VerifyNodeNetwork(self, ninfo, nresult):
2302     """Check the node network connectivity results.
2303
2304     @type ninfo: L{objects.Node}
2305     @param ninfo: the node to check
2306     @param nresult: the remote results for the node
2307
2308     """
2309     node = ninfo.name
2310     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2311
2312     test = constants.NV_NODELIST not in nresult
2313     _ErrorIf(test, constants.CV_ENODESSH, node,
2314              "node hasn't returned node ssh connectivity data")
2315     if not test:
2316       if nresult[constants.NV_NODELIST]:
2317         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2318           _ErrorIf(True, constants.CV_ENODESSH, node,
2319                    "ssh communication with node '%s': %s", a_node, a_msg)
2320
2321     test = constants.NV_NODENETTEST not in nresult
2322     _ErrorIf(test, constants.CV_ENODENET, node,
2323              "node hasn't returned node tcp connectivity data")
2324     if not test:
2325       if nresult[constants.NV_NODENETTEST]:
2326         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2327         for anode in nlist:
2328           _ErrorIf(True, constants.CV_ENODENET, node,
2329                    "tcp communication with node '%s': %s",
2330                    anode, nresult[constants.NV_NODENETTEST][anode])
2331
2332     test = constants.NV_MASTERIP not in nresult
2333     _ErrorIf(test, constants.CV_ENODENET, node,
2334              "node hasn't returned node master IP reachability data")
2335     if not test:
2336       if not nresult[constants.NV_MASTERIP]:
2337         if node == self.master_node:
2338           msg = "the master node cannot reach the master IP (not configured?)"
2339         else:
2340           msg = "cannot reach the master IP"
2341         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2342
2343   def _VerifyInstance(self, instance, instanceconfig, node_image,
2344                       diskstatus):
2345     """Verify an instance.
2346
2347     This function checks to see if the required block devices are
2348     available on the instance's node.
2349
2350     """
2351     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2352     node_current = instanceconfig.primary_node
2353
2354     node_vol_should = {}
2355     instanceconfig.MapLVsByNode(node_vol_should)
2356
2357     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2358     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2359     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2360
2361     for node in node_vol_should:
2362       n_img = node_image[node]
2363       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2364         # ignore missing volumes on offline or broken nodes
2365         continue
2366       for volume in node_vol_should[node]:
2367         test = volume not in n_img.volumes
2368         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2369                  "volume %s missing on node %s", volume, node)
2370
2371     if instanceconfig.admin_state == constants.ADMINST_UP:
2372       pri_img = node_image[node_current]
2373       test = instance not in pri_img.instances and not pri_img.offline
2374       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2375                "instance not running on its primary node %s",
2376                node_current)
2377
2378     diskdata = [(nname, success, status, idx)
2379                 for (nname, disks) in diskstatus.items()
2380                 for idx, (success, status) in enumerate(disks)]
2381
2382     for nname, success, bdev_status, idx in diskdata:
2383       # the 'ghost node' construction in Exec() ensures that we have a
2384       # node here
2385       snode = node_image[nname]
2386       bad_snode = snode.ghost or snode.offline
2387       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2388                not success and not bad_snode,
2389                constants.CV_EINSTANCEFAULTYDISK, instance,
2390                "couldn't retrieve status for disk/%s on %s: %s",
2391                idx, nname, bdev_status)
2392       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2393                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2394                constants.CV_EINSTANCEFAULTYDISK, instance,
2395                "disk/%s on %s is faulty", idx, nname)
2396
2397   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2398     """Verify if there are any unknown volumes in the cluster.
2399
2400     The .os, .swap and backup volumes are ignored. All other volumes are
2401     reported as unknown.
2402
2403     @type reserved: L{ganeti.utils.FieldSet}
2404     @param reserved: a FieldSet of reserved volume names
2405
2406     """
2407     for node, n_img in node_image.items():
2408       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2409         # skip non-healthy nodes
2410         continue
2411       for volume in n_img.volumes:
2412         test = ((node not in node_vol_should or
2413                 volume not in node_vol_should[node]) and
2414                 not reserved.Matches(volume))
2415         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2416                       "volume %s is unknown", volume)
2417
2418   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2419     """Verify N+1 Memory Resilience.
2420
2421     Check that if one single node dies we can still start all the
2422     instances it was primary for.
2423
2424     """
2425     cluster_info = self.cfg.GetClusterInfo()
2426     for node, n_img in node_image.items():
2427       # This code checks that every node which is now listed as
2428       # secondary has enough memory to host all instances it is
2429       # supposed to should a single other node in the cluster fail.
2430       # FIXME: not ready for failover to an arbitrary node
2431       # FIXME: does not support file-backed instances
2432       # WARNING: we currently take into account down instances as well
2433       # as up ones, considering that even if they're down someone
2434       # might want to start them even in the event of a node failure.
2435       if n_img.offline:
2436         # we're skipping offline nodes from the N+1 warning, since
2437         # most likely we don't have good memory infromation from them;
2438         # we already list instances living on such nodes, and that's
2439         # enough warning
2440         continue
2441       #TODO(dynmem): also consider ballooning out other instances
2442       for prinode, instances in n_img.sbp.items():
2443         needed_mem = 0
2444         for instance in instances:
2445           bep = cluster_info.FillBE(instance_cfg[instance])
2446           if bep[constants.BE_AUTO_BALANCE]:
2447             needed_mem += bep[constants.BE_MINMEM]
2448         test = n_img.mfree < needed_mem
2449         self._ErrorIf(test, constants.CV_ENODEN1, node,
2450                       "not enough memory to accomodate instance failovers"
2451                       " should node %s fail (%dMiB needed, %dMiB available)",
2452                       prinode, needed_mem, n_img.mfree)
2453
2454   @classmethod
2455   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2456                    (files_all, files_opt, files_mc, files_vm)):
2457     """Verifies file checksums collected from all nodes.
2458
2459     @param errorif: Callback for reporting errors
2460     @param nodeinfo: List of L{objects.Node} objects
2461     @param master_node: Name of master node
2462     @param all_nvinfo: RPC results
2463
2464     """
2465     # Define functions determining which nodes to consider for a file
2466     files2nodefn = [
2467       (files_all, None),
2468       (files_mc, lambda node: (node.master_candidate or
2469                                node.name == master_node)),
2470       (files_vm, lambda node: node.vm_capable),
2471       ]
2472
2473     # Build mapping from filename to list of nodes which should have the file
2474     nodefiles = {}
2475     for (files, fn) in files2nodefn:
2476       if fn is None:
2477         filenodes = nodeinfo
2478       else:
2479         filenodes = filter(fn, nodeinfo)
2480       nodefiles.update((filename,
2481                         frozenset(map(operator.attrgetter("name"), filenodes)))
2482                        for filename in files)
2483
2484     assert set(nodefiles) == (files_all | files_mc | files_vm)
2485
2486     fileinfo = dict((filename, {}) for filename in nodefiles)
2487     ignore_nodes = set()
2488
2489     for node in nodeinfo:
2490       if node.offline:
2491         ignore_nodes.add(node.name)
2492         continue
2493
2494       nresult = all_nvinfo[node.name]
2495
2496       if nresult.fail_msg or not nresult.payload:
2497         node_files = None
2498       else:
2499         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2500
2501       test = not (node_files and isinstance(node_files, dict))
2502       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2503               "Node did not return file checksum data")
2504       if test:
2505         ignore_nodes.add(node.name)
2506         continue
2507
2508       # Build per-checksum mapping from filename to nodes having it
2509       for (filename, checksum) in node_files.items():
2510         assert filename in nodefiles
2511         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2512
2513     for (filename, checksums) in fileinfo.items():
2514       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2515
2516       # Nodes having the file
2517       with_file = frozenset(node_name
2518                             for nodes in fileinfo[filename].values()
2519                             for node_name in nodes) - ignore_nodes
2520
2521       expected_nodes = nodefiles[filename] - ignore_nodes
2522
2523       # Nodes missing file
2524       missing_file = expected_nodes - with_file
2525
2526       if filename in files_opt:
2527         # All or no nodes
2528         errorif(missing_file and missing_file != expected_nodes,
2529                 constants.CV_ECLUSTERFILECHECK, None,
2530                 "File %s is optional, but it must exist on all or no"
2531                 " nodes (not found on %s)",
2532                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2533       else:
2534         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2535                 "File %s is missing from node(s) %s", filename,
2536                 utils.CommaJoin(utils.NiceSort(missing_file)))
2537
2538         # Warn if a node has a file it shouldn't
2539         unexpected = with_file - expected_nodes
2540         errorif(unexpected,
2541                 constants.CV_ECLUSTERFILECHECK, None,
2542                 "File %s should not exist on node(s) %s",
2543                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2544
2545       # See if there are multiple versions of the file
2546       test = len(checksums) > 1
2547       if test:
2548         variants = ["variant %s on %s" %
2549                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2550                     for (idx, (checksum, nodes)) in
2551                       enumerate(sorted(checksums.items()))]
2552       else:
2553         variants = []
2554
2555       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2556               "File %s found with %s different checksums (%s)",
2557               filename, len(checksums), "; ".join(variants))
2558
2559   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2560                       drbd_map):
2561     """Verifies and the node DRBD status.
2562
2563     @type ninfo: L{objects.Node}
2564     @param ninfo: the node to check
2565     @param nresult: the remote results for the node
2566     @param instanceinfo: the dict of instances
2567     @param drbd_helper: the configured DRBD usermode helper
2568     @param drbd_map: the DRBD map as returned by
2569         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2570
2571     """
2572     node = ninfo.name
2573     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2574
2575     if drbd_helper:
2576       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2577       test = (helper_result == None)
2578       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2579                "no drbd usermode helper returned")
2580       if helper_result:
2581         status, payload = helper_result
2582         test = not status
2583         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2584                  "drbd usermode helper check unsuccessful: %s", payload)
2585         test = status and (payload != drbd_helper)
2586         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2587                  "wrong drbd usermode helper: %s", payload)
2588
2589     # compute the DRBD minors
2590     node_drbd = {}
2591     for minor, instance in drbd_map[node].items():
2592       test = instance not in instanceinfo
2593       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2594                "ghost instance '%s' in temporary DRBD map", instance)
2595         # ghost instance should not be running, but otherwise we
2596         # don't give double warnings (both ghost instance and
2597         # unallocated minor in use)
2598       if test:
2599         node_drbd[minor] = (instance, False)
2600       else:
2601         instance = instanceinfo[instance]
2602         node_drbd[minor] = (instance.name,
2603                             instance.admin_state == constants.ADMINST_UP)
2604
2605     # and now check them
2606     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2607     test = not isinstance(used_minors, (tuple, list))
2608     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2609              "cannot parse drbd status file: %s", str(used_minors))
2610     if test:
2611       # we cannot check drbd status
2612       return
2613
2614     for minor, (iname, must_exist) in node_drbd.items():
2615       test = minor not in used_minors and must_exist
2616       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2617                "drbd minor %d of instance %s is not active", minor, iname)
2618     for minor in used_minors:
2619       test = minor not in node_drbd
2620       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2621                "unallocated drbd minor %d is in use", minor)
2622
2623   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2624     """Builds the node OS structures.
2625
2626     @type ninfo: L{objects.Node}
2627     @param ninfo: the node to check
2628     @param nresult: the remote results for the node
2629     @param nimg: the node image object
2630
2631     """
2632     node = ninfo.name
2633     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2634
2635     remote_os = nresult.get(constants.NV_OSLIST, None)
2636     test = (not isinstance(remote_os, list) or
2637             not compat.all(isinstance(v, list) and len(v) == 7
2638                            for v in remote_os))
2639
2640     _ErrorIf(test, constants.CV_ENODEOS, node,
2641              "node hasn't returned valid OS data")
2642
2643     nimg.os_fail = test
2644
2645     if test:
2646       return
2647
2648     os_dict = {}
2649
2650     for (name, os_path, status, diagnose,
2651          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2652
2653       if name not in os_dict:
2654         os_dict[name] = []
2655
2656       # parameters is a list of lists instead of list of tuples due to
2657       # JSON lacking a real tuple type, fix it:
2658       parameters = [tuple(v) for v in parameters]
2659       os_dict[name].append((os_path, status, diagnose,
2660                             set(variants), set(parameters), set(api_ver)))
2661
2662     nimg.oslist = os_dict
2663
2664   def _VerifyNodeOS(self, ninfo, nimg, base):
2665     """Verifies the node OS list.
2666
2667     @type ninfo: L{objects.Node}
2668     @param ninfo: the node to check
2669     @param nimg: the node image object
2670     @param base: the 'template' node we match against (e.g. from the master)
2671
2672     """
2673     node = ninfo.name
2674     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2675
2676     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2677
2678     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2679     for os_name, os_data in nimg.oslist.items():
2680       assert os_data, "Empty OS status for OS %s?!" % os_name
2681       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2682       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2683                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2684       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2685                "OS '%s' has multiple entries (first one shadows the rest): %s",
2686                os_name, utils.CommaJoin([v[0] for v in os_data]))
2687       # comparisons with the 'base' image
2688       test = os_name not in base.oslist
2689       _ErrorIf(test, constants.CV_ENODEOS, node,
2690                "Extra OS %s not present on reference node (%s)",
2691                os_name, base.name)
2692       if test:
2693         continue
2694       assert base.oslist[os_name], "Base node has empty OS status?"
2695       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2696       if not b_status:
2697         # base OS is invalid, skipping
2698         continue
2699       for kind, a, b in [("API version", f_api, b_api),
2700                          ("variants list", f_var, b_var),
2701                          ("parameters", beautify_params(f_param),
2702                           beautify_params(b_param))]:
2703         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2704                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2705                  kind, os_name, base.name,
2706                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2707
2708     # check any missing OSes
2709     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2710     _ErrorIf(missing, constants.CV_ENODEOS, node,
2711              "OSes present on reference node %s but missing on this node: %s",
2712              base.name, utils.CommaJoin(missing))
2713
2714   def _VerifyOob(self, ninfo, nresult):
2715     """Verifies out of band functionality of a node.
2716
2717     @type ninfo: L{objects.Node}
2718     @param ninfo: the node to check
2719     @param nresult: the remote results for the node
2720
2721     """
2722     node = ninfo.name
2723     # We just have to verify the paths on master and/or master candidates
2724     # as the oob helper is invoked on the master
2725     if ((ninfo.master_candidate or ninfo.master_capable) and
2726         constants.NV_OOB_PATHS in nresult):
2727       for path_result in nresult[constants.NV_OOB_PATHS]:
2728         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2729
2730   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2731     """Verifies and updates the node volume data.
2732
2733     This function will update a L{NodeImage}'s internal structures
2734     with data from the remote call.
2735
2736     @type ninfo: L{objects.Node}
2737     @param ninfo: the node to check
2738     @param nresult: the remote results for the node
2739     @param nimg: the node image object
2740     @param vg_name: the configured VG name
2741
2742     """
2743     node = ninfo.name
2744     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2745
2746     nimg.lvm_fail = True
2747     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2748     if vg_name is None:
2749       pass
2750     elif isinstance(lvdata, basestring):
2751       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2752                utils.SafeEncode(lvdata))
2753     elif not isinstance(lvdata, dict):
2754       _ErrorIf(True, constants.CV_ENODELVM, node,
2755                "rpc call to node failed (lvlist)")
2756     else:
2757       nimg.volumes = lvdata
2758       nimg.lvm_fail = False
2759
2760   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2761     """Verifies and updates the node instance list.
2762
2763     If the listing was successful, then updates this node's instance
2764     list. Otherwise, it marks the RPC call as failed for the instance
2765     list key.
2766
2767     @type ninfo: L{objects.Node}
2768     @param ninfo: the node to check
2769     @param nresult: the remote results for the node
2770     @param nimg: the node image object
2771
2772     """
2773     idata = nresult.get(constants.NV_INSTANCELIST, None)
2774     test = not isinstance(idata, list)
2775     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2776                   "rpc call to node failed (instancelist): %s",
2777                   utils.SafeEncode(str(idata)))
2778     if test:
2779       nimg.hyp_fail = True
2780     else:
2781       nimg.instances = idata
2782
2783   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2784     """Verifies and computes a node information map
2785
2786     @type ninfo: L{objects.Node}
2787     @param ninfo: the node to check
2788     @param nresult: the remote results for the node
2789     @param nimg: the node image object
2790     @param vg_name: the configured VG name
2791
2792     """
2793     node = ninfo.name
2794     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2795
2796     # try to read free memory (from the hypervisor)
2797     hv_info = nresult.get(constants.NV_HVINFO, None)
2798     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2799     _ErrorIf(test, constants.CV_ENODEHV, node,
2800              "rpc call to node failed (hvinfo)")
2801     if not test:
2802       try:
2803         nimg.mfree = int(hv_info["memory_free"])
2804       except (ValueError, TypeError):
2805         _ErrorIf(True, constants.CV_ENODERPC, node,
2806                  "node returned invalid nodeinfo, check hypervisor")
2807
2808     # FIXME: devise a free space model for file based instances as well
2809     if vg_name is not None:
2810       test = (constants.NV_VGLIST not in nresult or
2811               vg_name not in nresult[constants.NV_VGLIST])
2812       _ErrorIf(test, constants.CV_ENODELVM, node,
2813                "node didn't return data for the volume group '%s'"
2814                " - it is either missing or broken", vg_name)
2815       if not test:
2816         try:
2817           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2818         except (ValueError, TypeError):
2819           _ErrorIf(True, constants.CV_ENODERPC, node,
2820                    "node returned invalid LVM info, check LVM status")
2821
2822   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2823     """Gets per-disk status information for all instances.
2824
2825     @type nodelist: list of strings
2826     @param nodelist: Node names
2827     @type node_image: dict of (name, L{objects.Node})
2828     @param node_image: Node objects
2829     @type instanceinfo: dict of (name, L{objects.Instance})
2830     @param instanceinfo: Instance objects
2831     @rtype: {instance: {node: [(succes, payload)]}}
2832     @return: a dictionary of per-instance dictionaries with nodes as
2833         keys and disk information as values; the disk information is a
2834         list of tuples (success, payload)
2835
2836     """
2837     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2838
2839     node_disks = {}
2840     node_disks_devonly = {}
2841     diskless_instances = set()
2842     diskless = constants.DT_DISKLESS
2843
2844     for nname in nodelist:
2845       node_instances = list(itertools.chain(node_image[nname].pinst,
2846                                             node_image[nname].sinst))
2847       diskless_instances.update(inst for inst in node_instances
2848                                 if instanceinfo[inst].disk_template == diskless)
2849       disks = [(inst, disk)
2850                for inst in node_instances
2851                for disk in instanceinfo[inst].disks]
2852
2853       if not disks:
2854         # No need to collect data
2855         continue
2856
2857       node_disks[nname] = disks
2858
2859       # Creating copies as SetDiskID below will modify the objects and that can
2860       # lead to incorrect data returned from nodes
2861       devonly = [dev.Copy() for (_, dev) in disks]
2862
2863       for dev in devonly:
2864         self.cfg.SetDiskID(dev, nname)
2865
2866       node_disks_devonly[nname] = devonly
2867
2868     assert len(node_disks) == len(node_disks_devonly)
2869
2870     # Collect data from all nodes with disks
2871     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2872                                                           node_disks_devonly)
2873
2874     assert len(result) == len(node_disks)
2875
2876     instdisk = {}
2877
2878     for (nname, nres) in result.items():
2879       disks = node_disks[nname]
2880
2881       if nres.offline:
2882         # No data from this node
2883         data = len(disks) * [(False, "node offline")]
2884       else:
2885         msg = nres.fail_msg
2886         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2887                  "while getting disk information: %s", msg)
2888         if msg:
2889           # No data from this node
2890           data = len(disks) * [(False, msg)]
2891         else:
2892           data = []
2893           for idx, i in enumerate(nres.payload):
2894             if isinstance(i, (tuple, list)) and len(i) == 2:
2895               data.append(i)
2896             else:
2897               logging.warning("Invalid result from node %s, entry %d: %s",
2898                               nname, idx, i)
2899               data.append((False, "Invalid result from the remote node"))
2900
2901       for ((inst, _), status) in zip(disks, data):
2902         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2903
2904     # Add empty entries for diskless instances.
2905     for inst in diskless_instances:
2906       assert inst not in instdisk
2907       instdisk[inst] = {}
2908
2909     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2910                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2911                       compat.all(isinstance(s, (tuple, list)) and
2912                                  len(s) == 2 for s in statuses)
2913                       for inst, nnames in instdisk.items()
2914                       for nname, statuses in nnames.items())
2915     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2916
2917     return instdisk
2918
2919   @staticmethod
2920   def _SshNodeSelector(group_uuid, all_nodes):
2921     """Create endless iterators for all potential SSH check hosts.
2922
2923     """
2924     nodes = [node for node in all_nodes
2925              if (node.group != group_uuid and
2926                  not node.offline)]
2927     keyfunc = operator.attrgetter("group")
2928
2929     return map(itertools.cycle,
2930                [sorted(map(operator.attrgetter("name"), names))
2931                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2932                                                   keyfunc)])
2933
2934   @classmethod
2935   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2936     """Choose which nodes should talk to which other nodes.
2937
2938     We will make nodes contact all nodes in their group, and one node from
2939     every other group.
2940
2941     @warning: This algorithm has a known issue if one node group is much
2942       smaller than others (e.g. just one node). In such a case all other
2943       nodes will talk to the single node.
2944
2945     """
2946     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2947     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2948
2949     return (online_nodes,
2950             dict((name, sorted([i.next() for i in sel]))
2951                  for name in online_nodes))
2952
2953   def BuildHooksEnv(self):
2954     """Build hooks env.
2955
2956     Cluster-Verify hooks just ran in the post phase and their failure makes
2957     the output be logged in the verify output and the verification to fail.
2958
2959     """
2960     env = {
2961       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2962       }
2963
2964     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2965                for node in self.my_node_info.values())
2966
2967     return env
2968
2969   def BuildHooksNodes(self):
2970     """Build hooks nodes.
2971
2972     """
2973     return ([], self.my_node_names)
2974
2975   def Exec(self, feedback_fn):
2976     """Verify integrity of the node group, performing various test on nodes.
2977
2978     """
2979     # This method has too many local variables. pylint: disable=R0914
2980     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2981
2982     if not self.my_node_names:
2983       # empty node group
2984       feedback_fn("* Empty node group, skipping verification")
2985       return True
2986
2987     self.bad = False
2988     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2989     verbose = self.op.verbose
2990     self._feedback_fn = feedback_fn
2991
2992     vg_name = self.cfg.GetVGName()
2993     drbd_helper = self.cfg.GetDRBDHelper()
2994     cluster = self.cfg.GetClusterInfo()
2995     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2996     hypervisors = cluster.enabled_hypervisors
2997     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2998
2999     i_non_redundant = [] # Non redundant instances
3000     i_non_a_balanced = [] # Non auto-balanced instances
3001     i_offline = 0 # Count of offline instances
3002     n_offline = 0 # Count of offline nodes
3003     n_drained = 0 # Count of nodes being drained
3004     node_vol_should = {}
3005
3006     # FIXME: verify OS list
3007
3008     # File verification
3009     filemap = _ComputeAncillaryFiles(cluster, False)
3010
3011     # do local checksums
3012     master_node = self.master_node = self.cfg.GetMasterNode()
3013     master_ip = self.cfg.GetMasterIP()
3014
3015     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3016
3017     user_scripts = []
3018     if self.cfg.GetUseExternalMipScript():
3019       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3020
3021     node_verify_param = {
3022       constants.NV_FILELIST:
3023         utils.UniqueSequence(filename
3024                              for files in filemap
3025                              for filename in files),
3026       constants.NV_NODELIST:
3027         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3028                                   self.all_node_info.values()),
3029       constants.NV_HYPERVISOR: hypervisors,
3030       constants.NV_HVPARAMS:
3031         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3032       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3033                                  for node in node_data_list
3034                                  if not node.offline],
3035       constants.NV_INSTANCELIST: hypervisors,
3036       constants.NV_VERSION: None,
3037       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3038       constants.NV_NODESETUP: None,
3039       constants.NV_TIME: None,
3040       constants.NV_MASTERIP: (master_node, master_ip),
3041       constants.NV_OSLIST: None,
3042       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3043       constants.NV_USERSCRIPTS: user_scripts,
3044       }
3045
3046     if vg_name is not None:
3047       node_verify_param[constants.NV_VGLIST] = None
3048       node_verify_param[constants.NV_LVLIST] = vg_name
3049       node_verify_param[constants.NV_PVLIST] = [vg_name]
3050       node_verify_param[constants.NV_DRBDLIST] = None
3051
3052     if drbd_helper:
3053       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3054
3055     # bridge checks
3056     # FIXME: this needs to be changed per node-group, not cluster-wide
3057     bridges = set()
3058     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3059     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3060       bridges.add(default_nicpp[constants.NIC_LINK])
3061     for instance in self.my_inst_info.values():
3062       for nic in instance.nics:
3063         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3064         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3065           bridges.add(full_nic[constants.NIC_LINK])
3066
3067     if bridges:
3068       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3069
3070     # Build our expected cluster state
3071     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3072                                                  name=node.name,
3073                                                  vm_capable=node.vm_capable))
3074                       for node in node_data_list)
3075
3076     # Gather OOB paths
3077     oob_paths = []
3078     for node in self.all_node_info.values():
3079       path = _SupportsOob(self.cfg, node)
3080       if path and path not in oob_paths:
3081         oob_paths.append(path)
3082
3083     if oob_paths:
3084       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3085
3086     for instance in self.my_inst_names:
3087       inst_config = self.my_inst_info[instance]
3088
3089       for nname in inst_config.all_nodes:
3090         if nname not in node_image:
3091           gnode = self.NodeImage(name=nname)
3092           gnode.ghost = (nname not in self.all_node_info)
3093           node_image[nname] = gnode
3094
3095       inst_config.MapLVsByNode(node_vol_should)
3096
3097       pnode = inst_config.primary_node
3098       node_image[pnode].pinst.append(instance)
3099
3100       for snode in inst_config.secondary_nodes:
3101         nimg = node_image[snode]
3102         nimg.sinst.append(instance)
3103         if pnode not in nimg.sbp:
3104           nimg.sbp[pnode] = []
3105         nimg.sbp[pnode].append(instance)
3106
3107     # At this point, we have the in-memory data structures complete,
3108     # except for the runtime information, which we'll gather next
3109
3110     # Due to the way our RPC system works, exact response times cannot be
3111     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3112     # time before and after executing the request, we can at least have a time
3113     # window.
3114     nvinfo_starttime = time.time()
3115     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3116                                            node_verify_param,
3117                                            self.cfg.GetClusterName())
3118     nvinfo_endtime = time.time()
3119
3120     if self.extra_lv_nodes and vg_name is not None:
3121       extra_lv_nvinfo = \
3122           self.rpc.call_node_verify(self.extra_lv_nodes,
3123                                     {constants.NV_LVLIST: vg_name},
3124                                     self.cfg.GetClusterName())
3125     else:
3126       extra_lv_nvinfo = {}
3127
3128     all_drbd_map = self.cfg.ComputeDRBDMap()
3129
3130     feedback_fn("* Gathering disk information (%s nodes)" %
3131                 len(self.my_node_names))
3132     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3133                                      self.my_inst_info)
3134
3135     feedback_fn("* Verifying configuration file consistency")
3136
3137     # If not all nodes are being checked, we need to make sure the master node
3138     # and a non-checked vm_capable node are in the list.
3139     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3140     if absent_nodes:
3141       vf_nvinfo = all_nvinfo.copy()
3142       vf_node_info = list(self.my_node_info.values())
3143       additional_nodes = []
3144       if master_node not in self.my_node_info:
3145         additional_nodes.append(master_node)
3146         vf_node_info.append(self.all_node_info[master_node])
3147       # Add the first vm_capable node we find which is not included
3148       for node in absent_nodes:
3149         nodeinfo = self.all_node_info[node]
3150         if nodeinfo.vm_capable and not nodeinfo.offline:
3151           additional_nodes.append(node)
3152           vf_node_info.append(self.all_node_info[node])
3153           break
3154       key = constants.NV_FILELIST
3155       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3156                                                  {key: node_verify_param[key]},
3157                                                  self.cfg.GetClusterName()))
3158     else:
3159       vf_nvinfo = all_nvinfo
3160       vf_node_info = self.my_node_info.values()
3161
3162     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3163
3164     feedback_fn("* Verifying node status")
3165
3166     refos_img = None
3167
3168     for node_i in node_data_list:
3169       node = node_i.name
3170       nimg = node_image[node]
3171
3172       if node_i.offline:
3173         if verbose:
3174           feedback_fn("* Skipping offline node %s" % (node,))
3175         n_offline += 1
3176         continue
3177
3178       if node == master_node:
3179         ntype = "master"
3180       elif node_i.master_candidate:
3181         ntype = "master candidate"
3182       elif node_i.drained:
3183         ntype = "drained"
3184         n_drained += 1
3185       else:
3186         ntype = "regular"
3187       if verbose:
3188         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3189
3190       msg = all_nvinfo[node].fail_msg
3191       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3192                msg)
3193       if msg:
3194         nimg.rpc_fail = True
3195         continue
3196
3197       nresult = all_nvinfo[node].payload
3198
3199       nimg.call_ok = self._VerifyNode(node_i, nresult)
3200       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3201       self._VerifyNodeNetwork(node_i, nresult)
3202       self._VerifyNodeUserScripts(node_i, nresult)
3203       self._VerifyOob(node_i, nresult)
3204
3205       if nimg.vm_capable:
3206         self._VerifyNodeLVM(node_i, nresult, vg_name)
3207         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3208                              all_drbd_map)
3209
3210         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3211         self._UpdateNodeInstances(node_i, nresult, nimg)
3212         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3213         self._UpdateNodeOS(node_i, nresult, nimg)
3214
3215         if not nimg.os_fail:
3216           if refos_img is None:
3217             refos_img = nimg
3218           self._VerifyNodeOS(node_i, nimg, refos_img)
3219         self._VerifyNodeBridges(node_i, nresult, bridges)
3220
3221         # Check whether all running instancies are primary for the node. (This
3222         # can no longer be done from _VerifyInstance below, since some of the
3223         # wrong instances could be from other node groups.)
3224         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3225
3226         for inst in non_primary_inst:
3227           # FIXME: investigate best way to handle offline insts
3228           if inst.admin_state == constants.ADMINST_OFFLINE:
3229             if verbose:
3230               feedback_fn("* Skipping offline instance %s" % inst.name)
3231             i_offline += 1
3232             continue
3233           test = inst in self.all_inst_info
3234           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3235                    "instance should not run on node %s", node_i.name)
3236           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3237                    "node is running unknown instance %s", inst)
3238
3239     for node, result in extra_lv_nvinfo.items():
3240       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3241                               node_image[node], vg_name)
3242
3243     feedback_fn("* Verifying instance status")
3244     for instance in self.my_inst_names:
3245       if verbose:
3246         feedback_fn("* Verifying instance %s" % instance)
3247       inst_config = self.my_inst_info[instance]
3248       self._VerifyInstance(instance, inst_config, node_image,
3249                            instdisk[instance])
3250       inst_nodes_offline = []
3251
3252       pnode = inst_config.primary_node
3253       pnode_img = node_image[pnode]
3254       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3255                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3256                " primary node failed", instance)
3257
3258       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3259                pnode_img.offline,
3260                constants.CV_EINSTANCEBADNODE, instance,
3261                "instance is marked as running and lives on offline node %s",
3262                inst_config.primary_node)
3263
3264       # If the instance is non-redundant we cannot survive losing its primary
3265       # node, so we are not N+1 compliant. On the other hand we have no disk
3266       # templates with more than one secondary so that situation is not well
3267       # supported either.
3268       # FIXME: does not support file-backed instances
3269       if not inst_config.secondary_nodes:
3270         i_non_redundant.append(instance)
3271
3272       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3273                constants.CV_EINSTANCELAYOUT,
3274                instance, "instance has multiple secondary nodes: %s",
3275                utils.CommaJoin(inst_config.secondary_nodes),
3276                code=self.ETYPE_WARNING)
3277
3278       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3279         pnode = inst_config.primary_node
3280         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3281         instance_groups = {}
3282
3283         for node in instance_nodes:
3284           instance_groups.setdefault(self.all_node_info[node].group,
3285                                      []).append(node)
3286
3287         pretty_list = [
3288           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3289           # Sort so that we always list the primary node first.
3290           for group, nodes in sorted(instance_groups.items(),
3291                                      key=lambda (_, nodes): pnode in nodes,
3292                                      reverse=True)]
3293
3294         self._ErrorIf(len(instance_groups) > 1,
3295                       constants.CV_EINSTANCESPLITGROUPS,
3296                       instance, "instance has primary and secondary nodes in"
3297                       " different groups: %s", utils.CommaJoin(pretty_list),
3298                       code=self.ETYPE_WARNING)
3299
3300       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3301         i_non_a_balanced.append(instance)
3302
3303       for snode in inst_config.secondary_nodes:
3304         s_img = node_image[snode]
3305         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3306                  snode, "instance %s, connection to secondary node failed",
3307                  instance)
3308
3309         if s_img.offline:
3310           inst_nodes_offline.append(snode)
3311
3312       # warn that the instance lives on offline nodes
3313       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3314                "instance has offline secondary node(s) %s",
3315                utils.CommaJoin(inst_nodes_offline))
3316       # ... or ghost/non-vm_capable nodes
3317       for node in inst_config.all_nodes:
3318         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3319                  instance, "instance lives on ghost node %s", node)
3320         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3321                  instance, "instance lives on non-vm_capable node %s", node)
3322
3323     feedback_fn("* Verifying orphan volumes")
3324     reserved = utils.FieldSet(*cluster.reserved_lvs)
3325
3326     # We will get spurious "unknown volume" warnings if any node of this group
3327     # is secondary for an instance whose primary is in another group. To avoid
3328     # them, we find these instances and add their volumes to node_vol_should.
3329     for inst in self.all_inst_info.values():
3330       for secondary in inst.secondary_nodes:
3331         if (secondary in self.my_node_info
3332             and inst.name not in self.my_inst_info):
3333           inst.MapLVsByNode(node_vol_should)
3334           break
3335
3336     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3337
3338     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3339       feedback_fn("* Verifying N+1 Memory redundancy")
3340       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3341
3342     feedback_fn("* Other Notes")
3343     if i_non_redundant:
3344       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3345                   % len(i_non_redundant))
3346
3347     if i_non_a_balanced:
3348       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3349                   % len(i_non_a_balanced))
3350
3351     if i_offline:
3352       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3353
3354     if n_offline:
3355       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3356
3357     if n_drained:
3358       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3359
3360     return not self.bad
3361
3362   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3363     """Analyze the post-hooks' result
3364
3365     This method analyses the hook result, handles it, and sends some
3366     nicely-formatted feedback back to the user.
3367
3368     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3369         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3370     @param hooks_results: the results of the multi-node hooks rpc call
3371     @param feedback_fn: function used send feedback back to the caller
3372     @param lu_result: previous Exec result
3373     @return: the new Exec result, based on the previous result
3374         and hook results
3375
3376     """
3377     # We only really run POST phase hooks, only for non-empty groups,
3378     # and are only interested in their results
3379     if not self.my_node_names:
3380       # empty node group
3381       pass
3382     elif phase == constants.HOOKS_PHASE_POST:
3383       # Used to change hooks' output to proper indentation
3384       feedback_fn("* Hooks Results")
3385       assert hooks_results, "invalid result from hooks"
3386
3387       for node_name in hooks_results:
3388         res = hooks_results[node_name]
3389         msg = res.fail_msg
3390         test = msg and not res.offline
3391         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3392                       "Communication failure in hooks execution: %s", msg)
3393         if res.offline or msg:
3394           # No need to investigate payload if node is offline or gave
3395           # an error.
3396           continue
3397         for script, hkr, output in res.payload:
3398           test = hkr == constants.HKR_FAIL
3399           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3400                         "Script %s failed, output:", script)
3401           if test:
3402             output = self._HOOKS_INDENT_RE.sub("      ", output)
3403             feedback_fn("%s" % output)
3404             lu_result = False
3405
3406     return lu_result
3407
3408
3409 class LUClusterVerifyDisks(NoHooksLU):
3410   """Verifies the cluster disks status.
3411
3412   """
3413   REQ_BGL = False
3414
3415   def ExpandNames(self):
3416     self.share_locks = _ShareAll()
3417     self.needed_locks = {
3418       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3419       }
3420
3421   def Exec(self, feedback_fn):
3422     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3423
3424     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3425     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3426                            for group in group_names])
3427
3428
3429 class LUGroupVerifyDisks(NoHooksLU):
3430   """Verifies the status of all disks in a node group.
3431
3432   """
3433   REQ_BGL = False
3434
3435   def ExpandNames(self):
3436     # Raises errors.OpPrereqError on its own if group can't be found
3437     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3438
3439     self.share_locks = _ShareAll()
3440     self.needed_locks = {
3441       locking.LEVEL_INSTANCE: [],
3442       locking.LEVEL_NODEGROUP: [],
3443       locking.LEVEL_NODE: [],
3444       }
3445
3446   def DeclareLocks(self, level):
3447     if level == locking.LEVEL_INSTANCE:
3448       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3449
3450       # Lock instances optimistically, needs verification once node and group
3451       # locks have been acquired
3452       self.needed_locks[locking.LEVEL_INSTANCE] = \
3453         self.cfg.GetNodeGroupInstances(self.group_uuid)
3454
3455     elif level == locking.LEVEL_NODEGROUP:
3456       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3457
3458       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3459         set([self.group_uuid] +
3460             # Lock all groups used by instances optimistically; this requires
3461             # going via the node before it's locked, requiring verification
3462             # later on
3463             [group_uuid
3464              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3465              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3466
3467     elif level == locking.LEVEL_NODE:
3468       # This will only lock the nodes in the group to be verified which contain
3469       # actual instances
3470       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3471       self._LockInstancesNodes()
3472
3473       # Lock all nodes in group to be verified
3474       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3475       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3476       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3477
3478   def CheckPrereq(self):
3479     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3480     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3481     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3482
3483     assert self.group_uuid in owned_groups
3484
3485     # Check if locked instances are still correct
3486     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3487
3488     # Get instance information
3489     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3490
3491     # Check if node groups for locked instances are still correct
3492     for (instance_name, inst) in self.instances.items():
3493       assert owned_nodes.issuperset(inst.all_nodes), \
3494         "Instance %s's nodes changed while we kept the lock" % instance_name
3495
3496       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3497                                              owned_groups)
3498
3499       assert self.group_uuid in inst_groups, \
3500         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3501
3502   def Exec(self, feedback_fn):
3503     """Verify integrity of cluster disks.
3504
3505     @rtype: tuple of three items
3506     @return: a tuple of (dict of node-to-node_error, list of instances
3507         which need activate-disks, dict of instance: (node, volume) for
3508         missing volumes
3509
3510     """
3511     res_nodes = {}
3512     res_instances = set()
3513     res_missing = {}
3514
3515     nv_dict = _MapInstanceDisksToNodes([inst
3516             for inst in self.instances.values()
3517             if inst.admin_state == constants.ADMINST_UP])
3518
3519     if nv_dict:
3520       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3521                              set(self.cfg.GetVmCapableNodeList()))
3522
3523       node_lvs = self.rpc.call_lv_list(nodes, [])
3524
3525       for (node, node_res) in node_lvs.items():
3526         if node_res.offline:
3527           continue
3528
3529         msg = node_res.fail_msg
3530         if msg:
3531           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3532           res_nodes[node] = msg
3533           continue
3534
3535         for lv_name, (_, _, lv_online) in node_res.payload.items():
3536           inst = nv_dict.pop((node, lv_name), None)
3537           if not (lv_online or inst is None):
3538             res_instances.add(inst)
3539
3540       # any leftover items in nv_dict are missing LVs, let's arrange the data
3541       # better
3542       for key, inst in nv_dict.iteritems():
3543         res_missing.setdefault(inst, []).append(list(key))
3544
3545     return (res_nodes, list(res_instances), res_missing)
3546
3547
3548 class LUClusterRepairDiskSizes(NoHooksLU):
3549   """Verifies the cluster disks sizes.
3550
3551   """
3552   REQ_BGL = False
3553
3554   def ExpandNames(self):
3555     if self.op.instances:
3556       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3557       self.needed_locks = {
3558         locking.LEVEL_NODE_RES: [],
3559         locking.LEVEL_INSTANCE: self.wanted_names,
3560         }
3561       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3562     else:
3563       self.wanted_names = None
3564       self.needed_locks = {
3565         locking.LEVEL_NODE_RES: locking.ALL_SET,
3566         locking.LEVEL_INSTANCE: locking.ALL_SET,
3567         }
3568     self.share_locks = {
3569       locking.LEVEL_NODE_RES: 1,
3570       locking.LEVEL_INSTANCE: 0,
3571       }
3572
3573   def DeclareLocks(self, level):
3574     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3575       self._LockInstancesNodes(primary_only=True, level=level)
3576
3577   def CheckPrereq(self):
3578     """Check prerequisites.
3579
3580     This only checks the optional instance list against the existing names.
3581
3582     """
3583     if self.wanted_names is None:
3584       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3585
3586     self.wanted_instances = \
3587         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3588
3589   def _EnsureChildSizes(self, disk):
3590     """Ensure children of the disk have the needed disk size.
3591
3592     This is valid mainly for DRBD8 and fixes an issue where the
3593     children have smaller disk size.
3594
3595     @param disk: an L{ganeti.objects.Disk} object
3596
3597     """
3598     if disk.dev_type == constants.LD_DRBD8:
3599       assert disk.children, "Empty children for DRBD8?"
3600       fchild = disk.children[0]
3601       mismatch = fchild.size < disk.size
3602       if mismatch:
3603         self.LogInfo("Child disk has size %d, parent %d, fixing",
3604                      fchild.size, disk.size)
3605         fchild.size = disk.size
3606
3607       # and we recurse on this child only, not on the metadev
3608       return self._EnsureChildSizes(fchild) or mismatch
3609     else:
3610       return False
3611
3612   def Exec(self, feedback_fn):
3613     """Verify the size of cluster disks.
3614
3615     """
3616     # TODO: check child disks too
3617     # TODO: check differences in size between primary/secondary nodes
3618     per_node_disks = {}
3619     for instance in self.wanted_instances:
3620       pnode = instance.primary_node
3621       if pnode not in per_node_disks:
3622         per_node_disks[pnode] = []
3623       for idx, disk in enumerate(instance.disks):
3624         per_node_disks[pnode].append((instance, idx, disk))
3625
3626     assert not (frozenset(per_node_disks.keys()) -
3627                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3628       "Not owning correct locks"
3629     assert not self.owned_locks(locking.LEVEL_NODE)
3630
3631     changed = []
3632     for node, dskl in per_node_disks.items():
3633       newl = [v[2].Copy() for v in dskl]
3634       for dsk in newl:
3635         self.cfg.SetDiskID(dsk, node)
3636       result = self.rpc.call_blockdev_getsize(node, newl)
3637       if result.fail_msg:
3638         self.LogWarning("Failure in blockdev_getsize call to node"
3639                         " %s, ignoring", node)
3640         continue
3641       if len(result.payload) != len(dskl):
3642         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3643                         " result.payload=%s", node, len(dskl), result.payload)
3644         self.LogWarning("Invalid result from node %s, ignoring node results",
3645                         node)
3646         continue
3647       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3648         if size is None:
3649           self.LogWarning("Disk %d of instance %s did not return size"
3650                           " information, ignoring", idx, instance.name)
3651           continue
3652         if not isinstance(size, (int, long)):
3653           self.LogWarning("Disk %d of instance %s did not return valid"
3654                           " size information, ignoring", idx, instance.name)
3655           continue
3656         size = size >> 20
3657         if size != disk.size:
3658           self.LogInfo("Disk %d of instance %s has mismatched size,"
3659                        " correcting: recorded %d, actual %d", idx,
3660                        instance.name, disk.size, size)
3661           disk.size = size
3662           self.cfg.Update(instance, feedback_fn)
3663           changed.append((instance.name, idx, size))
3664         if self._EnsureChildSizes(disk):
3665           self.cfg.Update(instance, feedback_fn)
3666           changed.append((instance.name, idx, disk.size))
3667     return changed
3668
3669
3670 class LUClusterRename(LogicalUnit):
3671   """Rename the cluster.
3672
3673   """
3674   HPATH = "cluster-rename"
3675   HTYPE = constants.HTYPE_CLUSTER
3676
3677   def BuildHooksEnv(self):
3678     """Build hooks env.
3679
3680     """
3681     return {
3682       "OP_TARGET": self.cfg.GetClusterName(),
3683       "NEW_NAME": self.op.name,
3684       }
3685
3686   def BuildHooksNodes(self):
3687     """Build hooks nodes.
3688
3689     """
3690     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3691
3692   def CheckPrereq(self):
3693     """Verify that the passed name is a valid one.
3694
3695     """
3696     hostname = netutils.GetHostname(name=self.op.name,
3697                                     family=self.cfg.GetPrimaryIPFamily())
3698
3699     new_name = hostname.name
3700     self.ip = new_ip = hostname.ip
3701     old_name = self.cfg.GetClusterName()
3702     old_ip = self.cfg.GetMasterIP()
3703     if new_name == old_name and new_ip == old_ip:
3704       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3705                                  " cluster has changed",
3706                                  errors.ECODE_INVAL)
3707     if new_ip != old_ip:
3708       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3709         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3710                                    " reachable on the network" %
3711                                    new_ip, errors.ECODE_NOTUNIQUE)
3712
3713     self.op.name = new_name
3714
3715   def Exec(self, feedback_fn):
3716     """Rename the cluster.
3717
3718     """
3719     clustername = self.op.name
3720     new_ip = self.ip
3721
3722     # shutdown the master IP
3723     master_params = self.cfg.GetMasterNetworkParameters()
3724     ems = self.cfg.GetUseExternalMipScript()
3725     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3726                                                      master_params, ems)
3727     result.Raise("Could not disable the master role")
3728
3729     try:
3730       cluster = self.cfg.GetClusterInfo()
3731       cluster.cluster_name = clustername
3732       cluster.master_ip = new_ip
3733       self.cfg.Update(cluster, feedback_fn)
3734
3735       # update the known hosts file
3736       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3737       node_list = self.cfg.GetOnlineNodeList()
3738       try:
3739         node_list.remove(master_params.name)
3740       except ValueError:
3741         pass
3742       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3743     finally:
3744       master_params.ip = new_ip
3745       result = self.rpc.call_node_activate_master_ip(master_params.name,
3746                                                      master_params, ems)
3747       msg = result.fail_msg
3748       if msg:
3749         self.LogWarning("Could not re-enable the master role on"
3750                         " the master, please restart manually: %s", msg)
3751
3752     return clustername
3753
3754
3755 def _ValidateNetmask(cfg, netmask):
3756   """Checks if a netmask is valid.
3757
3758   @type cfg: L{config.ConfigWriter}
3759   @param cfg: The cluster configuration
3760   @type netmask: int
3761   @param netmask: the netmask to be verified
3762   @raise errors.OpPrereqError: if the validation fails
3763
3764   """
3765   ip_family = cfg.GetPrimaryIPFamily()
3766   try:
3767     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3768   except errors.ProgrammerError:
3769     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3770                                ip_family)
3771   if not ipcls.ValidateNetmask(netmask):
3772     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3773                                 (netmask))
3774
3775
3776 class LUClusterSetParams(LogicalUnit):
3777   """Change the parameters of the cluster.
3778
3779   """
3780   HPATH = "cluster-modify"
3781   HTYPE = constants.HTYPE_CLUSTER
3782   REQ_BGL = False
3783
3784   def CheckArguments(self):
3785     """Check parameters
3786
3787     """
3788     if self.op.uid_pool:
3789       uidpool.CheckUidPool(self.op.uid_pool)
3790
3791     if self.op.add_uids:
3792       uidpool.CheckUidPool(self.op.add_uids)
3793
3794     if self.op.remove_uids:
3795       uidpool.CheckUidPool(self.op.remove_uids)
3796
3797     if self.op.master_netmask is not None:
3798       _ValidateNetmask(self.cfg, self.op.master_netmask)
3799
3800     if self.op.diskparams:
3801       for dt_params in self.op.diskparams.values():
3802         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3803
3804   def ExpandNames(self):
3805     # FIXME: in the future maybe other cluster params won't require checking on
3806     # all nodes to be modified.
3807     self.needed_locks = {
3808       locking.LEVEL_NODE: locking.ALL_SET,
3809       locking.LEVEL_INSTANCE: locking.ALL_SET,
3810       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3811     }
3812     self.share_locks = {
3813         locking.LEVEL_NODE: 1,
3814         locking.LEVEL_INSTANCE: 1,
3815         locking.LEVEL_NODEGROUP: 1,
3816     }
3817
3818   def BuildHooksEnv(self):
3819     """Build hooks env.
3820
3821     """
3822     return {
3823       "OP_TARGET": self.cfg.GetClusterName(),
3824       "NEW_VG_NAME": self.op.vg_name,
3825       }
3826
3827   def BuildHooksNodes(self):
3828     """Build hooks nodes.
3829
3830     """
3831     mn = self.cfg.GetMasterNode()
3832     return ([mn], [mn])
3833
3834   def CheckPrereq(self):
3835     """Check prerequisites.
3836
3837     This checks whether the given params don't conflict and
3838     if the given volume group is valid.
3839
3840     """
3841     if self.op.vg_name is not None and not self.op.vg_name:
3842       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3843         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3844                                    " instances exist", errors.ECODE_INVAL)
3845
3846     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3847       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3848         raise errors.OpPrereqError("Cannot disable drbd helper while"
3849                                    " drbd-based instances exist",
3850                                    errors.ECODE_INVAL)
3851
3852     node_list = self.owned_locks(locking.LEVEL_NODE)
3853
3854     # if vg_name not None, checks given volume group on all nodes
3855     if self.op.vg_name:
3856       vglist = self.rpc.call_vg_list(node_list)
3857       for node in node_list:
3858         msg = vglist[node].fail_msg
3859         if msg:
3860           # ignoring down node
3861           self.LogWarning("Error while gathering data on node %s"
3862                           " (ignoring node): %s", node, msg)
3863           continue
3864         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3865                                               self.op.vg_name,
3866                                               constants.MIN_VG_SIZE)
3867         if vgstatus:
3868           raise errors.OpPrereqError("Error on node '%s': %s" %
3869                                      (node, vgstatus), errors.ECODE_ENVIRON)
3870
3871     if self.op.drbd_helper:
3872       # checks given drbd helper on all nodes
3873       helpers = self.rpc.call_drbd_helper(node_list)
3874       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3875         if ninfo.offline:
3876           self.LogInfo("Not checking drbd helper on offline node %s", node)
3877           continue
3878         msg = helpers[node].fail_msg
3879         if msg:
3880           raise errors.OpPrereqError("Error checking drbd helper on node"
3881                                      " '%s': %s" % (node, msg),
3882                                      errors.ECODE_ENVIRON)
3883         node_helper = helpers[node].payload
3884         if node_helper != self.op.drbd_helper:
3885           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3886                                      (node, node_helper), errors.ECODE_ENVIRON)
3887
3888     self.cluster = cluster = self.cfg.GetClusterInfo()
3889     # validate params changes
3890     if self.op.beparams:
3891       objects.UpgradeBeParams(self.op.beparams)
3892       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3893       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3894
3895     if self.op.ndparams:
3896       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3897       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3898
3899       # TODO: we need a more general way to handle resetting
3900       # cluster-level parameters to default values
3901       if self.new_ndparams["oob_program"] == "":
3902         self.new_ndparams["oob_program"] = \
3903             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3904
3905     if self.op.hv_state:
3906       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3907                                             self.cluster.hv_state_static)
3908       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3909                                for hv, values in new_hv_state.items())
3910
3911     if self.op.disk_state:
3912       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3913                                                 self.cluster.disk_state_static)
3914       self.new_disk_state = \
3915         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3916                             for name, values in svalues.items()))
3917              for storage, svalues in new_disk_state.items())
3918
3919     if self.op.ipolicy:
3920       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3921                                             group_policy=False)
3922
3923       all_instances = self.cfg.GetAllInstancesInfo().values()
3924       violations = set()
3925       for group in self.cfg.GetAllNodeGroupsInfo().values():
3926         instances = frozenset([inst for inst in all_instances
3927                                if compat.any(node in group.members
3928                                              for node in inst.all_nodes)])
3929         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3930         new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3931                                                                    group),
3932                                             new_ipolicy, instances)
3933         if new:
3934           violations.update(new)
3935
3936       if violations:
3937         self.LogWarning("After the ipolicy change the following instances"
3938                         " violate them: %s",
3939                         utils.CommaJoin(violations))
3940
3941     if self.op.nicparams:
3942       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3943       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3944       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3945       nic_errors = []
3946
3947       # check all instances for consistency
3948       for instance in self.cfg.GetAllInstancesInfo().values():
3949         for nic_idx, nic in enumerate(instance.nics):
3950           params_copy = copy.deepcopy(nic.nicparams)
3951           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3952
3953           # check parameter syntax
3954           try:
3955             objects.NIC.CheckParameterSyntax(params_filled)
3956           except errors.ConfigurationError, err:
3957             nic_errors.append("Instance %s, nic/%d: %s" %
3958                               (instance.name, nic_idx, err))
3959
3960           # if we're moving instances to routed, check that they have an ip
3961           target_mode = params_filled[constants.NIC_MODE]
3962           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3963             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3964                               " address" % (instance.name, nic_idx))
3965       if nic_errors:
3966         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3967                                    "\n".join(nic_errors))
3968
3969     # hypervisor list/parameters
3970     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3971     if self.op.hvparams:
3972       for hv_name, hv_dict in self.op.hvparams.items():
3973         if hv_name not in self.new_hvparams:
3974           self.new_hvparams[hv_name] = hv_dict
3975         else:
3976           self.new_hvparams[hv_name].update(hv_dict)
3977
3978     # disk template parameters
3979     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3980     if self.op.diskparams:
3981       for dt_name, dt_params in self.op.diskparams.items():
3982         if dt_name not in self.op.diskparams:
3983           self.new_diskparams[dt_name] = dt_params
3984         else:
3985           self.new_diskparams[dt_name].update(dt_params)
3986
3987     # os hypervisor parameters
3988     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3989     if self.op.os_hvp:
3990       for os_name, hvs in self.op.os_hvp.items():
3991         if os_name not in self.new_os_hvp:
3992           self.new_os_hvp[os_name] = hvs
3993         else:
3994           for hv_name, hv_dict in hvs.items():
3995             if hv_name not in self.new_os_hvp[os_name]:
3996               self.new_os_hvp[os_name][hv_name] = hv_dict
3997             else:
3998               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3999
4000     # os parameters
4001     self.new_osp = objects.FillDict(cluster.osparams, {})
4002     if self.op.osparams:
4003       for os_name, osp in self.op.osparams.items():
4004         if os_name not in self.new_osp:
4005           self.new_osp[os_name] = {}
4006
4007         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4008                                                   use_none=True)
4009
4010         if not self.new_osp[os_name]:
4011           # we removed all parameters
4012           del self.new_osp[os_name]
4013         else:
4014           # check the parameter validity (remote check)
4015           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4016                          os_name, self.new_osp[os_name])
4017
4018     # changes to the hypervisor list
4019     if self.op.enabled_hypervisors is not None:
4020       self.hv_list = self.op.enabled_hypervisors
4021       for hv in self.hv_list:
4022         # if the hypervisor doesn't already exist in the cluster
4023         # hvparams, we initialize it to empty, and then (in both
4024         # cases) we make sure to fill the defaults, as we might not
4025         # have a complete defaults list if the hypervisor wasn't
4026         # enabled before
4027         if hv not in new_hvp:
4028           new_hvp[hv] = {}
4029         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4030         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4031     else:
4032       self.hv_list = cluster.enabled_hypervisors
4033
4034     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4035       # either the enabled list has changed, or the parameters have, validate
4036       for hv_name, hv_params in self.new_hvparams.items():
4037         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4038             (self.op.enabled_hypervisors and
4039              hv_name in self.op.enabled_hypervisors)):
4040           # either this is a new hypervisor, or its parameters have changed
4041           hv_class = hypervisor.GetHypervisor(hv_name)
4042           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4043           hv_class.CheckParameterSyntax(hv_params)
4044           _CheckHVParams(self, node_list, hv_name, hv_params)
4045
4046     if self.op.os_hvp:
4047       # no need to check any newly-enabled hypervisors, since the
4048       # defaults have already been checked in the above code-block
4049       for os_name, os_hvp in self.new_os_hvp.items():
4050         for hv_name, hv_params in os_hvp.items():
4051           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4052           # we need to fill in the new os_hvp on top of the actual hv_p
4053           cluster_defaults = self.new_hvparams.get(hv_name, {})
4054           new_osp = objects.FillDict(cluster_defaults, hv_params)
4055           hv_class = hypervisor.GetHypervisor(hv_name)
4056           hv_class.CheckParameterSyntax(new_osp)
4057           _CheckHVParams(self, node_list, hv_name, new_osp)
4058
4059     if self.op.default_iallocator:
4060       alloc_script = utils.FindFile(self.op.default_iallocator,
4061                                     constants.IALLOCATOR_SEARCH_PATH,
4062                                     os.path.isfile)
4063       if alloc_script is None:
4064         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4065                                    " specified" % self.op.default_iallocator,
4066                                    errors.ECODE_INVAL)
4067
4068   def Exec(self, feedback_fn):
4069     """Change the parameters of the cluster.
4070
4071     """
4072     if self.op.vg_name is not None:
4073       new_volume = self.op.vg_name
4074       if not new_volume:
4075         new_volume = None
4076       if new_volume != self.cfg.GetVGName():
4077         self.cfg.SetVGName(new_volume)
4078       else:
4079         feedback_fn("Cluster LVM configuration already in desired"
4080                     " state, not changing")
4081     if self.op.drbd_helper is not None:
4082       new_helper = self.op.drbd_helper
4083       if not new_helper:
4084         new_helper = None
4085       if new_helper != self.cfg.GetDRBDHelper():
4086         self.cfg.SetDRBDHelper(new_helper)
4087       else:
4088         feedback_fn("Cluster DRBD helper already in desired state,"
4089                     " not changing")
4090     if self.op.hvparams:
4091       self.cluster.hvparams = self.new_hvparams
4092     if self.op.os_hvp:
4093       self.cluster.os_hvp = self.new_os_hvp
4094     if self.op.enabled_hypervisors is not None:
4095       self.cluster.hvparams = self.new_hvparams
4096       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4097     if self.op.beparams:
4098       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4099     if self.op.nicparams:
4100       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4101     if self.op.ipolicy:
4102       self.cluster.ipolicy = self.new_ipolicy
4103     if self.op.osparams:
4104       self.cluster.osparams = self.new_osp
4105     if self.op.ndparams:
4106       self.cluster.ndparams = self.new_ndparams
4107     if self.op.diskparams:
4108       self.cluster.diskparams = self.new_diskparams
4109     if self.op.hv_state:
4110       self.cluster.hv_state_static = self.new_hv_state
4111     if self.op.disk_state:
4112       self.cluster.disk_state_static = self.new_disk_state
4113
4114     if self.op.candidate_pool_size is not None:
4115       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4116       # we need to update the pool size here, otherwise the save will fail
4117       _AdjustCandidatePool(self, [])
4118
4119     if self.op.maintain_node_health is not None:
4120       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4121         feedback_fn("Note: CONFD was disabled at build time, node health"
4122                     " maintenance is not useful (still enabling it)")
4123       self.cluster.maintain_node_health = self.op.maintain_node_health
4124
4125     if self.op.prealloc_wipe_disks is not None:
4126       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4127
4128     if self.op.add_uids is not None:
4129       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4130
4131     if self.op.remove_uids is not None:
4132       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4133
4134     if self.op.uid_pool is not None:
4135       self.cluster.uid_pool = self.op.uid_pool
4136
4137     if self.op.default_iallocator is not None:
4138       self.cluster.default_iallocator = self.op.default_iallocator
4139
4140     if self.op.reserved_lvs is not None:
4141       self.cluster.reserved_lvs = self.op.reserved_lvs
4142
4143     if self.op.use_external_mip_script is not None:
4144       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4145
4146     def helper_os(aname, mods, desc):
4147       desc += " OS list"
4148       lst = getattr(self.cluster, aname)
4149       for key, val in mods:
4150         if key == constants.DDM_ADD:
4151           if val in lst:
4152             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4153           else:
4154             lst.append(val)
4155         elif key == constants.DDM_REMOVE:
4156           if val in lst:
4157             lst.remove(val)
4158           else:
4159             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4160         else:
4161           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4162
4163     if self.op.hidden_os:
4164       helper_os("hidden_os", self.op.hidden_os, "hidden")
4165
4166     if self.op.blacklisted_os:
4167       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4168
4169     if self.op.master_netdev:
4170       master_params = self.cfg.GetMasterNetworkParameters()
4171       ems = self.cfg.GetUseExternalMipScript()
4172       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4173                   self.cluster.master_netdev)
4174       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4175                                                        master_params, ems)
4176       result.Raise("Could not disable the master ip")
4177       feedback_fn("Changing master_netdev from %s to %s" %
4178                   (master_params.netdev, self.op.master_netdev))
4179       self.cluster.master_netdev = self.op.master_netdev
4180
4181     if self.op.master_netmask:
4182       master_params = self.cfg.GetMasterNetworkParameters()
4183       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4184       result = self.rpc.call_node_change_master_netmask(master_params.name,
4185                                                         master_params.netmask,
4186                                                         self.op.master_netmask,
4187                                                         master_params.ip,
4188                                                         master_params.netdev)
4189       if result.fail_msg:
4190         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4191         feedback_fn(msg)
4192
4193       self.cluster.master_netmask = self.op.master_netmask
4194
4195     self.cfg.Update(self.cluster, feedback_fn)
4196
4197     if self.op.master_netdev:
4198       master_params = self.cfg.GetMasterNetworkParameters()
4199       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4200                   self.op.master_netdev)
4201       ems = self.cfg.GetUseExternalMipScript()
4202       result = self.rpc.call_node_activate_master_ip(master_params.name,
4203                                                      master_params, ems)
4204       if result.fail_msg:
4205         self.LogWarning("Could not re-enable the master ip on"
4206                         " the master, please restart manually: %s",
4207                         result.fail_msg)
4208
4209
4210 def _UploadHelper(lu, nodes, fname):
4211   """Helper for uploading a file and showing warnings.
4212
4213   """
4214   if os.path.exists(fname):
4215     result = lu.rpc.call_upload_file(nodes, fname)
4216     for to_node, to_result in result.items():
4217       msg = to_result.fail_msg
4218       if msg:
4219         msg = ("Copy of file %s to node %s failed: %s" %
4220                (fname, to_node, msg))
4221         lu.proc.LogWarning(msg)
4222
4223
4224 def _ComputeAncillaryFiles(cluster, redist):
4225   """Compute files external to Ganeti which need to be consistent.
4226
4227   @type redist: boolean
4228   @param redist: Whether to include files which need to be redistributed
4229
4230   """
4231   # Compute files for all nodes
4232   files_all = set([
4233     constants.SSH_KNOWN_HOSTS_FILE,
4234     constants.CONFD_HMAC_KEY,
4235     constants.CLUSTER_DOMAIN_SECRET_FILE,
4236     constants.SPICE_CERT_FILE,
4237     constants.SPICE_CACERT_FILE,
4238     constants.RAPI_USERS_FILE,
4239     ])
4240
4241   if not redist:
4242     files_all.update(constants.ALL_CERT_FILES)
4243     files_all.update(ssconf.SimpleStore().GetFileList())
4244   else:
4245     # we need to ship at least the RAPI certificate
4246     files_all.add(constants.RAPI_CERT_FILE)
4247
4248   if cluster.modify_etc_hosts:
4249     files_all.add(constants.ETC_HOSTS)
4250
4251   # Files which are optional, these must:
4252   # - be present in one other category as well
4253   # - either exist or not exist on all nodes of that category (mc, vm all)
4254   files_opt = set([
4255     constants.RAPI_USERS_FILE,
4256     ])
4257
4258   # Files which should only be on master candidates
4259   files_mc = set()
4260
4261   if not redist:
4262     files_mc.add(constants.CLUSTER_CONF_FILE)
4263
4264     # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4265     # replication
4266     files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4267
4268   # Files which should only be on VM-capable nodes
4269   files_vm = set(filename
4270     for hv_name in cluster.enabled_hypervisors
4271     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4272
4273   files_opt |= set(filename
4274     for hv_name in cluster.enabled_hypervisors
4275     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4276
4277   # Filenames in each category must be unique
4278   all_files_set = files_all | files_mc | files_vm
4279   assert (len(all_files_set) ==
4280           sum(map(len, [files_all, files_mc, files_vm]))), \
4281          "Found file listed in more than one file list"
4282
4283   # Optional files must be present in one other category
4284   assert all_files_set.issuperset(files_opt), \
4285          "Optional file not in a different required list"
4286
4287   return (files_all, files_opt, files_mc, files_vm)
4288
4289
4290 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4291   """Distribute additional files which are part of the cluster configuration.
4292
4293   ConfigWriter takes care of distributing the config and ssconf files, but
4294   there are more files which should be distributed to all nodes. This function
4295   makes sure those are copied.
4296
4297   @param lu: calling logical unit
4298   @param additional_nodes: list of nodes not in the config to distribute to
4299   @type additional_vm: boolean
4300   @param additional_vm: whether the additional nodes are vm-capable or not
4301
4302   """
4303   # Gather target nodes
4304   cluster = lu.cfg.GetClusterInfo()
4305   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4306
4307   online_nodes = lu.cfg.GetOnlineNodeList()
4308   vm_nodes = lu.cfg.GetVmCapableNodeList()
4309
4310   if additional_nodes is not None:
4311     online_nodes.extend(additional_nodes)
4312     if additional_vm:
4313       vm_nodes.extend(additional_nodes)
4314
4315   # Never distribute to master node
4316   for nodelist in [online_nodes, vm_nodes]:
4317     if master_info.name in nodelist:
4318       nodelist.remove(master_info.name)
4319
4320   # Gather file lists
4321   (files_all, _, files_mc, files_vm) = \
4322     _ComputeAncillaryFiles(cluster, True)
4323
4324   # Never re-distribute configuration file from here
4325   assert not (constants.CLUSTER_CONF_FILE in files_all or
4326               constants.CLUSTER_CONF_FILE in files_vm)
4327   assert not files_mc, "Master candidates not handled in this function"
4328
4329   filemap = [
4330     (online_nodes, files_all),
4331     (vm_nodes, files_vm),
4332     ]
4333
4334   # Upload the files
4335   for (node_list, files) in filemap:
4336     for fname in files:
4337       _UploadHelper(lu, node_list, fname)
4338
4339
4340 class LUClusterRedistConf(NoHooksLU):
4341   """Force the redistribution of cluster configuration.
4342
4343   This is a very simple LU.
4344
4345   """
4346   REQ_BGL = False
4347
4348   def ExpandNames(self):
4349     self.needed_locks = {
4350       locking.LEVEL_NODE: locking.ALL_SET,
4351     }
4352     self.share_locks[locking.LEVEL_NODE] = 1
4353
4354   def Exec(self, feedback_fn):
4355     """Redistribute the configuration.
4356
4357     """
4358     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4359     _RedistributeAncillaryFiles(self)
4360
4361
4362 class LUClusterActivateMasterIp(NoHooksLU):
4363   """Activate the master IP on the master node.
4364
4365   """
4366   def Exec(self, feedback_fn):
4367     """Activate the master IP.
4368
4369     """
4370     master_params = self.cfg.GetMasterNetworkParameters()
4371     ems = self.cfg.GetUseExternalMipScript()
4372     result = self.rpc.call_node_activate_master_ip(master_params.name,
4373                                                    master_params, ems)
4374     result.Raise("Could not activate the master IP")
4375
4376
4377 class LUClusterDeactivateMasterIp(NoHooksLU):
4378   """Deactivate the master IP on the master node.
4379
4380   """
4381   def Exec(self, feedback_fn):
4382     """Deactivate the master IP.
4383
4384     """
4385     master_params = self.cfg.GetMasterNetworkParameters()
4386     ems = self.cfg.GetUseExternalMipScript()
4387     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4388                                                      master_params, ems)
4389     result.Raise("Could not deactivate the master IP")
4390
4391
4392 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4393   """Sleep and poll for an instance's disk to sync.
4394
4395   """
4396   if not instance.disks or disks is not None and not disks:
4397     return True
4398
4399   disks = _ExpandCheckDisks(instance, disks)
4400
4401   if not oneshot:
4402     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4403
4404   node = instance.primary_node
4405
4406   for dev in disks:
4407     lu.cfg.SetDiskID(dev, node)
4408
4409   # TODO: Convert to utils.Retry
4410
4411   retries = 0
4412   degr_retries = 10 # in seconds, as we sleep 1 second each time
4413   while True:
4414     max_time = 0
4415     done = True
4416     cumul_degraded = False
4417     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4418     msg = rstats.fail_msg
4419     if msg:
4420       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4421       retries += 1
4422       if retries >= 10:
4423         raise errors.RemoteError("Can't contact node %s for mirror data,"
4424                                  " aborting." % node)
4425       time.sleep(6)
4426       continue
4427     rstats = rstats.payload
4428     retries = 0
4429     for i, mstat in enumerate(rstats):
4430       if mstat is None:
4431         lu.LogWarning("Can't compute data for node %s/%s",
4432                            node, disks[i].iv_name)
4433         continue
4434
4435       cumul_degraded = (cumul_degraded or
4436                         (mstat.is_degraded and mstat.sync_percent is None))
4437       if mstat.sync_percent is not None:
4438         done = False
4439         if mstat.estimated_time is not None:
4440           rem_time = ("%s remaining (estimated)" %
4441                       utils.FormatSeconds(mstat.estimated_time))
4442           max_time = mstat.estimated_time
4443         else:
4444           rem_time = "no time estimate"
4445         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4446                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4447
4448     # if we're done but degraded, let's do a few small retries, to
4449     # make sure we see a stable and not transient situation; therefore
4450     # we force restart of the loop
4451     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4452       logging.info("Degraded disks found, %d retries left", degr_retries)
4453       degr_retries -= 1
4454       time.sleep(1)
4455       continue
4456
4457     if done or oneshot:
4458       break
4459
4460     time.sleep(min(60, max_time))
4461
4462   if done:
4463     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4464   return not cumul_degraded
4465
4466
4467 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4468   """Check that mirrors are not degraded.
4469
4470   The ldisk parameter, if True, will change the test from the
4471   is_degraded attribute (which represents overall non-ok status for
4472   the device(s)) to the ldisk (representing the local storage status).
4473
4474   """
4475   lu.cfg.SetDiskID(dev, node)
4476
4477   result = True
4478
4479   if on_primary or dev.AssembleOnSecondary():
4480     rstats = lu.rpc.call_blockdev_find(node, dev)
4481     msg = rstats.fail_msg
4482     if msg:
4483       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4484       result = False
4485     elif not rstats.payload:
4486       lu.LogWarning("Can't find disk on node %s", node)
4487       result = False
4488     else:
4489       if ldisk:
4490         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4491       else:
4492         result = result and not rstats.payload.is_degraded
4493
4494   if dev.children:
4495     for child in dev.children:
4496       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4497
4498   return result
4499
4500
4501 class LUOobCommand(NoHooksLU):
4502   """Logical unit for OOB handling.
4503
4504   """
4505   REG_BGL = False
4506   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4507
4508   def ExpandNames(self):
4509     """Gather locks we need.
4510
4511     """
4512     if self.op.node_names:
4513       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4514       lock_names = self.op.node_names
4515     else:
4516       lock_names = locking.ALL_SET
4517
4518     self.needed_locks = {
4519       locking.LEVEL_NODE: lock_names,
4520       }
4521
4522   def CheckPrereq(self):
4523     """Check prerequisites.
4524
4525     This checks:
4526      - the node exists in the configuration
4527      - OOB is supported
4528
4529     Any errors are signaled by raising errors.OpPrereqError.
4530
4531     """
4532     self.nodes = []
4533     self.master_node = self.cfg.GetMasterNode()
4534
4535     assert self.op.power_delay >= 0.0
4536
4537     if self.op.node_names:
4538       if (self.op.command in self._SKIP_MASTER and
4539           self.master_node in self.op.node_names):
4540         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4541         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4542
4543         if master_oob_handler:
4544           additional_text = ("run '%s %s %s' if you want to operate on the"
4545                              " master regardless") % (master_oob_handler,
4546                                                       self.op.command,
4547                                                       self.master_node)
4548         else:
4549           additional_text = "it does not support out-of-band operations"
4550
4551         raise errors.OpPrereqError(("Operating on the master node %s is not"
4552                                     " allowed for %s; %s") %
4553                                    (self.master_node, self.op.command,
4554                                     additional_text), errors.ECODE_INVAL)
4555     else:
4556       self.op.node_names = self.cfg.GetNodeList()
4557       if self.op.command in self._SKIP_MASTER:
4558         self.op.node_names.remove(self.master_node)
4559
4560     if self.op.command in self._SKIP_MASTER:
4561       assert self.master_node not in self.op.node_names
4562
4563     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4564       if node is None:
4565         raise errors.OpPrereqError("Node %s not found" % node_name,
4566                                    errors.ECODE_NOENT)
4567       else:
4568         self.nodes.append(node)
4569
4570       if (not self.op.ignore_status and
4571           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4572         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4573                                     " not marked offline") % node_name,
4574                                    errors.ECODE_STATE)
4575
4576   def Exec(self, feedback_fn):
4577     """Execute OOB and return result if we expect any.
4578
4579     """
4580     master_node = self.master_node
4581     ret = []
4582
4583     for idx, node in enumerate(utils.NiceSort(self.nodes,
4584                                               key=lambda node: node.name)):
4585       node_entry = [(constants.RS_NORMAL, node.name)]
4586       ret.append(node_entry)
4587
4588       oob_program = _SupportsOob(self.cfg, node)
4589
4590       if not oob_program:
4591         node_entry.append((constants.RS_UNAVAIL, None))
4592         continue
4593
4594       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4595                    self.op.command, oob_program, node.name)
4596       result = self.rpc.call_run_oob(master_node, oob_program,
4597                                      self.op.command, node.name,
4598                                      self.op.timeout)
4599
4600       if result.fail_msg:
4601         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4602                         node.name, result.fail_msg)
4603         node_entry.append((constants.RS_NODATA, None))
4604       else:
4605         try:
4606           self._CheckPayload(result)
4607         except errors.OpExecError, err:
4608           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4609                           node.name, err)
4610           node_entry.append((constants.RS_NODATA, None))
4611         else:
4612           if self.op.command == constants.OOB_HEALTH:
4613             # For health we should log important events
4614             for item, status in result.payload:
4615               if status in [constants.OOB_STATUS_WARNING,
4616                             constants.OOB_STATUS_CRITICAL]:
4617                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4618                                 item, node.name, status)
4619
4620           if self.op.command == constants.OOB_POWER_ON:
4621             node.powered = True
4622           elif self.op.command == constants.OOB_POWER_OFF:
4623             node.powered = False
4624           elif self.op.command == constants.OOB_POWER_STATUS:
4625             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4626             if powered != node.powered:
4627               logging.warning(("Recorded power state (%s) of node '%s' does not"
4628                                " match actual power state (%s)"), node.powered,
4629                               node.name, powered)
4630
4631           # For configuration changing commands we should update the node
4632           if self.op.command in (constants.OOB_POWER_ON,
4633                                  constants.OOB_POWER_OFF):
4634             self.cfg.Update(node, feedback_fn)
4635
4636           node_entry.append((constants.RS_NORMAL, result.payload))
4637
4638           if (self.op.command == constants.OOB_POWER_ON and
4639               idx < len(self.nodes) - 1):
4640             time.sleep(self.op.power_delay)
4641
4642     return ret
4643
4644   def _CheckPayload(self, result):
4645     """Checks if the payload is valid.
4646
4647     @param result: RPC result
4648     @raises errors.OpExecError: If payload is not valid
4649
4650     """
4651     errs = []
4652     if self.op.command == constants.OOB_HEALTH:
4653       if not isinstance(result.payload, list):
4654         errs.append("command 'health' is expected to return a list but got %s" %
4655                     type(result.payload))
4656       else:
4657         for item, status in result.payload:
4658           if status not in constants.OOB_STATUSES:
4659             errs.append("health item '%s' has invalid status '%s'" %
4660                         (item, status))
4661
4662     if self.op.command == constants.OOB_POWER_STATUS:
4663       if not isinstance(result.payload, dict):
4664         errs.append("power-status is expected to return a dict but got %s" %
4665                     type(result.payload))
4666
4667     if self.op.command in [
4668         constants.OOB_POWER_ON,
4669         constants.OOB_POWER_OFF,
4670         constants.OOB_POWER_CYCLE,
4671         ]:
4672       if result.payload is not None:
4673         errs.append("%s is expected to not return payload but got '%s'" %
4674                     (self.op.command, result.payload))
4675
4676     if errs:
4677       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4678                                utils.CommaJoin(errs))
4679
4680
4681 class _OsQuery(_QueryBase):
4682   FIELDS = query.OS_FIELDS
4683
4684   def ExpandNames(self, lu):
4685     # Lock all nodes in shared mode
4686     # Temporary removal of locks, should be reverted later
4687     # TODO: reintroduce locks when they are lighter-weight
4688     lu.needed_locks = {}
4689     #self.share_locks[locking.LEVEL_NODE] = 1
4690     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4691
4692     # The following variables interact with _QueryBase._GetNames
4693     if self.names:
4694       self.wanted = self.names
4695     else:
4696       self.wanted = locking.ALL_SET
4697
4698     self.do_locking = self.use_locking
4699
4700   def DeclareLocks(self, lu, level):
4701     pass
4702
4703   @staticmethod
4704   def _DiagnoseByOS(rlist):
4705     """Remaps a per-node return list into an a per-os per-node dictionary
4706
4707     @param rlist: a map with node names as keys and OS objects as values
4708
4709     @rtype: dict
4710     @return: a dictionary with osnames as keys and as value another
4711         map, with nodes as keys and tuples of (path, status, diagnose,
4712         variants, parameters, api_versions) as values, eg::
4713
4714           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4715                                      (/srv/..., False, "invalid api")],
4716                            "node2": [(/srv/..., True, "", [], [])]}
4717           }
4718
4719     """
4720     all_os = {}
4721     # we build here the list of nodes that didn't fail the RPC (at RPC
4722     # level), so that nodes with a non-responding node daemon don't
4723     # make all OSes invalid
4724     good_nodes = [node_name for node_name in rlist
4725                   if not rlist[node_name].fail_msg]
4726     for node_name, nr in rlist.items():
4727       if nr.fail_msg or not nr.payload:
4728         continue
4729       for (name, path, status, diagnose, variants,
4730            params, api_versions) in nr.payload:
4731         if name not in all_os:
4732           # build a list of nodes for this os containing empty lists
4733           # for each node in node_list
4734           all_os[name] = {}
4735           for nname in good_nodes:
4736             all_os[name][nname] = []
4737         # convert params from [name, help] to (name, help)
4738         params = [tuple(v) for v in params]
4739         all_os[name][node_name].append((path, status, diagnose,
4740                                         variants, params, api_versions))
4741     return all_os
4742
4743   def _GetQueryData(self, lu):
4744     """Computes the list of nodes and their attributes.
4745
4746     """
4747     # Locking is not used
4748     assert not (compat.any(lu.glm.is_owned(level)
4749                            for level in locking.LEVELS
4750                            if level != locking.LEVEL_CLUSTER) or
4751                 self.do_locking or self.use_locking)
4752
4753     valid_nodes = [node.name
4754                    for node in lu.cfg.GetAllNodesInfo().values()
4755                    if not node.offline and node.vm_capable]
4756     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4757     cluster = lu.cfg.GetClusterInfo()
4758
4759     data = {}
4760
4761     for (os_name, os_data) in pol.items():
4762       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4763                           hidden=(os_name in cluster.hidden_os),
4764                           blacklisted=(os_name in cluster.blacklisted_os))
4765
4766       variants = set()
4767       parameters = set()
4768       api_versions = set()
4769
4770       for idx, osl in enumerate(os_data.values()):
4771         info.valid = bool(info.valid and osl and osl[0][1])
4772         if not info.valid:
4773           break
4774
4775         (node_variants, node_params, node_api) = osl[0][3:6]
4776         if idx == 0:
4777           # First entry
4778           variants.update(node_variants)
4779           parameters.update(node_params)
4780           api_versions.update(node_api)
4781         else:
4782           # Filter out inconsistent values
4783           variants.intersection_update(node_variants)
4784           parameters.intersection_update(node_params)
4785           api_versions.intersection_update(node_api)
4786
4787       info.variants = list(variants)
4788       info.parameters = list(parameters)
4789       info.api_versions = list(api_versions)
4790
4791       data[os_name] = info
4792
4793     # Prepare data in requested order
4794     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4795             if name in data]
4796
4797
4798 class LUOsDiagnose(NoHooksLU):
4799   """Logical unit for OS diagnose/query.
4800
4801   """
4802   REQ_BGL = False
4803
4804   @staticmethod
4805   def _BuildFilter(fields, names):
4806     """Builds a filter for querying OSes.
4807
4808     """
4809     name_filter = qlang.MakeSimpleFilter("name", names)
4810
4811     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4812     # respective field is not requested
4813     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4814                      for fname in ["hidden", "blacklisted"]
4815                      if fname not in fields]
4816     if "valid" not in fields:
4817       status_filter.append([qlang.OP_TRUE, "valid"])
4818
4819     if status_filter:
4820       status_filter.insert(0, qlang.OP_AND)
4821     else:
4822       status_filter = None
4823
4824     if name_filter and status_filter:
4825       return [qlang.OP_AND, name_filter, status_filter]
4826     elif name_filter:
4827       return name_filter
4828     else:
4829       return status_filter
4830
4831   def CheckArguments(self):
4832     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4833                        self.op.output_fields, False)
4834
4835   def ExpandNames(self):
4836     self.oq.ExpandNames(self)
4837
4838   def Exec(self, feedback_fn):
4839     return self.oq.OldStyleQuery(self)
4840
4841
4842 class LUNodeRemove(LogicalUnit):
4843   """Logical unit for removing a node.
4844
4845   """
4846   HPATH = "node-remove"
4847   HTYPE = constants.HTYPE_NODE
4848
4849   def BuildHooksEnv(self):
4850     """Build hooks env.
4851
4852     This doesn't run on the target node in the pre phase as a failed
4853     node would then be impossible to remove.
4854
4855     """
4856     return {
4857       "OP_TARGET": self.op.node_name,
4858       "NODE_NAME": self.op.node_name,
4859       }
4860
4861   def BuildHooksNodes(self):
4862     """Build hooks nodes.
4863
4864     """
4865     all_nodes = self.cfg.GetNodeList()
4866     try:
4867       all_nodes.remove(self.op.node_name)
4868     except ValueError:
4869       logging.warning("Node '%s', which is about to be removed, was not found"
4870                       " in the list of all nodes", self.op.node_name)
4871     return (all_nodes, all_nodes)
4872
4873   def CheckPrereq(self):
4874     """Check prerequisites.
4875
4876     This checks:
4877      - the node exists in the configuration
4878      - it does not have primary or secondary instances
4879      - it's not the master
4880
4881     Any errors are signaled by raising errors.OpPrereqError.
4882
4883     """
4884     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4885     node = self.cfg.GetNodeInfo(self.op.node_name)
4886     assert node is not None
4887
4888     masternode = self.cfg.GetMasterNode()
4889     if node.name == masternode:
4890       raise errors.OpPrereqError("Node is the master node, failover to another"
4891                                  " node is required", errors.ECODE_INVAL)
4892
4893     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4894       if node.name in instance.all_nodes:
4895         raise errors.OpPrereqError("Instance %s is still running on the node,"
4896                                    " please remove first" % instance_name,
4897                                    errors.ECODE_INVAL)
4898     self.op.node_name = node.name
4899     self.node = node
4900
4901   def Exec(self, feedback_fn):
4902     """Removes the node from the cluster.
4903
4904     """
4905     node = self.node
4906     logging.info("Stopping the node daemon and removing configs from node %s",
4907                  node.name)
4908
4909     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4910
4911     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4912       "Not owning BGL"
4913
4914     # Promote nodes to master candidate as needed
4915     _AdjustCandidatePool(self, exceptions=[node.name])
4916     self.context.RemoveNode(node.name)
4917
4918     # Run post hooks on the node before it's removed
4919     _RunPostHook(self, node.name)
4920
4921     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4922     msg = result.fail_msg
4923     if msg:
4924       self.LogWarning("Errors encountered on the remote node while leaving"
4925                       " the cluster: %s", msg)
4926
4927     # Remove node from our /etc/hosts
4928     if self.cfg.GetClusterInfo().modify_etc_hosts:
4929       master_node = self.cfg.GetMasterNode()
4930       result = self.rpc.call_etc_hosts_modify(master_node,
4931                                               constants.ETC_HOSTS_REMOVE,
4932                                               node.name, None)
4933       result.Raise("Can't update hosts file with new host data")
4934       _RedistributeAncillaryFiles(self)
4935
4936
4937 class _NodeQuery(_QueryBase):
4938   FIELDS = query.NODE_FIELDS
4939
4940   def ExpandNames(self, lu):
4941     lu.needed_locks = {}
4942     lu.share_locks = _ShareAll()
4943
4944     if self.names:
4945       self.wanted = _GetWantedNodes(lu, self.names)
4946     else:
4947       self.wanted = locking.ALL_SET
4948
4949     self.do_locking = (self.use_locking and
4950                        query.NQ_LIVE in self.requested_data)
4951
4952     if self.do_locking:
4953       # If any non-static field is requested we need to lock the nodes
4954       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4955
4956   def DeclareLocks(self, lu, level):
4957     pass
4958
4959   def _GetQueryData(self, lu):
4960     """Computes the list of nodes and their attributes.
4961
4962     """
4963     all_info = lu.cfg.GetAllNodesInfo()
4964
4965     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4966
4967     # Gather data as requested
4968     if query.NQ_LIVE in self.requested_data:
4969       # filter out non-vm_capable nodes
4970       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4971
4972       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4973                                         [lu.cfg.GetHypervisorType()])
4974       live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4975                        for (name, nresult) in node_data.items()
4976                        if not nresult.fail_msg and nresult.payload)
4977     else:
4978       live_data = None
4979
4980     if query.NQ_INST in self.requested_data:
4981       node_to_primary = dict([(name, set()) for name in nodenames])
4982       node_to_secondary = dict([(name, set()) for name in nodenames])
4983
4984       inst_data = lu.cfg.GetAllInstancesInfo()
4985
4986       for inst in inst_data.values():
4987         if inst.primary_node in node_to_primary:
4988           node_to_primary[inst.primary_node].add(inst.name)
4989         for secnode in inst.secondary_nodes:
4990           if secnode in node_to_secondary:
4991             node_to_secondary[secnode].add(inst.name)
4992     else:
4993       node_to_primary = None
4994       node_to_secondary = None
4995
4996     if query.NQ_OOB in self.requested_data:
4997       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4998                          for name, node in all_info.iteritems())
4999     else:
5000       oob_support = None
5001
5002     if query.NQ_GROUP in self.requested_data:
5003       groups = lu.cfg.GetAllNodeGroupsInfo()
5004     else:
5005       groups = {}
5006
5007     return query.NodeQueryData([all_info[name] for name in nodenames],
5008                                live_data, lu.cfg.GetMasterNode(),
5009                                node_to_primary, node_to_secondary, groups,
5010                                oob_support, lu.cfg.GetClusterInfo())
5011
5012
5013 class LUNodeQuery(NoHooksLU):
5014   """Logical unit for querying nodes.
5015
5016   """
5017   # pylint: disable=W0142
5018   REQ_BGL = False
5019
5020   def CheckArguments(self):
5021     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5022                          self.op.output_fields, self.op.use_locking)
5023
5024   def ExpandNames(self):
5025     self.nq.ExpandNames(self)
5026
5027   def DeclareLocks(self, level):
5028     self.nq.DeclareLocks(self, level)
5029
5030   def Exec(self, feedback_fn):
5031     return self.nq.OldStyleQuery(self)
5032
5033
5034 class LUNodeQueryvols(NoHooksLU):
5035   """Logical unit for getting volumes on node(s).
5036
5037   """
5038   REQ_BGL = False
5039   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5040   _FIELDS_STATIC = utils.FieldSet("node")
5041
5042   def CheckArguments(self):
5043     _CheckOutputFields(static=self._FIELDS_STATIC,
5044                        dynamic=self._FIELDS_DYNAMIC,
5045                        selected=self.op.output_fields)
5046
5047   def ExpandNames(self):
5048     self.share_locks = _ShareAll()
5049     self.needed_locks = {}
5050
5051     if not self.op.nodes:
5052       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5053     else:
5054       self.needed_locks[locking.LEVEL_NODE] = \
5055         _GetWantedNodes(self, self.op.nodes)
5056
5057   def Exec(self, feedback_fn):
5058     """Computes the list of nodes and their attributes.
5059
5060     """
5061     nodenames = self.owned_locks(locking.LEVEL_NODE)
5062     volumes = self.rpc.call_node_volumes(nodenames)
5063
5064     ilist = self.cfg.GetAllInstancesInfo()
5065     vol2inst = _MapInstanceDisksToNodes(ilist.values())
5066
5067     output = []
5068     for node in nodenames:
5069       nresult = volumes[node]
5070       if nresult.offline:
5071         continue
5072       msg = nresult.fail_msg
5073       if msg:
5074         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5075         continue
5076
5077       node_vols = sorted(nresult.payload,
5078                          key=operator.itemgetter("dev"))
5079
5080       for vol in node_vols:
5081         node_output = []
5082         for field in self.op.output_fields:
5083           if field == "node":
5084             val = node
5085           elif field == "phys":
5086             val = vol["dev"]
5087           elif field == "vg":
5088             val = vol["vg"]
5089           elif field == "name":
5090             val = vol["name"]
5091           elif field == "size":
5092             val = int(float(vol["size"]))
5093           elif field == "instance":
5094             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5095           else:
5096             raise errors.ParameterError(field)
5097           node_output.append(str(val))
5098
5099         output.append(node_output)
5100
5101     return output
5102
5103
5104 class LUNodeQueryStorage(NoHooksLU):
5105   """Logical unit for getting information on storage units on node(s).
5106
5107   """
5108   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5109   REQ_BGL = False
5110
5111   def CheckArguments(self):
5112     _CheckOutputFields(static=self._FIELDS_STATIC,
5113                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5114                        selected=self.op.output_fields)
5115
5116   def ExpandNames(self):
5117     self.share_locks = _ShareAll()
5118     self.needed_locks = {}
5119
5120     if self.op.nodes:
5121       self.needed_locks[locking.LEVEL_NODE] = \
5122         _GetWantedNodes(self, self.op.nodes)
5123     else:
5124       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5125
5126   def Exec(self, feedback_fn):
5127     """Computes the list of nodes and their attributes.
5128
5129     """
5130     self.nodes = self.owned_locks(locking.LEVEL_NODE)
5131
5132     # Always get name to sort by
5133     if constants.SF_NAME in self.op.output_fields:
5134       fields = self.op.output_fields[:]
5135     else:
5136       fields = [constants.SF_NAME] + self.op.output_fields
5137
5138     # Never ask for node or type as it's only known to the LU
5139     for extra in [constants.SF_NODE, constants.SF_TYPE]:
5140       while extra in fields:
5141         fields.remove(extra)
5142
5143     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5144     name_idx = field_idx[constants.SF_NAME]
5145
5146     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5147     data = self.rpc.call_storage_list(self.nodes,
5148                                       self.op.storage_type, st_args,
5149                                       self.op.name, fields)
5150
5151     result = []
5152
5153     for node in utils.NiceSort(self.nodes):
5154       nresult = data[node]
5155       if nresult.offline:
5156         continue
5157
5158       msg = nresult.fail_msg
5159       if msg:
5160         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5161         continue
5162
5163       rows = dict([(row[name_idx], row) for row in nresult.payload])
5164
5165       for name in utils.NiceSort(rows.keys()):
5166         row = rows[name]
5167
5168         out = []
5169
5170         for field in self.op.output_fields:
5171           if field == constants.SF_NODE:
5172             val = node
5173           elif field == constants.SF_TYPE:
5174             val = self.op.storage_type
5175           elif field in field_idx:
5176             val = row[field_idx[field]]
5177           else:
5178             raise errors.ParameterError(field)
5179
5180           out.append(val)
5181
5182         result.append(out)
5183
5184     return result
5185
5186
5187 class _InstanceQuery(_QueryBase):
5188   FIELDS = query.INSTANCE_FIELDS
5189
5190   def ExpandNames(self, lu):
5191     lu.needed_locks = {}
5192     lu.share_locks = _ShareAll()
5193
5194     if self.names:
5195       self.wanted = _GetWantedInstances(lu, self.names)
5196     else:
5197       self.wanted = locking.ALL_SET
5198
5199     self.do_locking = (self.use_locking and
5200                        query.IQ_LIVE in self.requested_data)
5201     if self.do_locking:
5202       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5203       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5204       lu.needed_locks[locking.LEVEL_NODE] = []
5205       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5206
5207     self.do_grouplocks = (self.do_locking and
5208                           query.IQ_NODES in self.requested_data)
5209
5210   def DeclareLocks(self, lu, level):
5211     if self.do_locking:
5212       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5213         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5214
5215         # Lock all groups used by instances optimistically; this requires going
5216         # via the node before it's locked, requiring verification later on
5217         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5218           set(group_uuid
5219               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5220               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5221       elif level == locking.LEVEL_NODE:
5222         lu._LockInstancesNodes() # pylint: disable=W0212
5223
5224   @staticmethod
5225   def _CheckGroupLocks(lu):
5226     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5227     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5228
5229     # Check if node groups for locked instances are still correct
5230     for instance_name in owned_instances:
5231       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5232
5233   def _GetQueryData(self, lu):
5234     """Computes the list of instances and their attributes.
5235
5236     """
5237     if self.do_grouplocks:
5238       self._CheckGroupLocks(lu)
5239
5240     cluster = lu.cfg.GetClusterInfo()
5241     all_info = lu.cfg.GetAllInstancesInfo()
5242
5243     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5244
5245     instance_list = [all_info[name] for name in instance_names]
5246     nodes = frozenset(itertools.chain(*(inst.all_nodes
5247                                         for inst in instance_list)))
5248     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5249     bad_nodes = []
5250     offline_nodes = []
5251     wrongnode_inst = set()
5252
5253     # Gather data as requested
5254     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5255       live_data = {}
5256       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5257       for name in nodes:
5258         result = node_data[name]
5259         if result.offline:
5260           # offline nodes will be in both lists
5261           assert result.fail_msg
5262           offline_nodes.append(name)
5263         if result.fail_msg:
5264           bad_nodes.append(name)
5265         elif result.payload:
5266           for inst in result.payload:
5267             if inst in all_info:
5268               if all_info[inst].primary_node == name:
5269                 live_data.update(result.payload)
5270               else:
5271                 wrongnode_inst.add(inst)
5272             else:
5273               # orphan instance; we don't list it here as we don't
5274               # handle this case yet in the output of instance listing
5275               logging.warning("Orphan instance '%s' found on node %s",
5276                               inst, name)
5277         # else no instance is alive
5278     else:
5279       live_data = {}
5280
5281     if query.IQ_DISKUSAGE in self.requested_data:
5282       disk_usage = dict((inst.name,
5283                          _ComputeDiskSize(inst.disk_template,
5284                                           [{constants.IDISK_SIZE: disk.size}
5285                                            for disk in inst.disks]))
5286                         for inst in instance_list)
5287     else:
5288       disk_usage = None
5289
5290     if query.IQ_CONSOLE in self.requested_data:
5291       consinfo = {}
5292       for inst in instance_list:
5293         if inst.name in live_data:
5294           # Instance is running
5295           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5296         else:
5297           consinfo[inst.name] = None
5298       assert set(consinfo.keys()) == set(instance_names)
5299     else:
5300       consinfo = None
5301
5302     if query.IQ_NODES in self.requested_data:
5303       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5304                                             instance_list)))
5305       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5306       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5307                     for uuid in set(map(operator.attrgetter("group"),
5308                                         nodes.values())))
5309     else:
5310       nodes = None
5311       groups = None
5312
5313     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5314                                    disk_usage, offline_nodes, bad_nodes,
5315                                    live_data, wrongnode_inst, consinfo,
5316                                    nodes, groups)
5317
5318
5319 class LUQuery(NoHooksLU):
5320   """Query for resources/items of a certain kind.
5321
5322   """
5323   # pylint: disable=W0142
5324   REQ_BGL = False
5325
5326   def CheckArguments(self):
5327     qcls = _GetQueryImplementation(self.op.what)
5328
5329     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5330
5331   def ExpandNames(self):
5332     self.impl.ExpandNames(self)
5333
5334   def DeclareLocks(self, level):
5335     self.impl.DeclareLocks(self, level)
5336
5337   def Exec(self, feedback_fn):
5338     return self.impl.NewStyleQuery(self)
5339
5340
5341 class LUQueryFields(NoHooksLU):
5342   """Query for resources/items of a certain kind.
5343
5344   """
5345   # pylint: disable=W0142
5346   REQ_BGL = False
5347
5348   def CheckArguments(self):
5349     self.qcls = _GetQueryImplementation(self.op.what)
5350
5351   def ExpandNames(self):
5352     self.needed_locks = {}
5353
5354   def Exec(self, feedback_fn):
5355     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5356
5357
5358 class LUNodeModifyStorage(NoHooksLU):
5359   """Logical unit for modifying a storage volume on a node.
5360
5361   """
5362   REQ_BGL = False
5363
5364   def CheckArguments(self):
5365     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5366
5367     storage_type = self.op.storage_type
5368
5369     try:
5370       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5371     except KeyError:
5372       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5373                                  " modified" % storage_type,
5374                                  errors.ECODE_INVAL)
5375
5376     diff = set(self.op.changes.keys()) - modifiable
5377     if diff:
5378       raise errors.OpPrereqError("The following fields can not be modified for"
5379                                  " storage units of type '%s': %r" %
5380                                  (storage_type, list(diff)),
5381                                  errors.ECODE_INVAL)
5382
5383   def ExpandNames(self):
5384     self.needed_locks = {
5385       locking.LEVEL_NODE: self.op.node_name,
5386       }
5387
5388   def Exec(self, feedback_fn):
5389     """Computes the list of nodes and their attributes.
5390
5391     """
5392     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5393     result = self.rpc.call_storage_modify(self.op.node_name,
5394                                           self.op.storage_type, st_args,
5395                                           self.op.name, self.op.changes)
5396     result.Raise("Failed to modify storage unit '%s' on %s" %
5397                  (self.op.name, self.op.node_name))
5398
5399
5400 class LUNodeAdd(LogicalUnit):
5401   """Logical unit for adding node to the cluster.
5402
5403   """
5404   HPATH = "node-add"
5405   HTYPE = constants.HTYPE_NODE
5406   _NFLAGS = ["master_capable", "vm_capable"]
5407
5408   def CheckArguments(self):
5409     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5410     # validate/normalize the node name
5411     self.hostname = netutils.GetHostname(name=self.op.node_name,
5412                                          family=self.primary_ip_family)
5413     self.op.node_name = self.hostname.name
5414
5415     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5416       raise errors.OpPrereqError("Cannot readd the master node",
5417                                  errors.ECODE_STATE)
5418
5419     if self.op.readd and self.op.group:
5420       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5421                                  " being readded", errors.ECODE_INVAL)
5422
5423   def BuildHooksEnv(self):
5424     """Build hooks env.
5425
5426     This will run on all nodes before, and on all nodes + the new node after.
5427
5428     """
5429     return {
5430       "OP_TARGET": self.op.node_name,
5431       "NODE_NAME": self.op.node_name,
5432       "NODE_PIP": self.op.primary_ip,
5433       "NODE_SIP": self.op.secondary_ip,
5434       "MASTER_CAPABLE": str(self.op.master_capable),
5435       "VM_CAPABLE": str(self.op.vm_capable),
5436       }
5437
5438   def BuildHooksNodes(self):
5439     """Build hooks nodes.
5440
5441     """
5442     # Exclude added node
5443     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5444     post_nodes = pre_nodes + [self.op.node_name, ]
5445
5446     return (pre_nodes, post_nodes)
5447
5448   def CheckPrereq(self):
5449     """Check prerequisites.
5450
5451     This checks:
5452      - the new node is not already in the config
5453      - it is resolvable
5454      - its parameters (single/dual homed) matches the cluster
5455
5456     Any errors are signaled by raising errors.OpPrereqError.
5457
5458     """
5459     cfg = self.cfg
5460     hostname = self.hostname
5461     node = hostname.name
5462     primary_ip = self.op.primary_ip = hostname.ip
5463     if self.op.secondary_ip is None:
5464       if self.primary_ip_family == netutils.IP6Address.family:
5465         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5466                                    " IPv4 address must be given as secondary",
5467                                    errors.ECODE_INVAL)
5468       self.op.secondary_ip = primary_ip
5469
5470     secondary_ip = self.op.secondary_ip
5471     if not netutils.IP4Address.IsValid(secondary_ip):
5472       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5473                                  " address" % secondary_ip, errors.ECODE_INVAL)
5474
5475     node_list = cfg.GetNodeList()
5476     if not self.op.readd and node in node_list:
5477       raise errors.OpPrereqError("Node %s is already in the configuration" %
5478                                  node, errors.ECODE_EXISTS)
5479     elif self.op.readd and node not in node_list:
5480       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5481                                  errors.ECODE_NOENT)
5482
5483     self.changed_primary_ip = False
5484
5485     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5486       if self.op.readd and node == existing_node_name:
5487         if existing_node.secondary_ip != secondary_ip:
5488           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5489                                      " address configuration as before",
5490                                      errors.ECODE_INVAL)
5491         if existing_node.primary_ip != primary_ip:
5492           self.changed_primary_ip = True
5493
5494         continue
5495
5496       if (existing_node.primary_ip == primary_ip or
5497           existing_node.secondary_ip == primary_ip or
5498           existing_node.primary_ip == secondary_ip or
5499           existing_node.secondary_ip == secondary_ip):
5500         raise errors.OpPrereqError("New node ip address(es) conflict with"
5501                                    " existing node %s" % existing_node.name,
5502                                    errors.ECODE_NOTUNIQUE)
5503
5504     # After this 'if' block, None is no longer a valid value for the
5505     # _capable op attributes
5506     if self.op.readd:
5507       old_node = self.cfg.GetNodeInfo(node)
5508       assert old_node is not None, "Can't retrieve locked node %s" % node
5509       for attr in self._NFLAGS:
5510         if getattr(self.op, attr) is None:
5511           setattr(self.op, attr, getattr(old_node, attr))
5512     else:
5513       for attr in self._NFLAGS:
5514         if getattr(self.op, attr) is None:
5515           setattr(self.op, attr, True)
5516
5517     if self.op.readd and not self.op.vm_capable:
5518       pri, sec = cfg.GetNodeInstances(node)
5519       if pri or sec:
5520         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5521                                    " flag set to false, but it already holds"
5522                                    " instances" % node,
5523                                    errors.ECODE_STATE)
5524
5525     # check that the type of the node (single versus dual homed) is the
5526     # same as for the master
5527     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5528     master_singlehomed = myself.secondary_ip == myself.primary_ip
5529     newbie_singlehomed = secondary_ip == primary_ip
5530     if master_singlehomed != newbie_singlehomed:
5531       if master_singlehomed:
5532         raise errors.OpPrereqError("The master has no secondary ip but the"
5533                                    " new node has one",
5534                                    errors.ECODE_INVAL)
5535       else:
5536         raise errors.OpPrereqError("The master has a secondary ip but the"
5537                                    " new node doesn't have one",
5538                                    errors.ECODE_INVAL)
5539
5540     # checks reachability
5541     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5542       raise errors.OpPrereqError("Node not reachable by ping",
5543                                  errors.ECODE_ENVIRON)
5544
5545     if not newbie_singlehomed:
5546       # check reachability from my secondary ip to newbie's secondary ip
5547       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5548                            source=myself.secondary_ip):
5549         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5550                                    " based ping to node daemon port",
5551                                    errors.ECODE_ENVIRON)
5552
5553     if self.op.readd:
5554       exceptions = [node]
5555     else:
5556       exceptions = []
5557
5558     if self.op.master_capable:
5559       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5560     else:
5561       self.master_candidate = False
5562
5563     if self.op.readd:
5564       self.new_node = old_node
5565     else:
5566       node_group = cfg.LookupNodeGroup(self.op.group)
5567       self.new_node = objects.Node(name=node,
5568                                    primary_ip=primary_ip,
5569                                    secondary_ip=secondary_ip,
5570                                    master_candidate=self.master_candidate,
5571                                    offline=False, drained=False,
5572                                    group=node_group)
5573
5574     if self.op.ndparams:
5575       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5576
5577     if self.op.hv_state:
5578       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5579
5580     if self.op.disk_state:
5581       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5582
5583   def Exec(self, feedback_fn):
5584     """Adds the new node to the cluster.
5585
5586     """
5587     new_node = self.new_node
5588     node = new_node.name
5589
5590     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5591       "Not owning BGL"
5592
5593     # We adding a new node so we assume it's powered
5594     new_node.powered = True
5595
5596     # for re-adds, reset the offline/drained/master-candidate flags;
5597     # we need to reset here, otherwise offline would prevent RPC calls
5598     # later in the procedure; this also means that if the re-add
5599     # fails, we are left with a non-offlined, broken node
5600     if self.op.readd:
5601       new_node.drained = new_node.offline = False # pylint: disable=W0201
5602       self.LogInfo("Readding a node, the offline/drained flags were reset")
5603       # if we demote the node, we do cleanup later in the procedure
5604       new_node.master_candidate = self.master_candidate
5605       if self.changed_primary_ip:
5606         new_node.primary_ip = self.op.primary_ip
5607
5608     # copy the master/vm_capable flags
5609     for attr in self._NFLAGS:
5610       setattr(new_node, attr, getattr(self.op, attr))
5611
5612     # notify the user about any possible mc promotion
5613     if new_node.master_candidate:
5614       self.LogInfo("Node will be a master candidate")
5615
5616     if self.op.ndparams:
5617       new_node.ndparams = self.op.ndparams
5618     else:
5619       new_node.ndparams = {}
5620
5621     if self.op.hv_state:
5622       new_node.hv_state_static = self.new_hv_state
5623
5624     if self.op.disk_state:
5625       new_node.disk_state_static = self.new_disk_state
5626
5627     # check connectivity
5628     result = self.rpc.call_version([node])[node]
5629     result.Raise("Can't get version information from node %s" % node)
5630     if constants.PROTOCOL_VERSION == result.payload:
5631       logging.info("Communication to node %s fine, sw version %s match",
5632                    node, result.payload)
5633     else:
5634       raise errors.OpExecError("Version mismatch master version %s,"
5635                                " node version %s" %
5636                                (constants.PROTOCOL_VERSION, result.payload))
5637
5638     # Add node to our /etc/hosts, and add key to known_hosts
5639     if self.cfg.GetClusterInfo().modify_etc_hosts:
5640       master_node = self.cfg.GetMasterNode()
5641       result = self.rpc.call_etc_hosts_modify(master_node,
5642                                               constants.ETC_HOSTS_ADD,
5643                                               self.hostname.name,
5644                                               self.hostname.ip)
5645       result.Raise("Can't update hosts file with new host data")
5646
5647     if new_node.secondary_ip != new_node.primary_ip:
5648       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5649                                False)
5650
5651     node_verify_list = [self.cfg.GetMasterNode()]
5652     node_verify_param = {
5653       constants.NV_NODELIST: ([node], {}),
5654       # TODO: do a node-net-test as well?
5655     }
5656
5657     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5658                                        self.cfg.GetClusterName())
5659     for verifier in node_verify_list:
5660       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5661       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5662       if nl_payload:
5663         for failed in nl_payload:
5664           feedback_fn("ssh/hostname verification failed"
5665                       " (checking from %s): %s" %
5666                       (verifier, nl_payload[failed]))
5667         raise errors.OpExecError("ssh/hostname verification failed")
5668
5669     if self.op.readd:
5670       _RedistributeAncillaryFiles(self)
5671       self.context.ReaddNode(new_node)
5672       # make sure we redistribute the config
5673       self.cfg.Update(new_node, feedback_fn)
5674       # and make sure the new node will not have old files around
5675       if not new_node.master_candidate:
5676         result = self.rpc.call_node_demote_from_mc(new_node.name)
5677         msg = result.fail_msg
5678         if msg:
5679           self.LogWarning("Node failed to demote itself from master"
5680                           " candidate status: %s" % msg)
5681     else:
5682       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5683                                   additional_vm=self.op.vm_capable)
5684       self.context.AddNode(new_node, self.proc.GetECId())
5685
5686
5687 class LUNodeSetParams(LogicalUnit):
5688   """Modifies the parameters of a node.
5689
5690   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5691       to the node role (as _ROLE_*)
5692   @cvar _R2F: a dictionary from node role to tuples of flags
5693   @cvar _FLAGS: a list of attribute names corresponding to the flags
5694
5695   """
5696   HPATH = "node-modify"
5697   HTYPE = constants.HTYPE_NODE
5698   REQ_BGL = False
5699   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5700   _F2R = {
5701     (True, False, False): _ROLE_CANDIDATE,
5702     (False, True, False): _ROLE_DRAINED,
5703     (False, False, True): _ROLE_OFFLINE,
5704     (False, False, False): _ROLE_REGULAR,
5705     }
5706   _R2F = dict((v, k) for k, v in _F2R.items())
5707   _FLAGS = ["master_candidate", "drained", "offline"]
5708
5709   def CheckArguments(self):
5710     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5711     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5712                 self.op.master_capable, self.op.vm_capable,
5713                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5714                 self.op.disk_state]
5715     if all_mods.count(None) == len(all_mods):
5716       raise errors.OpPrereqError("Please pass at least one modification",
5717                                  errors.ECODE_INVAL)
5718     if all_mods.count(True) > 1:
5719       raise errors.OpPrereqError("Can't set the node into more than one"
5720                                  " state at the same time",
5721                                  errors.ECODE_INVAL)
5722
5723     # Boolean value that tells us whether we might be demoting from MC
5724     self.might_demote = (self.op.master_candidate == False or
5725                          self.op.offline == True or
5726                          self.op.drained == True or
5727                          self.op.master_capable == False)
5728
5729     if self.op.secondary_ip:
5730       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5731         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5732                                    " address" % self.op.secondary_ip,
5733                                    errors.ECODE_INVAL)
5734
5735     self.lock_all = self.op.auto_promote and self.might_demote
5736     self.lock_instances = self.op.secondary_ip is not None
5737
5738   def _InstanceFilter(self, instance):
5739     """Filter for getting affected instances.
5740
5741     """
5742     return (instance.disk_template in constants.DTS_INT_MIRROR and
5743             self.op.node_name in instance.all_nodes)
5744
5745   def ExpandNames(self):
5746     if self.lock_all:
5747       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5748     else:
5749       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5750
5751     # Since modifying a node can have severe effects on currently running
5752     # operations the resource lock is at least acquired in shared mode
5753     self.needed_locks[locking.LEVEL_NODE_RES] = \
5754       self.needed_locks[locking.LEVEL_NODE]
5755
5756     # Get node resource and instance locks in shared mode; they are not used
5757     # for anything but read-only access
5758     self.share_locks[locking.LEVEL_NODE_RES] = 1
5759     self.share_locks[locking.LEVEL_INSTANCE] = 1
5760
5761     if self.lock_instances:
5762       self.needed_locks[locking.LEVEL_INSTANCE] = \
5763         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5764
5765   def BuildHooksEnv(self):
5766     """Build hooks env.
5767
5768     This runs on the master node.
5769
5770     """
5771     return {
5772       "OP_TARGET": self.op.node_name,
5773       "MASTER_CANDIDATE": str(self.op.master_candidate),
5774       "OFFLINE": str(self.op.offline),
5775       "DRAINED": str(self.op.drained),
5776       "MASTER_CAPABLE": str(self.op.master_capable),
5777       "VM_CAPABLE": str(self.op.vm_capable),
5778       }
5779
5780   def BuildHooksNodes(self):
5781     """Build hooks nodes.
5782
5783     """
5784     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5785     return (nl, nl)
5786
5787   def CheckPrereq(self):
5788     """Check prerequisites.
5789
5790     This only checks the instance list against the existing names.
5791
5792     """
5793     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5794
5795     if self.lock_instances:
5796       affected_instances = \
5797         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5798
5799       # Verify instance locks
5800       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5801       wanted_instances = frozenset(affected_instances.keys())
5802       if wanted_instances - owned_instances:
5803         raise errors.OpPrereqError("Instances affected by changing node %s's"
5804                                    " secondary IP address have changed since"
5805                                    " locks were acquired, wanted '%s', have"
5806                                    " '%s'; retry the operation" %
5807                                    (self.op.node_name,
5808                                     utils.CommaJoin(wanted_instances),
5809                                     utils.CommaJoin(owned_instances)),
5810                                    errors.ECODE_STATE)
5811     else:
5812       affected_instances = None
5813
5814     if (self.op.master_candidate is not None or
5815         self.op.drained is not None or
5816         self.op.offline is not None):
5817       # we can't change the master's node flags
5818       if self.op.node_name == self.cfg.GetMasterNode():
5819         raise errors.OpPrereqError("The master role can be changed"
5820                                    " only via master-failover",
5821                                    errors.ECODE_INVAL)
5822
5823     if self.op.master_candidate and not node.master_capable:
5824       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5825                                  " it a master candidate" % node.name,
5826                                  errors.ECODE_STATE)
5827
5828     if self.op.vm_capable == False:
5829       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5830       if ipri or isec:
5831         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5832                                    " the vm_capable flag" % node.name,
5833                                    errors.ECODE_STATE)
5834
5835     if node.master_candidate and self.might_demote and not self.lock_all:
5836       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5837       # check if after removing the current node, we're missing master
5838       # candidates
5839       (mc_remaining, mc_should, _) = \
5840           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5841       if mc_remaining < mc_should:
5842         raise errors.OpPrereqError("Not enough master candidates, please"
5843                                    " pass auto promote option to allow"
5844                                    " promotion", errors.ECODE_STATE)
5845
5846     self.old_flags = old_flags = (node.master_candidate,
5847                                   node.drained, node.offline)
5848     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5849     self.old_role = old_role = self._F2R[old_flags]
5850
5851     # Check for ineffective changes
5852     for attr in self._FLAGS:
5853       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5854         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5855         setattr(self.op, attr, None)
5856
5857     # Past this point, any flag change to False means a transition
5858     # away from the respective state, as only real changes are kept
5859
5860     # TODO: We might query the real power state if it supports OOB
5861     if _SupportsOob(self.cfg, node):
5862       if self.op.offline is False and not (node.powered or
5863                                            self.op.powered == True):
5864         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5865                                     " offline status can be reset") %
5866                                    self.op.node_name)
5867     elif self.op.powered is not None:
5868       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5869                                   " as it does not support out-of-band"
5870                                   " handling") % self.op.node_name)
5871
5872     # If we're being deofflined/drained, we'll MC ourself if needed
5873     if (self.op.drained == False or self.op.offline == False or
5874         (self.op.master_capable and not node.master_capable)):
5875       if _DecideSelfPromotion(self):
5876         self.op.master_candidate = True
5877         self.LogInfo("Auto-promoting node to master candidate")
5878
5879     # If we're no longer master capable, we'll demote ourselves from MC
5880     if self.op.master_capable == False and node.master_candidate:
5881       self.LogInfo("Demoting from master candidate")
5882       self.op.master_candidate = False
5883
5884     # Compute new role
5885     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5886     if self.op.master_candidate:
5887       new_role = self._ROLE_CANDIDATE
5888     elif self.op.drained:
5889       new_role = self._ROLE_DRAINED
5890     elif self.op.offline:
5891       new_role = self._ROLE_OFFLINE
5892     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5893       # False is still in new flags, which means we're un-setting (the
5894       # only) True flag
5895       new_role = self._ROLE_REGULAR
5896     else: # no new flags, nothing, keep old role
5897       new_role = old_role
5898
5899     self.new_role = new_role
5900
5901     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5902       # Trying to transition out of offline status
5903       # TODO: Use standard RPC runner, but make sure it works when the node is
5904       # still marked offline
5905       result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5906       if result.fail_msg:
5907         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5908                                    " to report its version: %s" %
5909                                    (node.name, result.fail_msg),
5910                                    errors.ECODE_STATE)
5911       else:
5912         self.LogWarning("Transitioning node from offline to online state"
5913                         " without using re-add. Please make sure the node"
5914                         " is healthy!")
5915
5916     if self.op.secondary_ip:
5917       # Ok even without locking, because this can't be changed by any LU
5918       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5919       master_singlehomed = master.secondary_ip == master.primary_ip
5920       if master_singlehomed and self.op.secondary_ip:
5921         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5922                                    " homed cluster", errors.ECODE_INVAL)
5923
5924       assert not (frozenset(affected_instances) -
5925                   self.owned_locks(locking.LEVEL_INSTANCE))
5926
5927       if node.offline:
5928         if affected_instances:
5929           raise errors.OpPrereqError("Cannot change secondary IP address:"
5930                                      " offline node has instances (%s)"
5931                                      " configured to use it" %
5932                                      utils.CommaJoin(affected_instances.keys()))
5933       else:
5934         # On online nodes, check that no instances are running, and that
5935         # the node has the new ip and we can reach it.
5936         for instance in affected_instances.values():
5937           _CheckInstanceState(self, instance, INSTANCE_DOWN,
5938                               msg="cannot change secondary ip")
5939
5940         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5941         if master.name != node.name:
5942           # check reachability from master secondary ip to new secondary ip
5943           if not netutils.TcpPing(self.op.secondary_ip,
5944                                   constants.DEFAULT_NODED_PORT,
5945                                   source=master.secondary_ip):
5946             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5947                                        " based ping to node daemon port",
5948                                        errors.ECODE_ENVIRON)
5949
5950     if self.op.ndparams:
5951       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5952       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5953       self.new_ndparams = new_ndparams
5954
5955     if self.op.hv_state:
5956       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5957                                                  self.node.hv_state_static)
5958
5959     if self.op.disk_state:
5960       self.new_disk_state = \
5961         _MergeAndVerifyDiskState(self.op.disk_state,
5962                                  self.node.disk_state_static)
5963
5964   def Exec(self, feedback_fn):
5965     """Modifies a node.
5966
5967     """
5968     node = self.node
5969     old_role = self.old_role
5970     new_role = self.new_role
5971
5972     result = []
5973
5974     if self.op.ndparams:
5975       node.ndparams = self.new_ndparams
5976
5977     if self.op.powered is not None:
5978       node.powered = self.op.powered
5979
5980     if self.op.hv_state:
5981       node.hv_state_static = self.new_hv_state
5982
5983     if self.op.disk_state:
5984       node.disk_state_static = self.new_disk_state
5985
5986     for attr in ["master_capable", "vm_capable"]:
5987       val = getattr(self.op, attr)
5988       if val is not None:
5989         setattr(node, attr, val)
5990         result.append((attr, str(val)))
5991
5992     if new_role != old_role:
5993       # Tell the node to demote itself, if no longer MC and not offline
5994       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5995         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5996         if msg:
5997           self.LogWarning("Node failed to demote itself: %s", msg)
5998
5999       new_flags = self._R2F[new_role]
6000       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6001         if of != nf:
6002           result.append((desc, str(nf)))
6003       (node.master_candidate, node.drained, node.offline) = new_flags
6004
6005       # we locked all nodes, we adjust the CP before updating this node
6006       if self.lock_all:
6007         _AdjustCandidatePool(self, [node.name])
6008
6009     if self.op.secondary_ip:
6010       node.secondary_ip = self.op.secondary_ip
6011       result.append(("secondary_ip", self.op.secondary_ip))
6012
6013     # this will trigger configuration file update, if needed
6014     self.cfg.Update(node, feedback_fn)
6015
6016     # this will trigger job queue propagation or cleanup if the mc
6017     # flag changed
6018     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6019       self.context.ReaddNode(node)
6020
6021     return result
6022
6023
6024 class LUNodePowercycle(NoHooksLU):
6025   """Powercycles a node.
6026
6027   """
6028   REQ_BGL = False
6029
6030   def CheckArguments(self):
6031     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6032     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6033       raise errors.OpPrereqError("The node is the master and the force"
6034                                  " parameter was not set",
6035                                  errors.ECODE_INVAL)
6036
6037   def ExpandNames(self):
6038     """Locking for PowercycleNode.
6039
6040     This is a last-resort option and shouldn't block on other
6041     jobs. Therefore, we grab no locks.
6042
6043     """
6044     self.needed_locks = {}
6045
6046   def Exec(self, feedback_fn):
6047     """Reboots a node.
6048
6049     """
6050     result = self.rpc.call_node_powercycle(self.op.node_name,
6051                                            self.cfg.GetHypervisorType())
6052     result.Raise("Failed to schedule the reboot")
6053     return result.payload
6054
6055
6056 class LUClusterQuery(NoHooksLU):
6057   """Query cluster configuration.
6058
6059   """
6060   REQ_BGL = False
6061
6062   def ExpandNames(self):
6063     self.needed_locks = {}
6064
6065   def Exec(self, feedback_fn):
6066     """Return cluster config.
6067
6068     """
6069     cluster = self.cfg.GetClusterInfo()
6070     os_hvp = {}
6071
6072     # Filter just for enabled hypervisors
6073     for os_name, hv_dict in cluster.os_hvp.items():
6074       os_hvp[os_name] = {}
6075       for hv_name, hv_params in hv_dict.items():
6076         if hv_name in cluster.enabled_hypervisors:
6077           os_hvp[os_name][hv_name] = hv_params
6078
6079     # Convert ip_family to ip_version
6080     primary_ip_version = constants.IP4_VERSION
6081     if cluster.primary_ip_family == netutils.IP6Address.family:
6082       primary_ip_version = constants.IP6_VERSION
6083
6084     result = {
6085       "software_version": constants.RELEASE_VERSION,
6086       "protocol_version": constants.PROTOCOL_VERSION,
6087       "config_version": constants.CONFIG_VERSION,
6088       "os_api_version": max(constants.OS_API_VERSIONS),
6089       "export_version": constants.EXPORT_VERSION,
6090       "architecture": (platform.architecture()[0], platform.machine()),
6091       "name": cluster.cluster_name,
6092       "master": cluster.master_node,
6093       "default_hypervisor": cluster.primary_hypervisor,
6094       "enabled_hypervisors": cluster.enabled_hypervisors,
6095       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6096                         for hypervisor_name in cluster.enabled_hypervisors]),
6097       "os_hvp": os_hvp,
6098       "beparams": cluster.beparams,
6099       "osparams": cluster.osparams,
6100       "ipolicy": cluster.ipolicy,
6101       "nicparams": cluster.nicparams,
6102       "ndparams": cluster.ndparams,
6103       "candidate_pool_size": cluster.candidate_pool_size,
6104       "master_netdev": cluster.master_netdev,
6105       "master_netmask": cluster.master_netmask,
6106       "use_external_mip_script": cluster.use_external_mip_script,
6107       "volume_group_name": cluster.volume_group_name,
6108       "drbd_usermode_helper": cluster.drbd_usermode_helper,
6109       "file_storage_dir": cluster.file_storage_dir,
6110       "shared_file_storage_dir": cluster.shared_file_storage_dir,
6111       "maintain_node_health": cluster.maintain_node_health,
6112       "ctime": cluster.ctime,
6113       "mtime": cluster.mtime,
6114       "uuid": cluster.uuid,
6115       "tags": list(cluster.GetTags()),
6116       "uid_pool": cluster.uid_pool,
6117       "default_iallocator": cluster.default_iallocator,
6118       "reserved_lvs": cluster.reserved_lvs,
6119       "primary_ip_version": primary_ip_version,
6120       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6121       "hidden_os": cluster.hidden_os,
6122       "blacklisted_os": cluster.blacklisted_os,
6123       }
6124
6125     return result
6126
6127
6128 class LUClusterConfigQuery(NoHooksLU):
6129   """Return configuration values.
6130
6131   """
6132   REQ_BGL = False
6133   _FIELDS_DYNAMIC = utils.FieldSet()
6134   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6135                                   "watcher_pause", "volume_group_name")
6136
6137   def CheckArguments(self):
6138     _CheckOutputFields(static=self._FIELDS_STATIC,
6139                        dynamic=self._FIELDS_DYNAMIC,
6140                        selected=self.op.output_fields)
6141
6142   def ExpandNames(self):
6143     self.needed_locks = {}
6144
6145   def Exec(self, feedback_fn):
6146     """Dump a representation of the cluster config to the standard output.
6147
6148     """
6149     values = []
6150     for field in self.op.output_fields:
6151       if field == "cluster_name":
6152         entry = self.cfg.GetClusterName()
6153       elif field == "master_node":
6154         entry = self.cfg.GetMasterNode()
6155       elif field == "drain_flag":
6156         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6157       elif field == "watcher_pause":
6158         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6159       elif field == "volume_group_name":
6160         entry = self.cfg.GetVGName()
6161       else:
6162         raise errors.ParameterError(field)
6163       values.append(entry)
6164     return values
6165
6166
6167 class LUInstanceActivateDisks(NoHooksLU):
6168   """Bring up an instance's disks.
6169
6170   """
6171   REQ_BGL = False
6172
6173   def ExpandNames(self):
6174     self._ExpandAndLockInstance()
6175     self.needed_locks[locking.LEVEL_NODE] = []
6176     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6177
6178   def DeclareLocks(self, level):
6179     if level == locking.LEVEL_NODE:
6180       self._LockInstancesNodes()
6181
6182   def CheckPrereq(self):
6183     """Check prerequisites.
6184
6185     This checks that the instance is in the cluster.
6186
6187     """
6188     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6189     assert self.instance is not None, \
6190       "Cannot retrieve locked instance %s" % self.op.instance_name
6191     _CheckNodeOnline(self, self.instance.primary_node)
6192
6193   def Exec(self, feedback_fn):
6194     """Activate the disks.
6195
6196     """
6197     disks_ok, disks_info = \
6198               _AssembleInstanceDisks(self, self.instance,
6199                                      ignore_size=self.op.ignore_size)
6200     if not disks_ok:
6201       raise errors.OpExecError("Cannot activate block devices")
6202
6203     return disks_info
6204
6205
6206 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6207                            ignore_size=False):
6208   """Prepare the block devices for an instance.
6209
6210   This sets up the block devices on all nodes.
6211
6212   @type lu: L{LogicalUnit}
6213   @param lu: the logical unit on whose behalf we execute
6214   @type instance: L{objects.Instance}
6215   @param instance: the instance for whose disks we assemble
6216   @type disks: list of L{objects.Disk} or None
6217   @param disks: which disks to assemble (or all, if None)
6218   @type ignore_secondaries: boolean
6219   @param ignore_secondaries: if true, errors on secondary nodes
6220       won't result in an error return from the function
6221   @type ignore_size: boolean
6222   @param ignore_size: if true, the current known size of the disk
6223       will not be used during the disk activation, useful for cases
6224       when the size is wrong
6225   @return: False if the operation failed, otherwise a list of
6226       (host, instance_visible_name, node_visible_name)
6227       with the mapping from node devices to instance devices
6228
6229   """
6230   device_info = []
6231   disks_ok = True
6232   iname = instance.name
6233   disks = _ExpandCheckDisks(instance, disks)
6234
6235   # With the two passes mechanism we try to reduce the window of
6236   # opportunity for the race condition of switching DRBD to primary
6237   # before handshaking occured, but we do not eliminate it
6238
6239   # The proper fix would be to wait (with some limits) until the
6240   # connection has been made and drbd transitions from WFConnection
6241   # into any other network-connected state (Connected, SyncTarget,
6242   # SyncSource, etc.)
6243
6244   # 1st pass, assemble on all nodes in secondary mode
6245   for idx, inst_disk in enumerate(disks):
6246     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6247       if ignore_size:
6248         node_disk = node_disk.Copy()
6249         node_disk.UnsetSize()
6250       lu.cfg.SetDiskID(node_disk, node)
6251       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6252       msg = result.fail_msg
6253       if msg:
6254         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6255                            " (is_primary=False, pass=1): %s",
6256                            inst_disk.iv_name, node, msg)
6257         if not ignore_secondaries:
6258           disks_ok = False
6259
6260   # FIXME: race condition on drbd migration to primary
6261
6262   # 2nd pass, do only the primary node
6263   for idx, inst_disk in enumerate(disks):
6264     dev_path = None
6265
6266     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6267       if node != instance.primary_node:
6268         continue
6269       if ignore_size:
6270         node_disk = node_disk.Copy()
6271         node_disk.UnsetSize()
6272       lu.cfg.SetDiskID(node_disk, node)
6273       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6274       msg = result.fail_msg
6275       if msg:
6276         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6277                            " (is_primary=True, pass=2): %s",
6278                            inst_disk.iv_name, node, msg)
6279         disks_ok = False
6280       else:
6281         dev_path = result.payload
6282
6283     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6284
6285   # leave the disks configured for the primary node
6286   # this is a workaround that would be fixed better by
6287   # improving the logical/physical id handling
6288   for disk in disks:
6289     lu.cfg.SetDiskID(disk, instance.primary_node)
6290
6291   return disks_ok, device_info
6292
6293
6294 def _StartInstanceDisks(lu, instance, force):
6295   """Start the disks of an instance.
6296
6297   """
6298   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6299                                            ignore_secondaries=force)
6300   if not disks_ok:
6301     _ShutdownInstanceDisks(lu, instance)
6302     if force is not None and not force:
6303       lu.proc.LogWarning("", hint="If the message above refers to a"
6304                          " secondary node,"
6305                          " you can retry the operation using '--force'.")
6306     raise errors.OpExecError("Disk consistency error")
6307
6308
6309 class LUInstanceDeactivateDisks(NoHooksLU):
6310   """Shutdown an instance's disks.
6311
6312   """
6313   REQ_BGL = False
6314
6315   def ExpandNames(self):
6316     self._ExpandAndLockInstance()
6317     self.needed_locks[locking.LEVEL_NODE] = []
6318     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6319
6320   def DeclareLocks(self, level):
6321     if level == locking.LEVEL_NODE:
6322       self._LockInstancesNodes()
6323
6324   def CheckPrereq(self):
6325     """Check prerequisites.
6326
6327     This checks that the instance is in the cluster.
6328
6329     """
6330     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6331     assert self.instance is not None, \
6332       "Cannot retrieve locked instance %s" % self.op.instance_name
6333
6334   def Exec(self, feedback_fn):
6335     """Deactivate the disks
6336
6337     """
6338     instance = self.instance
6339     if self.op.force:
6340       _ShutdownInstanceDisks(self, instance)
6341     else:
6342       _SafeShutdownInstanceDisks(self, instance)
6343
6344
6345 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6346   """Shutdown block devices of an instance.
6347
6348   This function checks if an instance is running, before calling
6349   _ShutdownInstanceDisks.
6350
6351   """
6352   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6353   _ShutdownInstanceDisks(lu, instance, disks=disks)
6354
6355
6356 def _ExpandCheckDisks(instance, disks):
6357   """Return the instance disks selected by the disks list
6358
6359   @type disks: list of L{objects.Disk} or None
6360   @param disks: selected disks
6361   @rtype: list of L{objects.Disk}
6362   @return: selected instance disks to act on
6363
6364   """
6365   if disks is None:
6366     return instance.disks
6367   else:
6368     if not set(disks).issubset(instance.disks):
6369       raise errors.ProgrammerError("Can only act on disks belonging to the"
6370                                    " target instance")
6371     return disks
6372
6373
6374 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6375   """Shutdown block devices of an instance.
6376
6377   This does the shutdown on all nodes of the instance.
6378
6379   If the ignore_primary is false, errors on the primary node are
6380   ignored.
6381
6382   """
6383   all_result = True
6384   disks = _ExpandCheckDisks(instance, disks)
6385
6386   for disk in disks:
6387     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6388       lu.cfg.SetDiskID(top_disk, node)
6389       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6390       msg = result.fail_msg
6391       if msg:
6392         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6393                       disk.iv_name, node, msg)
6394         if ((node == instance.primary_node and not ignore_primary) or
6395             (node != instance.primary_node and not result.offline)):
6396           all_result = False
6397   return all_result
6398
6399
6400 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6401   """Checks if a node has enough free memory.
6402
6403   This function check if a given node has the needed amount of free
6404   memory. In case the node has less memory or we cannot get the
6405   information from the node, this function raise an OpPrereqError
6406   exception.
6407
6408   @type lu: C{LogicalUnit}
6409   @param lu: a logical unit from which we get configuration data
6410   @type node: C{str}
6411   @param node: the node to check
6412   @type reason: C{str}
6413   @param reason: string to use in the error message
6414   @type requested: C{int}
6415   @param requested: the amount of memory in MiB to check for
6416   @type hypervisor_name: C{str}
6417   @param hypervisor_name: the hypervisor to ask for memory stats
6418   @rtype: integer
6419   @return: node current free memory
6420   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6421       we cannot check the node
6422
6423   """
6424   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6425   nodeinfo[node].Raise("Can't get data from node %s" % node,
6426                        prereq=True, ecode=errors.ECODE_ENVIRON)
6427   (_, _, (hv_info, )) = nodeinfo[node].payload
6428
6429   free_mem = hv_info.get("memory_free", None)
6430   if not isinstance(free_mem, int):
6431     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6432                                " was '%s'" % (node, free_mem),
6433                                errors.ECODE_ENVIRON)
6434   if requested > free_mem:
6435     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6436                                " needed %s MiB, available %s MiB" %
6437                                (node, reason, requested, free_mem),
6438                                errors.ECODE_NORES)
6439   return free_mem
6440
6441
6442 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6443   """Checks if nodes have enough free disk space in the all VGs.
6444
6445   This function check if all given nodes have the needed amount of
6446   free disk. In case any node has less disk or we cannot get the
6447   information from the node, this function raise an OpPrereqError
6448   exception.
6449
6450   @type lu: C{LogicalUnit}
6451   @param lu: a logical unit from which we get configuration data
6452   @type nodenames: C{list}
6453   @param nodenames: the list of node names to check
6454   @type req_sizes: C{dict}
6455   @param req_sizes: the hash of vg and corresponding amount of disk in
6456       MiB to check for
6457   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6458       or we cannot check the node
6459
6460   """
6461   for vg, req_size in req_sizes.items():
6462     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6463
6464
6465 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6466   """Checks if nodes have enough free disk space in the specified VG.
6467
6468   This function check if all given nodes have the needed amount of
6469   free disk. In case any node has less disk or we cannot get the
6470   information from the node, this function raise an OpPrereqError
6471   exception.
6472
6473   @type lu: C{LogicalUnit}
6474   @param lu: a logical unit from which we get configuration data
6475   @type nodenames: C{list}
6476   @param nodenames: the list of node names to check
6477   @type vg: C{str}
6478   @param vg: the volume group to check
6479   @type requested: C{int}
6480   @param requested: the amount of disk in MiB to check for
6481   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6482       or we cannot check the node
6483
6484   """
6485   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6486   for node in nodenames:
6487     info = nodeinfo[node]
6488     info.Raise("Cannot get current information from node %s" % node,
6489                prereq=True, ecode=errors.ECODE_ENVIRON)
6490     (_, (vg_info, ), _) = info.payload
6491     vg_free = vg_info.get("vg_free", None)
6492     if not isinstance(vg_free, int):
6493       raise errors.OpPrereqError("Can't compute free disk space on node"
6494                                  " %s for vg %s, result was '%s'" %
6495                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6496     if requested > vg_free:
6497       raise errors.OpPrereqError("Not enough disk space on target node %s"
6498                                  " vg %s: required %d MiB, available %d MiB" %
6499                                  (node, vg, requested, vg_free),
6500                                  errors.ECODE_NORES)
6501
6502
6503 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6504   """Checks if nodes have enough physical CPUs
6505
6506   This function checks if all given nodes have the needed number of
6507   physical CPUs. In case any node has less CPUs or we cannot get the
6508   information from the node, this function raises an OpPrereqError
6509   exception.
6510
6511   @type lu: C{LogicalUnit}
6512   @param lu: a logical unit from which we get configuration data
6513   @type nodenames: C{list}
6514   @param nodenames: the list of node names to check
6515   @type requested: C{int}
6516   @param requested: the minimum acceptable number of physical CPUs
6517   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6518       or we cannot check the node
6519
6520   """
6521   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6522   for node in nodenames:
6523     info = nodeinfo[node]
6524     info.Raise("Cannot get current information from node %s" % node,
6525                prereq=True, ecode=errors.ECODE_ENVIRON)
6526     (_, _, (hv_info, )) = info.payload
6527     num_cpus = hv_info.get("cpu_total", None)
6528     if not isinstance(num_cpus, int):
6529       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6530                                  " on node %s, result was '%s'" %
6531                                  (node, num_cpus), errors.ECODE_ENVIRON)
6532     if requested > num_cpus:
6533       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6534                                  "required" % (node, num_cpus, requested),
6535                                  errors.ECODE_NORES)
6536
6537
6538 class LUInstanceStartup(LogicalUnit):
6539   """Starts an instance.
6540
6541   """
6542   HPATH = "instance-start"
6543   HTYPE = constants.HTYPE_INSTANCE
6544   REQ_BGL = False
6545
6546   def CheckArguments(self):
6547     # extra beparams
6548     if self.op.beparams:
6549       # fill the beparams dict
6550       objects.UpgradeBeParams(self.op.beparams)
6551       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6552
6553   def ExpandNames(self):
6554     self._ExpandAndLockInstance()
6555     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6556
6557   def DeclareLocks(self, level):
6558     if level == locking.LEVEL_NODE_RES:
6559       self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6560
6561   def BuildHooksEnv(self):
6562     """Build hooks env.
6563
6564     This runs on master, primary and secondary nodes of the instance.
6565
6566     """
6567     env = {
6568       "FORCE": self.op.force,
6569       }
6570
6571     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6572
6573     return env
6574
6575   def BuildHooksNodes(self):
6576     """Build hooks nodes.
6577
6578     """
6579     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6580     return (nl, nl)
6581
6582   def CheckPrereq(self):
6583     """Check prerequisites.
6584
6585     This checks that the instance is in the cluster.
6586
6587     """
6588     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6589     assert self.instance is not None, \
6590       "Cannot retrieve locked instance %s" % self.op.instance_name
6591
6592     # extra hvparams
6593     if self.op.hvparams:
6594       # check hypervisor parameter syntax (locally)
6595       cluster = self.cfg.GetClusterInfo()
6596       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6597       filled_hvp = cluster.FillHV(instance)
6598       filled_hvp.update(self.op.hvparams)
6599       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6600       hv_type.CheckParameterSyntax(filled_hvp)
6601       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6602
6603     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6604
6605     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6606
6607     if self.primary_offline and self.op.ignore_offline_nodes:
6608       self.proc.LogWarning("Ignoring offline primary node")
6609
6610       if self.op.hvparams or self.op.beparams:
6611         self.proc.LogWarning("Overridden parameters are ignored")
6612     else:
6613       _CheckNodeOnline(self, instance.primary_node)
6614
6615       bep = self.cfg.GetClusterInfo().FillBE(instance)
6616       bep.update(self.op.beparams)
6617
6618       # check bridges existence
6619       _CheckInstanceBridgesExist(self, instance)
6620
6621       remote_info = self.rpc.call_instance_info(instance.primary_node,
6622                                                 instance.name,
6623                                                 instance.hypervisor)
6624       remote_info.Raise("Error checking node %s" % instance.primary_node,
6625                         prereq=True, ecode=errors.ECODE_ENVIRON)
6626       if not remote_info.payload: # not running already
6627         _CheckNodeFreeMemory(self, instance.primary_node,
6628                              "starting instance %s" % instance.name,
6629                              bep[constants.BE_MINMEM], instance.hypervisor)
6630
6631   def Exec(self, feedback_fn):
6632     """Start the instance.
6633
6634     """
6635     instance = self.instance
6636     force = self.op.force
6637
6638     if not self.op.no_remember:
6639       self.cfg.MarkInstanceUp(instance.name)
6640
6641     if self.primary_offline:
6642       assert self.op.ignore_offline_nodes
6643       self.proc.LogInfo("Primary node offline, marked instance as started")
6644     else:
6645       node_current = instance.primary_node
6646
6647       _StartInstanceDisks(self, instance, force)
6648
6649       result = \
6650         self.rpc.call_instance_start(node_current,
6651                                      (instance, self.op.hvparams,
6652                                       self.op.beparams),
6653                                      self.op.startup_paused)
6654       msg = result.fail_msg
6655       if msg:
6656         _ShutdownInstanceDisks(self, instance)
6657         raise errors.OpExecError("Could not start instance: %s" % msg)
6658
6659
6660 class LUInstanceReboot(LogicalUnit):
6661   """Reboot an instance.
6662
6663   """
6664   HPATH = "instance-reboot"
6665   HTYPE = constants.HTYPE_INSTANCE
6666   REQ_BGL = False
6667
6668   def ExpandNames(self):
6669     self._ExpandAndLockInstance()
6670
6671   def BuildHooksEnv(self):
6672     """Build hooks env.
6673
6674     This runs on master, primary and secondary nodes of the instance.
6675
6676     """
6677     env = {
6678       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6679       "REBOOT_TYPE": self.op.reboot_type,
6680       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6681       }
6682
6683     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6684
6685     return env
6686
6687   def BuildHooksNodes(self):
6688     """Build hooks nodes.
6689
6690     """
6691     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6692     return (nl, nl)
6693
6694   def CheckPrereq(self):
6695     """Check prerequisites.
6696
6697     This checks that the instance is in the cluster.
6698
6699     """
6700     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6701     assert self.instance is not None, \
6702       "Cannot retrieve locked instance %s" % self.op.instance_name
6703     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6704     _CheckNodeOnline(self, instance.primary_node)
6705
6706     # check bridges existence
6707     _CheckInstanceBridgesExist(self, instance)
6708
6709   def Exec(self, feedback_fn):
6710     """Reboot the instance.
6711
6712     """
6713     instance = self.instance
6714     ignore_secondaries = self.op.ignore_secondaries
6715     reboot_type = self.op.reboot_type
6716
6717     remote_info = self.rpc.call_instance_info(instance.primary_node,
6718                                               instance.name,
6719                                               instance.hypervisor)
6720     remote_info.Raise("Error checking node %s" % instance.primary_node)
6721     instance_running = bool(remote_info.payload)
6722
6723     node_current = instance.primary_node
6724
6725     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6726                                             constants.INSTANCE_REBOOT_HARD]:
6727       for disk in instance.disks:
6728         self.cfg.SetDiskID(disk, node_current)
6729       result = self.rpc.call_instance_reboot(node_current, instance,
6730                                              reboot_type,
6731                                              self.op.shutdown_timeout)
6732       result.Raise("Could not reboot instance")
6733     else:
6734       if instance_running:
6735         result = self.rpc.call_instance_shutdown(node_current, instance,
6736                                                  self.op.shutdown_timeout)
6737         result.Raise("Could not shutdown instance for full reboot")
6738         _ShutdownInstanceDisks(self, instance)
6739       else:
6740         self.LogInfo("Instance %s was already stopped, starting now",
6741                      instance.name)
6742       _StartInstanceDisks(self, instance, ignore_secondaries)
6743       result = self.rpc.call_instance_start(node_current,
6744                                             (instance, None, None), False)
6745       msg = result.fail_msg
6746       if msg:
6747         _ShutdownInstanceDisks(self, instance)
6748         raise errors.OpExecError("Could not start instance for"
6749                                  " full reboot: %s" % msg)
6750
6751     self.cfg.MarkInstanceUp(instance.name)
6752
6753
6754 class LUInstanceShutdown(LogicalUnit):
6755   """Shutdown an instance.
6756
6757   """
6758   HPATH = "instance-stop"
6759   HTYPE = constants.HTYPE_INSTANCE
6760   REQ_BGL = False
6761
6762   def ExpandNames(self):
6763     self._ExpandAndLockInstance()
6764
6765   def BuildHooksEnv(self):
6766     """Build hooks env.
6767
6768     This runs on master, primary and secondary nodes of the instance.
6769
6770     """
6771     env = _BuildInstanceHookEnvByObject(self, self.instance)
6772     env["TIMEOUT"] = self.op.timeout
6773     return env
6774
6775   def BuildHooksNodes(self):
6776     """Build hooks nodes.
6777
6778     """
6779     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6780     return (nl, nl)
6781
6782   def CheckPrereq(self):
6783     """Check prerequisites.
6784
6785     This checks that the instance is in the cluster.
6786
6787     """
6788     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6789     assert self.instance is not None, \
6790       "Cannot retrieve locked instance %s" % self.op.instance_name
6791
6792     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6793
6794     self.primary_offline = \
6795       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6796
6797     if self.primary_offline and self.op.ignore_offline_nodes:
6798       self.proc.LogWarning("Ignoring offline primary node")
6799     else:
6800       _CheckNodeOnline(self, self.instance.primary_node)
6801
6802   def Exec(self, feedback_fn):
6803     """Shutdown the instance.
6804
6805     """
6806     instance = self.instance
6807     node_current = instance.primary_node
6808     timeout = self.op.timeout
6809
6810     if not self.op.no_remember:
6811       self.cfg.MarkInstanceDown(instance.name)
6812
6813     if self.primary_offline:
6814       assert self.op.ignore_offline_nodes
6815       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6816     else:
6817       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6818       msg = result.fail_msg
6819       if msg:
6820         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6821
6822       _ShutdownInstanceDisks(self, instance)
6823
6824
6825 class LUInstanceReinstall(LogicalUnit):
6826   """Reinstall an instance.
6827
6828   """
6829   HPATH = "instance-reinstall"
6830   HTYPE = constants.HTYPE_INSTANCE
6831   REQ_BGL = False
6832
6833   def ExpandNames(self):
6834     self._ExpandAndLockInstance()
6835
6836   def BuildHooksEnv(self):
6837     """Build hooks env.
6838
6839     This runs on master, primary and secondary nodes of the instance.
6840
6841     """
6842     return _BuildInstanceHookEnvByObject(self, self.instance)
6843
6844   def BuildHooksNodes(self):
6845     """Build hooks nodes.
6846
6847     """
6848     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6849     return (nl, nl)
6850
6851   def CheckPrereq(self):
6852     """Check prerequisites.
6853
6854     This checks that the instance is in the cluster and is not running.
6855
6856     """
6857     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6858     assert instance is not None, \
6859       "Cannot retrieve locked instance %s" % self.op.instance_name
6860     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6861                      " offline, cannot reinstall")
6862     for node in instance.secondary_nodes:
6863       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6864                        " cannot reinstall")
6865
6866     if instance.disk_template == constants.DT_DISKLESS:
6867       raise errors.OpPrereqError("Instance '%s' has no disks" %
6868                                  self.op.instance_name,
6869                                  errors.ECODE_INVAL)
6870     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6871
6872     if self.op.os_type is not None:
6873       # OS verification
6874       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6875       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6876       instance_os = self.op.os_type
6877     else:
6878       instance_os = instance.os
6879
6880     nodelist = list(instance.all_nodes)
6881
6882     if self.op.osparams:
6883       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6884       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6885       self.os_inst = i_osdict # the new dict (without defaults)
6886     else:
6887       self.os_inst = None
6888
6889     self.instance = instance
6890
6891   def Exec(self, feedback_fn):
6892     """Reinstall the instance.
6893
6894     """
6895     inst = self.instance
6896
6897     if self.op.os_type is not None:
6898       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6899       inst.os = self.op.os_type
6900       # Write to configuration
6901       self.cfg.Update(inst, feedback_fn)
6902
6903     _StartInstanceDisks(self, inst, None)
6904     try:
6905       feedback_fn("Running the instance OS create scripts...")
6906       # FIXME: pass debug option from opcode to backend
6907       result = self.rpc.call_instance_os_add(inst.primary_node,
6908                                              (inst, self.os_inst), True,
6909                                              self.op.debug_level)
6910       result.Raise("Could not install OS for instance %s on node %s" %
6911                    (inst.name, inst.primary_node))
6912     finally:
6913       _ShutdownInstanceDisks(self, inst)
6914
6915
6916 class LUInstanceRecreateDisks(LogicalUnit):
6917   """Recreate an instance's missing disks.
6918
6919   """
6920   HPATH = "instance-recreate-disks"
6921   HTYPE = constants.HTYPE_INSTANCE
6922   REQ_BGL = False
6923
6924   _MODIFYABLE = frozenset([
6925     constants.IDISK_SIZE,
6926     constants.IDISK_MODE,
6927     ])
6928
6929   # New or changed disk parameters may have different semantics
6930   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6931     constants.IDISK_ADOPT,
6932
6933     # TODO: Implement support changing VG while recreating
6934     constants.IDISK_VG,
6935     constants.IDISK_METAVG,
6936     ]))
6937
6938   def CheckArguments(self):
6939     if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6940       # Normalize and convert deprecated list of disk indices
6941       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
6942
6943     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
6944     if duplicates:
6945       raise errors.OpPrereqError("Some disks have been specified more than"
6946                                  " once: %s" % utils.CommaJoin(duplicates),
6947                                  errors.ECODE_INVAL)
6948
6949     for (idx, params) in self.op.disks:
6950       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6951       unsupported = frozenset(params.keys()) - self._MODIFYABLE
6952       if unsupported:
6953         raise errors.OpPrereqError("Parameters for disk %s try to change"
6954                                    " unmodifyable parameter(s): %s" %
6955                                    (idx, utils.CommaJoin(unsupported)),
6956                                    errors.ECODE_INVAL)
6957
6958   def ExpandNames(self):
6959     self._ExpandAndLockInstance()
6960     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6961     if self.op.nodes:
6962       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6963       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6964     else:
6965       self.needed_locks[locking.LEVEL_NODE] = []
6966     self.needed_locks[locking.LEVEL_NODE_RES] = []
6967
6968   def DeclareLocks(self, level):
6969     if level == locking.LEVEL_NODE:
6970       # if we replace the nodes, we only need to lock the old primary,
6971       # otherwise we need to lock all nodes for disk re-creation
6972       primary_only = bool(self.op.nodes)
6973       self._LockInstancesNodes(primary_only=primary_only)
6974     elif level == locking.LEVEL_NODE_RES:
6975       # Copy node locks
6976       self.needed_locks[locking.LEVEL_NODE_RES] = \
6977         self.needed_locks[locking.LEVEL_NODE][:]
6978
6979   def BuildHooksEnv(self):
6980     """Build hooks env.
6981
6982     This runs on master, primary and secondary nodes of the instance.
6983
6984     """
6985     return _BuildInstanceHookEnvByObject(self, self.instance)
6986
6987   def BuildHooksNodes(self):
6988     """Build hooks nodes.
6989
6990     """
6991     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6992     return (nl, nl)
6993
6994   def CheckPrereq(self):
6995     """Check prerequisites.
6996
6997     This checks that the instance is in the cluster and is not running.
6998
6999     """
7000     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7001     assert instance is not None, \
7002       "Cannot retrieve locked instance %s" % self.op.instance_name
7003     if self.op.nodes:
7004       if len(self.op.nodes) != len(instance.all_nodes):
7005         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7006                                    " %d replacement nodes were specified" %
7007                                    (instance.name, len(instance.all_nodes),
7008                                     len(self.op.nodes)),
7009                                    errors.ECODE_INVAL)
7010       assert instance.disk_template != constants.DT_DRBD8 or \
7011           len(self.op.nodes) == 2
7012       assert instance.disk_template != constants.DT_PLAIN or \
7013           len(self.op.nodes) == 1
7014       primary_node = self.op.nodes[0]
7015     else:
7016       primary_node = instance.primary_node
7017     _CheckNodeOnline(self, primary_node)
7018
7019     if instance.disk_template == constants.DT_DISKLESS:
7020       raise errors.OpPrereqError("Instance '%s' has no disks" %
7021                                  self.op.instance_name, errors.ECODE_INVAL)
7022
7023     # if we replace nodes *and* the old primary is offline, we don't
7024     # check
7025     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7026     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7027     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7028     if not (self.op.nodes and old_pnode.offline):
7029       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7030                           msg="cannot recreate disks")
7031
7032     if self.op.disks:
7033       self.disks = dict(self.op.disks)
7034     else:
7035       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7036
7037     maxidx = max(self.disks.keys())
7038     if maxidx >= len(instance.disks):
7039       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7040                                  errors.ECODE_INVAL)
7041
7042     if (self.op.nodes and
7043         sorted(self.disks.keys()) != range(len(instance.disks))):
7044       raise errors.OpPrereqError("Can't recreate disks partially and"
7045                                  " change the nodes at the same time",
7046                                  errors.ECODE_INVAL)
7047
7048     self.instance = instance
7049
7050   def Exec(self, feedback_fn):
7051     """Recreate the disks.
7052
7053     """
7054     instance = self.instance
7055
7056     assert (self.owned_locks(locking.LEVEL_NODE) ==
7057             self.owned_locks(locking.LEVEL_NODE_RES))
7058
7059     to_skip = []
7060     mods = [] # keeps track of needed changes
7061
7062     for idx, disk in enumerate(instance.disks):
7063       try:
7064         changes = self.disks[idx]
7065       except KeyError:
7066         # Disk should not be recreated
7067         to_skip.append(idx)
7068         continue
7069
7070       # update secondaries for disks, if needed
7071       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7072         # need to update the nodes and minors
7073         assert len(self.op.nodes) == 2
7074         assert len(disk.logical_id) == 6 # otherwise disk internals
7075                                          # have changed
7076         (_, _, old_port, _, _, old_secret) = disk.logical_id
7077         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7078         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7079                   new_minors[0], new_minors[1], old_secret)
7080         assert len(disk.logical_id) == len(new_id)
7081       else:
7082         new_id = None
7083
7084       mods.append((idx, new_id, changes))
7085
7086     # now that we have passed all asserts above, we can apply the mods
7087     # in a single run (to avoid partial changes)
7088     for idx, new_id, changes in mods:
7089       disk = instance.disks[idx]
7090       if new_id is not None:
7091         assert disk.dev_type == constants.LD_DRBD8
7092         disk.logical_id = new_id
7093       if changes:
7094         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7095                     mode=changes.get(constants.IDISK_MODE, None))
7096
7097     # change primary node, if needed
7098     if self.op.nodes:
7099       instance.primary_node = self.op.nodes[0]
7100       self.LogWarning("Changing the instance's nodes, you will have to"
7101                       " remove any disks left on the older nodes manually")
7102
7103     if self.op.nodes:
7104       self.cfg.Update(instance, feedback_fn)
7105
7106     _CreateDisks(self, instance, to_skip=to_skip)
7107
7108
7109 class LUInstanceRename(LogicalUnit):
7110   """Rename an instance.
7111
7112   """
7113   HPATH = "instance-rename"
7114   HTYPE = constants.HTYPE_INSTANCE
7115
7116   def CheckArguments(self):
7117     """Check arguments.
7118
7119     """
7120     if self.op.ip_check and not self.op.name_check:
7121       # TODO: make the ip check more flexible and not depend on the name check
7122       raise errors.OpPrereqError("IP address check requires a name check",
7123                                  errors.ECODE_INVAL)
7124
7125   def BuildHooksEnv(self):
7126     """Build hooks env.
7127
7128     This runs on master, primary and secondary nodes of the instance.
7129
7130     """
7131     env = _BuildInstanceHookEnvByObject(self, self.instance)
7132     env["INSTANCE_NEW_NAME"] = self.op.new_name
7133     return env
7134
7135   def BuildHooksNodes(self):
7136     """Build hooks nodes.
7137
7138     """
7139     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7140     return (nl, nl)
7141
7142   def CheckPrereq(self):
7143     """Check prerequisites.
7144
7145     This checks that the instance is in the cluster and is not running.
7146
7147     """
7148     self.op.instance_name = _ExpandInstanceName(self.cfg,
7149                                                 self.op.instance_name)
7150     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7151     assert instance is not None
7152     _CheckNodeOnline(self, instance.primary_node)
7153     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7154                         msg="cannot rename")
7155     self.instance = instance
7156
7157     new_name = self.op.new_name
7158     if self.op.name_check:
7159       hostname = netutils.GetHostname(name=new_name)
7160       if hostname.name != new_name:
7161         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7162                      hostname.name)
7163       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7164         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7165                                     " same as given hostname '%s'") %
7166                                     (hostname.name, self.op.new_name),
7167                                     errors.ECODE_INVAL)
7168       new_name = self.op.new_name = hostname.name
7169       if (self.op.ip_check and
7170           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7171         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7172                                    (hostname.ip, new_name),
7173                                    errors.ECODE_NOTUNIQUE)
7174
7175     instance_list = self.cfg.GetInstanceList()
7176     if new_name in instance_list and new_name != instance.name:
7177       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7178                                  new_name, errors.ECODE_EXISTS)
7179
7180   def Exec(self, feedback_fn):
7181     """Rename the instance.
7182
7183     """
7184     inst = self.instance
7185     old_name = inst.name
7186
7187     rename_file_storage = False
7188     if (inst.disk_template in constants.DTS_FILEBASED and
7189         self.op.new_name != inst.name):
7190       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7191       rename_file_storage = True
7192
7193     self.cfg.RenameInstance(inst.name, self.op.new_name)
7194     # Change the instance lock. This is definitely safe while we hold the BGL.
7195     # Otherwise the new lock would have to be added in acquired mode.
7196     assert self.REQ_BGL
7197     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7198     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7199
7200     # re-read the instance from the configuration after rename
7201     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7202
7203     if rename_file_storage:
7204       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7205       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7206                                                      old_file_storage_dir,
7207                                                      new_file_storage_dir)
7208       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7209                    " (but the instance has been renamed in Ganeti)" %
7210                    (inst.primary_node, old_file_storage_dir,
7211                     new_file_storage_dir))
7212
7213     _StartInstanceDisks(self, inst, None)
7214     try:
7215       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7216                                                  old_name, self.op.debug_level)
7217       msg = result.fail_msg
7218       if msg:
7219         msg = ("Could not run OS rename script for instance %s on node %s"
7220                " (but the instance has been renamed in Ganeti): %s" %
7221                (inst.name, inst.primary_node, msg))
7222         self.proc.LogWarning(msg)
7223     finally:
7224       _ShutdownInstanceDisks(self, inst)
7225
7226     return inst.name
7227
7228
7229 class LUInstanceRemove(LogicalUnit):
7230   """Remove an instance.
7231
7232   """
7233   HPATH = "instance-remove"
7234   HTYPE = constants.HTYPE_INSTANCE
7235   REQ_BGL = False
7236
7237   def ExpandNames(self):
7238     self._ExpandAndLockInstance()
7239     self.needed_locks[locking.LEVEL_NODE] = []
7240     self.needed_locks[locking.LEVEL_NODE_RES] = []
7241     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7242
7243   def DeclareLocks(self, level):
7244     if level == locking.LEVEL_NODE:
7245       self._LockInstancesNodes()
7246     elif level == locking.LEVEL_NODE_RES:
7247       # Copy node locks
7248       self.needed_locks[locking.LEVEL_NODE_RES] = \
7249         self.needed_locks[locking.LEVEL_NODE][:]
7250
7251   def BuildHooksEnv(self):
7252     """Build hooks env.
7253
7254     This runs on master, primary and secondary nodes of the instance.
7255
7256     """
7257     env = _BuildInstanceHookEnvByObject(self, self.instance)
7258     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7259     return env
7260
7261   def BuildHooksNodes(self):
7262     """Build hooks nodes.
7263
7264     """
7265     nl = [self.cfg.GetMasterNode()]
7266     nl_post = list(self.instance.all_nodes) + nl
7267     return (nl, nl_post)
7268
7269   def CheckPrereq(self):
7270     """Check prerequisites.
7271
7272     This checks that the instance is in the cluster.
7273
7274     """
7275     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7276     assert self.instance is not None, \
7277       "Cannot retrieve locked instance %s" % self.op.instance_name
7278
7279   def Exec(self, feedback_fn):
7280     """Remove the instance.
7281
7282     """
7283     instance = self.instance
7284     logging.info("Shutting down instance %s on node %s",
7285                  instance.name, instance.primary_node)
7286
7287     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7288                                              self.op.shutdown_timeout)
7289     msg = result.fail_msg
7290     if msg:
7291       if self.op.ignore_failures:
7292         feedback_fn("Warning: can't shutdown instance: %s" % msg)
7293       else:
7294         raise errors.OpExecError("Could not shutdown instance %s on"
7295                                  " node %s: %s" %
7296                                  (instance.name, instance.primary_node, msg))
7297
7298     assert (self.owned_locks(locking.LEVEL_NODE) ==
7299             self.owned_locks(locking.LEVEL_NODE_RES))
7300     assert not (set(instance.all_nodes) -
7301                 self.owned_locks(locking.LEVEL_NODE)), \
7302       "Not owning correct locks"
7303
7304     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7305
7306
7307 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7308   """Utility function to remove an instance.
7309
7310   """
7311   logging.info("Removing block devices for instance %s", instance.name)
7312
7313   if not _RemoveDisks(lu, instance):
7314     if not ignore_failures:
7315       raise errors.OpExecError("Can't remove instance's disks")
7316     feedback_fn("Warning: can't remove instance's disks")
7317
7318   logging.info("Removing instance %s out of cluster config", instance.name)
7319
7320   lu.cfg.RemoveInstance(instance.name)
7321
7322   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7323     "Instance lock removal conflict"
7324
7325   # Remove lock for the instance
7326   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7327
7328
7329 class LUInstanceQuery(NoHooksLU):
7330   """Logical unit for querying instances.
7331
7332   """
7333   # pylint: disable=W0142
7334   REQ_BGL = False
7335
7336   def CheckArguments(self):
7337     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7338                              self.op.output_fields, self.op.use_locking)
7339
7340   def ExpandNames(self):
7341     self.iq.ExpandNames(self)
7342
7343   def DeclareLocks(self, level):
7344     self.iq.DeclareLocks(self, level)
7345
7346   def Exec(self, feedback_fn):
7347     return self.iq.OldStyleQuery(self)
7348
7349
7350 class LUInstanceFailover(LogicalUnit):
7351   """Failover an instance.
7352
7353   """
7354   HPATH = "instance-failover"
7355   HTYPE = constants.HTYPE_INSTANCE
7356   REQ_BGL = False
7357
7358   def CheckArguments(self):
7359     """Check the arguments.
7360
7361     """
7362     self.iallocator = getattr(self.op, "iallocator", None)
7363     self.target_node = getattr(self.op, "target_node", None)
7364
7365   def ExpandNames(self):
7366     self._ExpandAndLockInstance()
7367
7368     if self.op.target_node is not None:
7369       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7370
7371     self.needed_locks[locking.LEVEL_NODE] = []
7372     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7373
7374     self.needed_locks[locking.LEVEL_NODE_RES] = []
7375     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7376
7377     ignore_consistency = self.op.ignore_consistency
7378     shutdown_timeout = self.op.shutdown_timeout
7379     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7380                                        cleanup=False,
7381                                        failover=True,
7382                                        ignore_consistency=ignore_consistency,
7383                                        shutdown_timeout=shutdown_timeout,
7384                                        ignore_ipolicy=self.op.ignore_ipolicy)
7385     self.tasklets = [self._migrater]
7386
7387   def DeclareLocks(self, level):
7388     if level == locking.LEVEL_NODE:
7389       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7390       if instance.disk_template in constants.DTS_EXT_MIRROR:
7391         if self.op.target_node is None:
7392           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7393         else:
7394           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7395                                                    self.op.target_node]
7396         del self.recalculate_locks[locking.LEVEL_NODE]
7397       else:
7398         self._LockInstancesNodes()
7399     elif level == locking.LEVEL_NODE_RES:
7400       # Copy node locks
7401       self.needed_locks[locking.LEVEL_NODE_RES] = \
7402         self.needed_locks[locking.LEVEL_NODE][:]
7403
7404   def BuildHooksEnv(self):
7405     """Build hooks env.
7406
7407     This runs on master, primary and secondary nodes of the instance.
7408
7409     """
7410     instance = self._migrater.instance
7411     source_node = instance.primary_node
7412     target_node = self.op.target_node
7413     env = {
7414       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7415       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7416       "OLD_PRIMARY": source_node,
7417       "NEW_PRIMARY": target_node,
7418       }
7419
7420     if instance.disk_template in constants.DTS_INT_MIRROR:
7421       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7422       env["NEW_SECONDARY"] = source_node
7423     else:
7424       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7425
7426     env.update(_BuildInstanceHookEnvByObject(self, instance))
7427
7428     return env
7429
7430   def BuildHooksNodes(self):
7431     """Build hooks nodes.
7432
7433     """
7434     instance = self._migrater.instance
7435     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7436     return (nl, nl + [instance.primary_node])
7437
7438
7439 class LUInstanceMigrate(LogicalUnit):
7440   """Migrate an instance.
7441
7442   This is migration without shutting down, compared to the failover,
7443   which is done with shutdown.
7444
7445   """
7446   HPATH = "instance-migrate"
7447   HTYPE = constants.HTYPE_INSTANCE
7448   REQ_BGL = False
7449
7450   def ExpandNames(self):
7451     self._ExpandAndLockInstance()
7452
7453     if self.op.target_node is not None:
7454       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7455
7456     self.needed_locks[locking.LEVEL_NODE] = []
7457     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7458
7459     self.needed_locks[locking.LEVEL_NODE] = []
7460     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7461
7462     self._migrater = \
7463       TLMigrateInstance(self, self.op.instance_name,
7464                         cleanup=self.op.cleanup,
7465                         failover=False,
7466                         fallback=self.op.allow_failover,
7467                         allow_runtime_changes=self.op.allow_runtime_changes,
7468                         ignore_ipolicy=self.op.ignore_ipolicy)
7469     self.tasklets = [self._migrater]
7470
7471   def DeclareLocks(self, level):
7472     if level == locking.LEVEL_NODE:
7473       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7474       if instance.disk_template in constants.DTS_EXT_MIRROR:
7475         if self.op.target_node is None:
7476           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7477         else:
7478           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7479                                                    self.op.target_node]
7480         del self.recalculate_locks[locking.LEVEL_NODE]
7481       else:
7482         self._LockInstancesNodes()
7483     elif level == locking.LEVEL_NODE_RES:
7484       # Copy node locks
7485       self.needed_locks[locking.LEVEL_NODE_RES] = \
7486         self.needed_locks[locking.LEVEL_NODE][:]
7487
7488   def BuildHooksEnv(self):
7489     """Build hooks env.
7490
7491     This runs on master, primary and secondary nodes of the instance.
7492
7493     """
7494     instance = self._migrater.instance
7495     source_node = instance.primary_node
7496     target_node = self.op.target_node
7497     env = _BuildInstanceHookEnvByObject(self, instance)
7498     env.update({
7499       "MIGRATE_LIVE": self._migrater.live,
7500       "MIGRATE_CLEANUP": self.op.cleanup,
7501       "OLD_PRIMARY": source_node,
7502       "NEW_PRIMARY": target_node,
7503       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7504       })
7505
7506     if instance.disk_template in constants.DTS_INT_MIRROR:
7507       env["OLD_SECONDARY"] = target_node
7508       env["NEW_SECONDARY"] = source_node
7509     else:
7510       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7511
7512     return env
7513
7514   def BuildHooksNodes(self):
7515     """Build hooks nodes.
7516
7517     """
7518     instance = self._migrater.instance
7519     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7520     return (nl, nl + [instance.primary_node])
7521
7522
7523 class LUInstanceMove(LogicalUnit):
7524   """Move an instance by data-copying.
7525
7526   """
7527   HPATH = "instance-move"
7528   HTYPE = constants.HTYPE_INSTANCE
7529   REQ_BGL = False
7530
7531   def ExpandNames(self):
7532     self._ExpandAndLockInstance()
7533     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7534     self.op.target_node = target_node
7535     self.needed_locks[locking.LEVEL_NODE] = [target_node]
7536     self.needed_locks[locking.LEVEL_NODE_RES] = []
7537     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7538
7539   def DeclareLocks(self, level):
7540     if level == locking.LEVEL_NODE:
7541       self._LockInstancesNodes(primary_only=True)
7542     elif level == locking.LEVEL_NODE_RES:
7543       # Copy node locks
7544       self.needed_locks[locking.LEVEL_NODE_RES] = \
7545         self.needed_locks[locking.LEVEL_NODE][:]
7546
7547   def BuildHooksEnv(self):
7548     """Build hooks env.
7549
7550     This runs on master, primary and secondary nodes of the instance.
7551
7552     """
7553     env = {
7554       "TARGET_NODE": self.op.target_node,
7555       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7556       }
7557     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7558     return env
7559
7560   def BuildHooksNodes(self):
7561     """Build hooks nodes.
7562
7563     """
7564     nl = [
7565       self.cfg.GetMasterNode(),
7566       self.instance.primary_node,
7567       self.op.target_node,
7568       ]
7569     return (nl, nl)
7570
7571   def CheckPrereq(self):
7572     """Check prerequisites.
7573
7574     This checks that the instance is in the cluster.
7575
7576     """
7577     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7578     assert self.instance is not None, \
7579       "Cannot retrieve locked instance %s" % self.op.instance_name
7580
7581     node = self.cfg.GetNodeInfo(self.op.target_node)
7582     assert node is not None, \
7583       "Cannot retrieve locked node %s" % self.op.target_node
7584
7585     self.target_node = target_node = node.name
7586
7587     if target_node == instance.primary_node:
7588       raise errors.OpPrereqError("Instance %s is already on the node %s" %
7589                                  (instance.name, target_node),
7590                                  errors.ECODE_STATE)
7591
7592     bep = self.cfg.GetClusterInfo().FillBE(instance)
7593
7594     for idx, dsk in enumerate(instance.disks):
7595       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7596         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7597                                    " cannot copy" % idx, errors.ECODE_STATE)
7598
7599     _CheckNodeOnline(self, target_node)
7600     _CheckNodeNotDrained(self, target_node)
7601     _CheckNodeVmCapable(self, target_node)
7602     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7603                                      self.cfg.GetNodeGroup(node.group))
7604     _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7605                             ignore=self.op.ignore_ipolicy)
7606
7607     if instance.admin_state == constants.ADMINST_UP:
7608       # check memory requirements on the secondary node
7609       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7610                            instance.name, bep[constants.BE_MAXMEM],
7611                            instance.hypervisor)
7612     else:
7613       self.LogInfo("Not checking memory on the secondary node as"
7614                    " instance will not be started")
7615
7616     # check bridge existance
7617     _CheckInstanceBridgesExist(self, instance, node=target_node)
7618
7619   def Exec(self, feedback_fn):
7620     """Move an instance.
7621
7622     The move is done by shutting it down on its present node, copying
7623     the data over (slow) and starting it on the new node.
7624
7625     """
7626     instance = self.instance
7627
7628     source_node = instance.primary_node
7629     target_node = self.target_node
7630
7631     self.LogInfo("Shutting down instance %s on source node %s",
7632                  instance.name, source_node)
7633
7634     assert (self.owned_locks(locking.LEVEL_NODE) ==
7635             self.owned_locks(locking.LEVEL_NODE_RES))
7636
7637     result = self.rpc.call_instance_shutdown(source_node, instance,
7638                                              self.op.shutdown_timeout)
7639     msg = result.fail_msg
7640     if msg:
7641       if self.op.ignore_consistency:
7642         self.proc.LogWarning("Could not shutdown instance %s on node %s."
7643                              " Proceeding anyway. Please make sure node"
7644                              " %s is down. Error details: %s",
7645                              instance.name, source_node, source_node, msg)
7646       else:
7647         raise errors.OpExecError("Could not shutdown instance %s on"
7648                                  " node %s: %s" %
7649                                  (instance.name, source_node, msg))
7650
7651     # create the target disks
7652     try:
7653       _CreateDisks(self, instance, target_node=target_node)
7654     except errors.OpExecError:
7655       self.LogWarning("Device creation failed, reverting...")
7656       try:
7657         _RemoveDisks(self, instance, target_node=target_node)
7658       finally:
7659         self.cfg.ReleaseDRBDMinors(instance.name)
7660         raise
7661
7662     cluster_name = self.cfg.GetClusterInfo().cluster_name
7663
7664     errs = []
7665     # activate, get path, copy the data over
7666     for idx, disk in enumerate(instance.disks):
7667       self.LogInfo("Copying data for disk %d", idx)
7668       result = self.rpc.call_blockdev_assemble(target_node, disk,
7669                                                instance.name, True, idx)
7670       if result.fail_msg:
7671         self.LogWarning("Can't assemble newly created disk %d: %s",
7672                         idx, result.fail_msg)
7673         errs.append(result.fail_msg)
7674         break
7675       dev_path = result.payload
7676       result = self.rpc.call_blockdev_export(source_node, disk,
7677                                              target_node, dev_path,
7678                                              cluster_name)
7679       if result.fail_msg:
7680         self.LogWarning("Can't copy data over for disk %d: %s",
7681                         idx, result.fail_msg)
7682         errs.append(result.fail_msg)
7683         break
7684
7685     if errs:
7686       self.LogWarning("Some disks failed to copy, aborting")
7687       try:
7688         _RemoveDisks(self, instance, target_node=target_node)
7689       finally:
7690         self.cfg.ReleaseDRBDMinors(instance.name)
7691         raise errors.OpExecError("Errors during disk copy: %s" %
7692                                  (",".join(errs),))
7693
7694     instance.primary_node = target_node
7695     self.cfg.Update(instance, feedback_fn)
7696
7697     self.LogInfo("Removing the disks on the original node")
7698     _RemoveDisks(self, instance, target_node=source_node)
7699
7700     # Only start the instance if it's marked as up
7701     if instance.admin_state == constants.ADMINST_UP:
7702       self.LogInfo("Starting instance %s on node %s",
7703                    instance.name, target_node)
7704
7705       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7706                                            ignore_secondaries=True)
7707       if not disks_ok:
7708         _ShutdownInstanceDisks(self, instance)
7709         raise errors.OpExecError("Can't activate the instance's disks")
7710
7711       result = self.rpc.call_instance_start(target_node,
7712                                             (instance, None, None), False)
7713       msg = result.fail_msg
7714       if msg:
7715         _ShutdownInstanceDisks(self, instance)
7716         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7717                                  (instance.name, target_node, msg))
7718
7719
7720 class LUNodeMigrate(LogicalUnit):
7721   """Migrate all instances from a node.
7722
7723   """
7724   HPATH = "node-migrate"
7725   HTYPE = constants.HTYPE_NODE
7726   REQ_BGL = False
7727
7728   def CheckArguments(self):
7729     pass
7730
7731   def ExpandNames(self):
7732     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7733
7734     self.share_locks = _ShareAll()
7735     self.needed_locks = {
7736       locking.LEVEL_NODE: [self.op.node_name],
7737       }
7738
7739   def BuildHooksEnv(self):
7740     """Build hooks env.
7741
7742     This runs on the master, the primary and all the secondaries.
7743
7744     """
7745     return {
7746       "NODE_NAME": self.op.node_name,
7747       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7748       }
7749
7750   def BuildHooksNodes(self):
7751     """Build hooks nodes.
7752
7753     """
7754     nl = [self.cfg.GetMasterNode()]
7755     return (nl, nl)
7756
7757   def CheckPrereq(self):
7758     pass
7759
7760   def Exec(self, feedback_fn):
7761     # Prepare jobs for migration instances
7762     allow_runtime_changes = self.op.allow_runtime_changes
7763     jobs = [
7764       [opcodes.OpInstanceMigrate(instance_name=inst.name,
7765                                  mode=self.op.mode,
7766                                  live=self.op.live,
7767                                  iallocator=self.op.iallocator,
7768                                  target_node=self.op.target_node,
7769                                  allow_runtime_changes=allow_runtime_changes,
7770                                  ignore_ipolicy=self.op.ignore_ipolicy)]
7771       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7772       ]
7773
7774     # TODO: Run iallocator in this opcode and pass correct placement options to
7775     # OpInstanceMigrate. Since other jobs can modify the cluster between
7776     # running the iallocator and the actual migration, a good consistency model
7777     # will have to be found.
7778
7779     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7780             frozenset([self.op.node_name]))
7781
7782     return ResultWithJobs(jobs)
7783
7784
7785 class TLMigrateInstance(Tasklet):
7786   """Tasklet class for instance migration.
7787
7788   @type live: boolean
7789   @ivar live: whether the migration will be done live or non-live;
7790       this variable is initalized only after CheckPrereq has run
7791   @type cleanup: boolean
7792   @ivar cleanup: Wheater we cleanup from a failed migration
7793   @type iallocator: string
7794   @ivar iallocator: The iallocator used to determine target_node
7795   @type target_node: string
7796   @ivar target_node: If given, the target_node to reallocate the instance to
7797   @type failover: boolean
7798   @ivar failover: Whether operation results in failover or migration
7799   @type fallback: boolean
7800   @ivar fallback: Whether fallback to failover is allowed if migration not
7801                   possible
7802   @type ignore_consistency: boolean
7803   @ivar ignore_consistency: Wheter we should ignore consistency between source
7804                             and target node
7805   @type shutdown_timeout: int
7806   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7807   @type ignore_ipolicy: bool
7808   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7809
7810   """
7811
7812   # Constants
7813   _MIGRATION_POLL_INTERVAL = 1      # seconds
7814   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7815
7816   def __init__(self, lu, instance_name, cleanup=False,
7817                failover=False, fallback=False,
7818                ignore_consistency=False,
7819                allow_runtime_changes=True,
7820                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7821                ignore_ipolicy=False):
7822     """Initializes this class.
7823
7824     """
7825     Tasklet.__init__(self, lu)
7826
7827     # Parameters
7828     self.instance_name = instance_name
7829     self.cleanup = cleanup
7830     self.live = False # will be overridden later
7831     self.failover = failover
7832     self.fallback = fallback
7833     self.ignore_consistency = ignore_consistency
7834     self.shutdown_timeout = shutdown_timeout
7835     self.ignore_ipolicy = ignore_ipolicy
7836     self.allow_runtime_changes = allow_runtime_changes
7837
7838   def CheckPrereq(self):
7839     """Check prerequisites.
7840
7841     This checks that the instance is in the cluster.
7842
7843     """
7844     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7845     instance = self.cfg.GetInstanceInfo(instance_name)
7846     assert instance is not None
7847     self.instance = instance
7848     cluster = self.cfg.GetClusterInfo()
7849
7850     if (not self.cleanup and
7851         not instance.admin_state == constants.ADMINST_UP and
7852         not self.failover and self.fallback):
7853       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7854                       " switching to failover")
7855       self.failover = True
7856
7857     if instance.disk_template not in constants.DTS_MIRRORED:
7858       if self.failover:
7859         text = "failovers"
7860       else:
7861         text = "migrations"
7862       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7863                                  " %s" % (instance.disk_template, text),
7864                                  errors.ECODE_STATE)
7865
7866     if instance.disk_template in constants.DTS_EXT_MIRROR:
7867       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7868
7869       if self.lu.op.iallocator:
7870         self._RunAllocator()
7871       else:
7872         # We set set self.target_node as it is required by
7873         # BuildHooksEnv
7874         self.target_node = self.lu.op.target_node
7875
7876       # Check that the target node is correct in terms of instance policy
7877       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7878       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7879       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7880       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7881                               ignore=self.ignore_ipolicy)
7882
7883       # self.target_node is already populated, either directly or by the
7884       # iallocator run
7885       target_node = self.target_node
7886       if self.target_node == instance.primary_node:
7887         raise errors.OpPrereqError("Cannot migrate instance %s"
7888                                    " to its primary (%s)" %
7889                                    (instance.name, instance.primary_node))
7890
7891       if len(self.lu.tasklets) == 1:
7892         # It is safe to release locks only when we're the only tasklet
7893         # in the LU
7894         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7895                       keep=[instance.primary_node, self.target_node])
7896
7897     else:
7898       secondary_nodes = instance.secondary_nodes
7899       if not secondary_nodes:
7900         raise errors.ConfigurationError("No secondary node but using"
7901                                         " %s disk template" %
7902                                         instance.disk_template)
7903       target_node = secondary_nodes[0]
7904       if self.lu.op.iallocator or (self.lu.op.target_node and
7905                                    self.lu.op.target_node != target_node):
7906         if self.failover:
7907           text = "failed over"
7908         else:
7909           text = "migrated"
7910         raise errors.OpPrereqError("Instances with disk template %s cannot"
7911                                    " be %s to arbitrary nodes"
7912                                    " (neither an iallocator nor a target"
7913                                    " node can be passed)" %
7914                                    (instance.disk_template, text),
7915                                    errors.ECODE_INVAL)
7916       nodeinfo = self.cfg.GetNodeInfo(target_node)
7917       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7918       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7919       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7920                               ignore=self.ignore_ipolicy)
7921
7922     i_be = cluster.FillBE(instance)
7923
7924     # check memory requirements on the secondary node
7925     if (not self.cleanup and
7926          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7927       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7928                                                "migrating instance %s" %
7929                                                instance.name,
7930                                                i_be[constants.BE_MINMEM],
7931                                                instance.hypervisor)
7932     else:
7933       self.lu.LogInfo("Not checking memory on the secondary node as"
7934                       " instance will not be started")
7935
7936     # check if failover must be forced instead of migration
7937     if (not self.cleanup and not self.failover and
7938         i_be[constants.BE_ALWAYS_FAILOVER]):
7939       if self.fallback:
7940         self.lu.LogInfo("Instance configured to always failover; fallback"
7941                         " to failover")
7942         self.failover = True
7943       else:
7944         raise errors.OpPrereqError("This instance has been configured to"
7945                                    " always failover, please allow failover",
7946                                    errors.ECODE_STATE)
7947
7948     # check bridge existance
7949     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7950
7951     if not self.cleanup:
7952       _CheckNodeNotDrained(self.lu, target_node)
7953       if not self.failover:
7954         result = self.rpc.call_instance_migratable(instance.primary_node,
7955                                                    instance)
7956         if result.fail_msg and self.fallback:
7957           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7958                           " failover")
7959           self.failover = True
7960         else:
7961           result.Raise("Can't migrate, please use failover",
7962                        prereq=True, ecode=errors.ECODE_STATE)
7963
7964     assert not (self.failover and self.cleanup)
7965
7966     if not self.failover:
7967       if self.lu.op.live is not None and self.lu.op.mode is not None:
7968         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7969                                    " parameters are accepted",
7970                                    errors.ECODE_INVAL)
7971       if self.lu.op.live is not None:
7972         if self.lu.op.live:
7973           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7974         else:
7975           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7976         # reset the 'live' parameter to None so that repeated
7977         # invocations of CheckPrereq do not raise an exception
7978         self.lu.op.live = None
7979       elif self.lu.op.mode is None:
7980         # read the default value from the hypervisor
7981         i_hv = cluster.FillHV(self.instance, skip_globals=False)
7982         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7983
7984       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7985     else:
7986       # Failover is never live
7987       self.live = False
7988
7989     if not (self.failover or self.cleanup):
7990       remote_info = self.rpc.call_instance_info(instance.primary_node,
7991                                                 instance.name,
7992                                                 instance.hypervisor)
7993       remote_info.Raise("Error checking instance on node %s" %
7994                         instance.primary_node)
7995       instance_running = bool(remote_info.payload)
7996       if instance_running:
7997         self.current_mem = int(remote_info.payload["memory"])
7998
7999   def _RunAllocator(self):
8000     """Run the allocator based on input opcode.
8001
8002     """
8003     # FIXME: add a self.ignore_ipolicy option
8004     ial = IAllocator(self.cfg, self.rpc,
8005                      mode=constants.IALLOCATOR_MODE_RELOC,
8006                      name=self.instance_name,
8007                      # TODO See why hail breaks with a single node below
8008                      relocate_from=[self.instance.primary_node,
8009                                     self.instance.primary_node],
8010                      )
8011
8012     ial.Run(self.lu.op.iallocator)
8013
8014     if not ial.success:
8015       raise errors.OpPrereqError("Can't compute nodes using"
8016                                  " iallocator '%s': %s" %
8017                                  (self.lu.op.iallocator, ial.info),
8018                                  errors.ECODE_NORES)
8019     if len(ial.result) != ial.required_nodes:
8020       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8021                                  " of nodes (%s), required %s" %
8022                                  (self.lu.op.iallocator, len(ial.result),
8023                                   ial.required_nodes), errors.ECODE_FAULT)
8024     self.target_node = ial.result[0]
8025     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8026                  self.instance_name, self.lu.op.iallocator,
8027                  utils.CommaJoin(ial.result))
8028
8029   def _WaitUntilSync(self):
8030     """Poll with custom rpc for disk sync.
8031
8032     This uses our own step-based rpc call.
8033
8034     """
8035     self.feedback_fn("* wait until resync is done")
8036     all_done = False
8037     while not all_done:
8038       all_done = True
8039       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8040                                             self.nodes_ip,
8041                                             self.instance.disks)
8042       min_percent = 100
8043       for node, nres in result.items():
8044         nres.Raise("Cannot resync disks on node %s" % node)
8045         node_done, node_percent = nres.payload
8046         all_done = all_done and node_done
8047         if node_percent is not None:
8048           min_percent = min(min_percent, node_percent)
8049       if not all_done:
8050         if min_percent < 100:
8051           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8052         time.sleep(2)
8053
8054   def _EnsureSecondary(self, node):
8055     """Demote a node to secondary.
8056
8057     """
8058     self.feedback_fn("* switching node %s to secondary mode" % node)
8059
8060     for dev in self.instance.disks:
8061       self.cfg.SetDiskID(dev, node)
8062
8063     result = self.rpc.call_blockdev_close(node, self.instance.name,
8064                                           self.instance.disks)
8065     result.Raise("Cannot change disk to secondary on node %s" % node)
8066
8067   def _GoStandalone(self):
8068     """Disconnect from the network.
8069
8070     """
8071     self.feedback_fn("* changing into standalone mode")
8072     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8073                                                self.instance.disks)
8074     for node, nres in result.items():
8075       nres.Raise("Cannot disconnect disks node %s" % node)
8076
8077   def _GoReconnect(self, multimaster):
8078     """Reconnect to the network.
8079
8080     """
8081     if multimaster:
8082       msg = "dual-master"
8083     else:
8084       msg = "single-master"
8085     self.feedback_fn("* changing disks into %s mode" % msg)
8086     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8087                                            self.instance.disks,
8088                                            self.instance.name, multimaster)
8089     for node, nres in result.items():
8090       nres.Raise("Cannot change disks config on node %s" % node)
8091
8092   def _ExecCleanup(self):
8093     """Try to cleanup after a failed migration.
8094
8095     The cleanup is done by:
8096       - check that the instance is running only on one node
8097         (and update the config if needed)
8098       - change disks on its secondary node to secondary
8099       - wait until disks are fully synchronized
8100       - disconnect from the network
8101       - change disks into single-master mode
8102       - wait again until disks are fully synchronized
8103
8104     """
8105     instance = self.instance
8106     target_node = self.target_node
8107     source_node = self.source_node
8108
8109     # check running on only one node
8110     self.feedback_fn("* checking where the instance actually runs"
8111                      " (if this hangs, the hypervisor might be in"
8112                      " a bad state)")
8113     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8114     for node, result in ins_l.items():
8115       result.Raise("Can't contact node %s" % node)
8116
8117     runningon_source = instance.name in ins_l[source_node].payload
8118     runningon_target = instance.name in ins_l[target_node].payload
8119
8120     if runningon_source and runningon_target:
8121       raise errors.OpExecError("Instance seems to be running on two nodes,"
8122                                " or the hypervisor is confused; you will have"
8123                                " to ensure manually that it runs only on one"
8124                                " and restart this operation")
8125
8126     if not (runningon_source or runningon_target):
8127       raise errors.OpExecError("Instance does not seem to be running at all;"
8128                                " in this case it's safer to repair by"
8129                                " running 'gnt-instance stop' to ensure disk"
8130                                " shutdown, and then restarting it")
8131
8132     if runningon_target:
8133       # the migration has actually succeeded, we need to update the config
8134       self.feedback_fn("* instance running on secondary node (%s),"
8135                        " updating config" % target_node)
8136       instance.primary_node = target_node
8137       self.cfg.Update(instance, self.feedback_fn)
8138       demoted_node = source_node
8139     else:
8140       self.feedback_fn("* instance confirmed to be running on its"
8141                        " primary node (%s)" % source_node)
8142       demoted_node = target_node
8143
8144     if instance.disk_template in constants.DTS_INT_MIRROR:
8145       self._EnsureSecondary(demoted_node)
8146       try:
8147         self._WaitUntilSync()
8148       except errors.OpExecError:
8149         # we ignore here errors, since if the device is standalone, it
8150         # won't be able to sync
8151         pass
8152       self._GoStandalone()
8153       self._GoReconnect(False)
8154       self._WaitUntilSync()
8155
8156     self.feedback_fn("* done")
8157
8158   def _RevertDiskStatus(self):
8159     """Try to revert the disk status after a failed migration.
8160
8161     """
8162     target_node = self.target_node
8163     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8164       return
8165
8166     try:
8167       self._EnsureSecondary(target_node)
8168       self._GoStandalone()
8169       self._GoReconnect(False)
8170       self._WaitUntilSync()
8171     except errors.OpExecError, err:
8172       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8173                          " please try to recover the instance manually;"
8174                          " error '%s'" % str(err))
8175
8176   def _AbortMigration(self):
8177     """Call the hypervisor code to abort a started migration.
8178
8179     """
8180     instance = self.instance
8181     target_node = self.target_node
8182     source_node = self.source_node
8183     migration_info = self.migration_info
8184
8185     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8186                                                                  instance,
8187                                                                  migration_info,
8188                                                                  False)
8189     abort_msg = abort_result.fail_msg
8190     if abort_msg:
8191       logging.error("Aborting migration failed on target node %s: %s",
8192                     target_node, abort_msg)
8193       # Don't raise an exception here, as we stil have to try to revert the
8194       # disk status, even if this step failed.
8195
8196     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8197         instance, False, self.live)
8198     abort_msg = abort_result.fail_msg
8199     if abort_msg:
8200       logging.error("Aborting migration failed on source node %s: %s",
8201                     source_node, abort_msg)
8202
8203   def _ExecMigration(self):
8204     """Migrate an instance.
8205
8206     The migrate is done by:
8207       - change the disks into dual-master mode
8208       - wait until disks are fully synchronized again
8209       - migrate the instance
8210       - change disks on the new secondary node (the old primary) to secondary
8211       - wait until disks are fully synchronized
8212       - change disks into single-master mode
8213
8214     """
8215     instance = self.instance
8216     target_node = self.target_node
8217     source_node = self.source_node
8218
8219     # Check for hypervisor version mismatch and warn the user.
8220     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8221                                        None, [self.instance.hypervisor])
8222     for ninfo in nodeinfo.values():
8223       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8224                   ninfo.node)
8225     (_, _, (src_info, )) = nodeinfo[source_node].payload
8226     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8227
8228     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8229         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8230       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8231       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8232       if src_version != dst_version:
8233         self.feedback_fn("* warning: hypervisor version mismatch between"
8234                          " source (%s) and target (%s) node" %
8235                          (src_version, dst_version))
8236
8237     self.feedback_fn("* checking disk consistency between source and target")
8238     for (idx, dev) in enumerate(instance.disks):
8239       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8240         raise errors.OpExecError("Disk %s is degraded or not fully"
8241                                  " synchronized on target node,"
8242                                  " aborting migration" % idx)
8243
8244     if self.current_mem > self.tgt_free_mem:
8245       if not self.allow_runtime_changes:
8246         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8247                                  " free memory to fit instance %s on target"
8248                                  " node %s (have %dMB, need %dMB)" %
8249                                  (instance.name, target_node,
8250                                   self.tgt_free_mem, self.current_mem))
8251       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8252       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8253                                                      instance,
8254                                                      self.tgt_free_mem)
8255       rpcres.Raise("Cannot modify instance runtime memory")
8256
8257     # First get the migration information from the remote node
8258     result = self.rpc.call_migration_info(source_node, instance)
8259     msg = result.fail_msg
8260     if msg:
8261       log_err = ("Failed fetching source migration information from %s: %s" %
8262                  (source_node, msg))
8263       logging.error(log_err)
8264       raise errors.OpExecError(log_err)
8265
8266     self.migration_info = migration_info = result.payload
8267
8268     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8269       # Then switch the disks to master/master mode
8270       self._EnsureSecondary(target_node)
8271       self._GoStandalone()
8272       self._GoReconnect(True)
8273       self._WaitUntilSync()
8274
8275     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8276     result = self.rpc.call_accept_instance(target_node,
8277                                            instance,
8278                                            migration_info,
8279                                            self.nodes_ip[target_node])
8280
8281     msg = result.fail_msg
8282     if msg:
8283       logging.error("Instance pre-migration failed, trying to revert"
8284                     " disk status: %s", msg)
8285       self.feedback_fn("Pre-migration failed, aborting")
8286       self._AbortMigration()
8287       self._RevertDiskStatus()
8288       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8289                                (instance.name, msg))
8290
8291     self.feedback_fn("* migrating instance to %s" % target_node)
8292     result = self.rpc.call_instance_migrate(source_node, instance,
8293                                             self.nodes_ip[target_node],
8294                                             self.live)
8295     msg = result.fail_msg
8296     if msg:
8297       logging.error("Instance migration failed, trying to revert"
8298                     " disk status: %s", msg)
8299       self.feedback_fn("Migration failed, aborting")
8300       self._AbortMigration()
8301       self._RevertDiskStatus()
8302       raise errors.OpExecError("Could not migrate instance %s: %s" %
8303                                (instance.name, msg))
8304
8305     self.feedback_fn("* starting memory transfer")
8306     last_feedback = time.time()
8307     while True:
8308       result = self.rpc.call_instance_get_migration_status(source_node,
8309                                                            instance)
8310       msg = result.fail_msg
8311       ms = result.payload   # MigrationStatus instance
8312       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8313         logging.error("Instance migration failed, trying to revert"
8314                       " disk status: %s", msg)
8315         self.feedback_fn("Migration failed, aborting")
8316         self._AbortMigration()
8317         self._RevertDiskStatus()
8318         raise errors.OpExecError("Could not migrate instance %s: %s" %
8319                                  (instance.name, msg))
8320
8321       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8322         self.feedback_fn("* memory transfer complete")
8323         break
8324
8325       if (utils.TimeoutExpired(last_feedback,
8326                                self._MIGRATION_FEEDBACK_INTERVAL) and
8327           ms.transferred_ram is not None):
8328         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8329         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8330         last_feedback = time.time()
8331
8332       time.sleep(self._MIGRATION_POLL_INTERVAL)
8333
8334     result = self.rpc.call_instance_finalize_migration_src(source_node,
8335                                                            instance,
8336                                                            True,
8337                                                            self.live)
8338     msg = result.fail_msg
8339     if msg:
8340       logging.error("Instance migration succeeded, but finalization failed"
8341                     " on the source node: %s", msg)
8342       raise errors.OpExecError("Could not finalize instance migration: %s" %
8343                                msg)
8344
8345     instance.primary_node = target_node
8346
8347     # distribute new instance config to the other nodes
8348     self.cfg.Update(instance, self.feedback_fn)
8349
8350     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8351                                                            instance,
8352                                                            migration_info,
8353                                                            True)
8354     msg = result.fail_msg
8355     if msg:
8356       logging.error("Instance migration succeeded, but finalization failed"
8357                     " on the target node: %s", msg)
8358       raise errors.OpExecError("Could not finalize instance migration: %s" %
8359                                msg)
8360
8361     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8362       self._EnsureSecondary(source_node)
8363       self._WaitUntilSync()
8364       self._GoStandalone()
8365       self._GoReconnect(False)
8366       self._WaitUntilSync()
8367
8368     # If the instance's disk template is `rbd' and there was a successful
8369     # migration, unmap the device from the source node.
8370     if self.instance.disk_template == constants.DT_RBD:
8371       disks = _ExpandCheckDisks(instance, instance.disks)
8372       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8373       for disk in disks:
8374         result = self.rpc.call_blockdev_shutdown(source_node, disk)
8375         msg = result.fail_msg
8376         if msg:
8377           logging.error("Migration was successful, but couldn't unmap the"
8378                         " block device %s on source node %s: %s",
8379                         disk.iv_name, source_node, msg)
8380           logging.error("You need to unmap the device %s manually on %s",
8381                         disk.iv_name, source_node)
8382
8383     self.feedback_fn("* done")
8384
8385   def _ExecFailover(self):
8386     """Failover an instance.
8387
8388     The failover is done by shutting it down on its present node and
8389     starting it on the secondary.
8390
8391     """
8392     instance = self.instance
8393     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8394
8395     source_node = instance.primary_node
8396     target_node = self.target_node
8397
8398     if instance.admin_state == constants.ADMINST_UP:
8399       self.feedback_fn("* checking disk consistency between source and target")
8400       for (idx, dev) in enumerate(instance.disks):
8401         # for drbd, these are drbd over lvm
8402         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8403           if primary_node.offline:
8404             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8405                              " target node %s" %
8406                              (primary_node.name, idx, target_node))
8407           elif not self.ignore_consistency:
8408             raise errors.OpExecError("Disk %s is degraded on target node,"
8409                                      " aborting failover" % idx)
8410     else:
8411       self.feedback_fn("* not checking disk consistency as instance is not"
8412                        " running")
8413
8414     self.feedback_fn("* shutting down instance on source node")
8415     logging.info("Shutting down instance %s on node %s",
8416                  instance.name, source_node)
8417
8418     result = self.rpc.call_instance_shutdown(source_node, instance,
8419                                              self.shutdown_timeout)
8420     msg = result.fail_msg
8421     if msg:
8422       if self.ignore_consistency or primary_node.offline:
8423         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8424                            " proceeding anyway; please make sure node"
8425                            " %s is down; error details: %s",
8426                            instance.name, source_node, source_node, msg)
8427       else:
8428         raise errors.OpExecError("Could not shutdown instance %s on"
8429                                  " node %s: %s" %
8430                                  (instance.name, source_node, msg))
8431
8432     self.feedback_fn("* deactivating the instance's disks on source node")
8433     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8434       raise errors.OpExecError("Can't shut down the instance's disks")
8435
8436     instance.primary_node = target_node
8437     # distribute new instance config to the other nodes
8438     self.cfg.Update(instance, self.feedback_fn)
8439
8440     # Only start the instance if it's marked as up
8441     if instance.admin_state == constants.ADMINST_UP:
8442       self.feedback_fn("* activating the instance's disks on target node %s" %
8443                        target_node)
8444       logging.info("Starting instance %s on node %s",
8445                    instance.name, target_node)
8446
8447       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8448                                            ignore_secondaries=True)
8449       if not disks_ok:
8450         _ShutdownInstanceDisks(self.lu, instance)
8451         raise errors.OpExecError("Can't activate the instance's disks")
8452
8453       self.feedback_fn("* starting the instance on the target node %s" %
8454                        target_node)
8455       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8456                                             False)
8457       msg = result.fail_msg
8458       if msg:
8459         _ShutdownInstanceDisks(self.lu, instance)
8460         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8461                                  (instance.name, target_node, msg))
8462
8463   def Exec(self, feedback_fn):
8464     """Perform the migration.
8465
8466     """
8467     self.feedback_fn = feedback_fn
8468     self.source_node = self.instance.primary_node
8469
8470     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8471     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8472       self.target_node = self.instance.secondary_nodes[0]
8473       # Otherwise self.target_node has been populated either
8474       # directly, or through an iallocator.
8475
8476     self.all_nodes = [self.source_node, self.target_node]
8477     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8478                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8479
8480     if self.failover:
8481       feedback_fn("Failover instance %s" % self.instance.name)
8482       self._ExecFailover()
8483     else:
8484       feedback_fn("Migrating instance %s" % self.instance.name)
8485
8486       if self.cleanup:
8487         return self._ExecCleanup()
8488       else:
8489         return self._ExecMigration()
8490
8491
8492 def _CreateBlockDev(lu, node, instance, device, force_create,
8493                     info, force_open):
8494   """Create a tree of block devices on a given node.
8495
8496   If this device type has to be created on secondaries, create it and
8497   all its children.
8498
8499   If not, just recurse to children keeping the same 'force' value.
8500
8501   @param lu: the lu on whose behalf we execute
8502   @param node: the node on which to create the device
8503   @type instance: L{objects.Instance}
8504   @param instance: the instance which owns the device
8505   @type device: L{objects.Disk}
8506   @param device: the device to create
8507   @type force_create: boolean
8508   @param force_create: whether to force creation of this device; this
8509       will be change to True whenever we find a device which has
8510       CreateOnSecondary() attribute
8511   @param info: the extra 'metadata' we should attach to the device
8512       (this will be represented as a LVM tag)
8513   @type force_open: boolean
8514   @param force_open: this parameter will be passes to the
8515       L{backend.BlockdevCreate} function where it specifies
8516       whether we run on primary or not, and it affects both
8517       the child assembly and the device own Open() execution
8518
8519   """
8520   if device.CreateOnSecondary():
8521     force_create = True
8522
8523   if device.children:
8524     for child in device.children:
8525       _CreateBlockDev(lu, node, instance, child, force_create,
8526                       info, force_open)
8527
8528   if not force_create:
8529     return
8530
8531   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8532
8533
8534 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8535   """Create a single block device on a given node.
8536
8537   This will not recurse over children of the device, so they must be
8538   created in advance.
8539
8540   @param lu: the lu on whose behalf we execute
8541   @param node: the node on which to create the device
8542   @type instance: L{objects.Instance}
8543   @param instance: the instance which owns the device
8544   @type device: L{objects.Disk}
8545   @param device: the device to create
8546   @param info: the extra 'metadata' we should attach to the device
8547       (this will be represented as a LVM tag)
8548   @type force_open: boolean
8549   @param force_open: this parameter will be passes to the
8550       L{backend.BlockdevCreate} function where it specifies
8551       whether we run on primary or not, and it affects both
8552       the child assembly and the device own Open() execution
8553
8554   """
8555   lu.cfg.SetDiskID(device, node)
8556   result = lu.rpc.call_blockdev_create(node, device, device.size,
8557                                        instance.name, force_open, info)
8558   result.Raise("Can't create block device %s on"
8559                " node %s for instance %s" % (device, node, instance.name))
8560   if device.physical_id is None:
8561     device.physical_id = result.payload
8562
8563
8564 def _GenerateUniqueNames(lu, exts):
8565   """Generate a suitable LV name.
8566
8567   This will generate a logical volume name for the given instance.
8568
8569   """
8570   results = []
8571   for val in exts:
8572     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8573     results.append("%s%s" % (new_id, val))
8574   return results
8575
8576
8577 def _ComputeLDParams(disk_template, disk_params):
8578   """Computes Logical Disk parameters from Disk Template parameters.
8579
8580   @type disk_template: string
8581   @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8582   @type disk_params: dict
8583   @param disk_params: disk template parameters; dict(template_name -> parameters
8584   @rtype: list(dict)
8585   @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8586     contains the LD parameters of the node. The tree is flattened in-order.
8587
8588   """
8589   if disk_template not in constants.DISK_TEMPLATES:
8590     raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8591
8592   result = list()
8593   dt_params = disk_params[disk_template]
8594   if disk_template == constants.DT_DRBD8:
8595     drbd_params = {
8596       constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8597       constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8598       constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8599       constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8600       constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8601       constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8602       constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8603       constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8604       constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8605       constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8606       constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8607       constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8608       }
8609
8610     drbd_params = \
8611       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8612                        drbd_params)
8613
8614     result.append(drbd_params)
8615
8616     # data LV
8617     data_params = {
8618       constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8619       }
8620     data_params = \
8621       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8622                        data_params)
8623     result.append(data_params)
8624
8625     # metadata LV
8626     meta_params = {
8627       constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8628       }
8629     meta_params = \
8630       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8631                        meta_params)
8632     result.append(meta_params)
8633
8634   elif (disk_template == constants.DT_FILE or
8635         disk_template == constants.DT_SHARED_FILE):
8636     result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8637
8638   elif disk_template == constants.DT_PLAIN:
8639     params = {
8640       constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8641       }
8642     params = \
8643       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8644                        params)
8645     result.append(params)
8646
8647   elif disk_template == constants.DT_BLOCK:
8648     result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8649
8650   elif disk_template == constants.DT_RBD:
8651     params = {
8652       constants.LDP_POOL: dt_params[constants.RBD_POOL]
8653       }
8654     params = \
8655       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8656                        params)
8657     result.append(params)
8658
8659   return result
8660
8661
8662 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8663                          iv_name, p_minor, s_minor, drbd_params, data_params,
8664                          meta_params):
8665   """Generate a drbd8 device complete with its children.
8666
8667   """
8668   assert len(vgnames) == len(names) == 2
8669   port = lu.cfg.AllocatePort()
8670   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8671
8672   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8673                           logical_id=(vgnames[0], names[0]),
8674                           params=data_params)
8675   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8676                           logical_id=(vgnames[1], names[1]),
8677                           params=meta_params)
8678   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8679                           logical_id=(primary, secondary, port,
8680                                       p_minor, s_minor,
8681                                       shared_secret),
8682                           children=[dev_data, dev_meta],
8683                           iv_name=iv_name, params=drbd_params)
8684   return drbd_dev
8685
8686
8687 _DISK_TEMPLATE_NAME_PREFIX = {
8688   constants.DT_PLAIN: "",
8689   constants.DT_RBD: ".rbd",
8690   }
8691
8692
8693 _DISK_TEMPLATE_DEVICE_TYPE = {
8694   constants.DT_PLAIN: constants.LD_LV,
8695   constants.DT_FILE: constants.LD_FILE,
8696   constants.DT_SHARED_FILE: constants.LD_FILE,
8697   constants.DT_BLOCK: constants.LD_BLOCKDEV,
8698   constants.DT_RBD: constants.LD_RBD,
8699   }
8700
8701
8702 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8703     secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8704     feedback_fn, disk_params,
8705     _req_file_storage=opcodes.RequireFileStorage,
8706     _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8707   """Generate the entire disk layout for a given template type.
8708
8709   """
8710   #TODO: compute space requirements
8711
8712   vgname = lu.cfg.GetVGName()
8713   disk_count = len(disk_info)
8714   disks = []
8715   ld_params = _ComputeLDParams(template_name, disk_params)
8716
8717   if template_name == constants.DT_DISKLESS:
8718     pass
8719   elif template_name == constants.DT_DRBD8:
8720     drbd_params, data_params, meta_params = ld_params
8721     if len(secondary_nodes) != 1:
8722       raise errors.ProgrammerError("Wrong template configuration")
8723     remote_node = secondary_nodes[0]
8724     minors = lu.cfg.AllocateDRBDMinor(
8725       [primary_node, remote_node] * len(disk_info), instance_name)
8726
8727     names = []
8728     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8729                                                for i in range(disk_count)]):
8730       names.append(lv_prefix + "_data")
8731       names.append(lv_prefix + "_meta")
8732     for idx, disk in enumerate(disk_info):
8733       disk_index = idx + base_index
8734       drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8735       data_vg = disk.get(constants.IDISK_VG, vgname)
8736       meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8737       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8738                                       disk[constants.IDISK_SIZE],
8739                                       [data_vg, meta_vg],
8740                                       names[idx * 2:idx * 2 + 2],
8741                                       "disk/%d" % disk_index,
8742                                       minors[idx * 2], minors[idx * 2 + 1],
8743                                       drbd_params, data_params, meta_params)
8744       disk_dev.mode = disk[constants.IDISK_MODE]
8745       disks.append(disk_dev)
8746   else:
8747     if secondary_nodes:
8748       raise errors.ProgrammerError("Wrong template configuration")
8749
8750     if template_name == constants.DT_FILE:
8751       _req_file_storage()
8752     elif template_name == constants.DT_SHARED_FILE:
8753       _req_shr_file_storage()
8754
8755     name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8756     if name_prefix is None:
8757       names = None
8758     else:
8759       names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8760                                         (name_prefix, base_index + i)
8761                                         for i in range(disk_count)])
8762
8763     dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8764
8765     if template_name == constants.DT_PLAIN:
8766       def logical_id_fn(idx, _, disk):
8767         vg = disk.get(constants.IDISK_VG, vgname)
8768         return (vg, names[idx])
8769     elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8770       logical_id_fn = \
8771         lambda _, disk_index, disk: (file_driver,
8772                                      "%s/disk%d" % (file_storage_dir,
8773                                                     disk_index))
8774     elif template_name == constants.DT_BLOCK:
8775       logical_id_fn = \
8776         lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8777                                        disk[constants.IDISK_ADOPT])
8778     elif template_name == constants.DT_RBD:
8779       logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8780     else:
8781       raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8782
8783     for idx, disk in enumerate(disk_info):
8784       disk_index = idx + base_index
8785       size = disk[constants.IDISK_SIZE]
8786       feedback_fn("* disk %s, size %s" %
8787                   (disk_index, utils.FormatUnit(size, "h")))
8788       disks.append(objects.Disk(dev_type=dev_type, size=size,
8789                                 logical_id=logical_id_fn(idx, disk_index, disk),
8790                                 iv_name="disk/%d" % disk_index,
8791                                 mode=disk[constants.IDISK_MODE],
8792                                 params=ld_params[0]))
8793
8794   return disks
8795
8796
8797 def _GetInstanceInfoText(instance):
8798   """Compute that text that should be added to the disk's metadata.
8799
8800   """
8801   return "originstname+%s" % instance.name
8802
8803
8804 def _CalcEta(time_taken, written, total_size):
8805   """Calculates the ETA based on size written and total size.
8806
8807   @param time_taken: The time taken so far
8808   @param written: amount written so far
8809   @param total_size: The total size of data to be written
8810   @return: The remaining time in seconds
8811
8812   """
8813   avg_time = time_taken / float(written)
8814   return (total_size - written) * avg_time
8815
8816
8817 def _WipeDisks(lu, instance):
8818   """Wipes instance disks.
8819
8820   @type lu: L{LogicalUnit}
8821   @param lu: the logical unit on whose behalf we execute
8822   @type instance: L{objects.Instance}
8823   @param instance: the instance whose disks we should create
8824   @return: the success of the wipe
8825
8826   """
8827   node = instance.primary_node
8828
8829   for device in instance.disks:
8830     lu.cfg.SetDiskID(device, node)
8831
8832   logging.info("Pause sync of instance %s disks", instance.name)
8833   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8834
8835   for idx, success in enumerate(result.payload):
8836     if not success:
8837       logging.warn("pause-sync of instance %s for disks %d failed",
8838                    instance.name, idx)
8839
8840   try:
8841     for idx, device in enumerate(instance.disks):
8842       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8843       # MAX_WIPE_CHUNK at max
8844       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8845                             constants.MIN_WIPE_CHUNK_PERCENT)
8846       # we _must_ make this an int, otherwise rounding errors will
8847       # occur
8848       wipe_chunk_size = int(wipe_chunk_size)
8849
8850       lu.LogInfo("* Wiping disk %d", idx)
8851       logging.info("Wiping disk %d for instance %s, node %s using"
8852                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8853
8854       offset = 0
8855       size = device.size
8856       last_output = 0
8857       start_time = time.time()
8858
8859       while offset < size:
8860         wipe_size = min(wipe_chunk_size, size - offset)
8861         logging.debug("Wiping disk %d, offset %s, chunk %s",
8862                       idx, offset, wipe_size)
8863         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8864         result.Raise("Could not wipe disk %d at offset %d for size %d" %
8865                      (idx, offset, wipe_size))
8866         now = time.time()
8867         offset += wipe_size
8868         if now - last_output >= 60:
8869           eta = _CalcEta(now - start_time, offset, size)
8870           lu.LogInfo(" - done: %.1f%% ETA: %s" %
8871                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
8872           last_output = now
8873   finally:
8874     logging.info("Resume sync of instance %s disks", instance.name)
8875
8876     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8877
8878     for idx, success in enumerate(result.payload):
8879       if not success:
8880         lu.LogWarning("Resume sync of disk %d failed, please have a"
8881                       " look at the status and troubleshoot the issue", idx)
8882         logging.warn("resume-sync of instance %s for disks %d failed",
8883                      instance.name, idx)
8884
8885
8886 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8887   """Create all disks for an instance.
8888
8889   This abstracts away some work from AddInstance.
8890
8891   @type lu: L{LogicalUnit}
8892   @param lu: the logical unit on whose behalf we execute
8893   @type instance: L{objects.Instance}
8894   @param instance: the instance whose disks we should create
8895   @type to_skip: list
8896   @param to_skip: list of indices to skip
8897   @type target_node: string
8898   @param target_node: if passed, overrides the target node for creation
8899   @rtype: boolean
8900   @return: the success of the creation
8901
8902   """
8903   info = _GetInstanceInfoText(instance)
8904   if target_node is None:
8905     pnode = instance.primary_node
8906     all_nodes = instance.all_nodes
8907   else:
8908     pnode = target_node
8909     all_nodes = [pnode]
8910
8911   if instance.disk_template in constants.DTS_FILEBASED:
8912     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8913     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8914
8915     result.Raise("Failed to create directory '%s' on"
8916                  " node %s" % (file_storage_dir, pnode))
8917
8918   # Note: this needs to be kept in sync with adding of disks in
8919   # LUInstanceSetParams
8920   for idx, device in enumerate(instance.disks):
8921     if to_skip and idx in to_skip:
8922       continue
8923     logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8924     #HARDCODE
8925     for node in all_nodes:
8926       f_create = node == pnode
8927       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8928
8929
8930 def _RemoveDisks(lu, instance, target_node=None):
8931   """Remove all disks for an instance.
8932
8933   This abstracts away some work from `AddInstance()` and
8934   `RemoveInstance()`. Note that in case some of the devices couldn't
8935   be removed, the removal will continue with the other ones (compare
8936   with `_CreateDisks()`).
8937
8938   @type lu: L{LogicalUnit}
8939   @param lu: the logical unit on whose behalf we execute
8940   @type instance: L{objects.Instance}
8941   @param instance: the instance whose disks we should remove
8942   @type target_node: string
8943   @param target_node: used to override the node on which to remove the disks
8944   @rtype: boolean
8945   @return: the success of the removal
8946
8947   """
8948   logging.info("Removing block devices for instance %s", instance.name)
8949
8950   all_result = True
8951   for (idx, device) in enumerate(instance.disks):
8952     if target_node:
8953       edata = [(target_node, device)]
8954     else:
8955       edata = device.ComputeNodeTree(instance.primary_node)
8956     for node, disk in edata:
8957       lu.cfg.SetDiskID(disk, node)
8958       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8959       if msg:
8960         lu.LogWarning("Could not remove disk %s on node %s,"
8961                       " continuing anyway: %s", idx, node, msg)
8962         all_result = False
8963
8964     # if this is a DRBD disk, return its port to the pool
8965     if device.dev_type in constants.LDS_DRBD:
8966       tcp_port = device.logical_id[2]
8967       lu.cfg.AddTcpUdpPort(tcp_port)
8968
8969   if instance.disk_template == constants.DT_FILE:
8970     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8971     if target_node:
8972       tgt = target_node
8973     else:
8974       tgt = instance.primary_node
8975     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8976     if result.fail_msg:
8977       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8978                     file_storage_dir, instance.primary_node, result.fail_msg)
8979       all_result = False
8980
8981   return all_result
8982
8983
8984 def _ComputeDiskSizePerVG(disk_template, disks):
8985   """Compute disk size requirements in the volume group
8986
8987   """
8988   def _compute(disks, payload):
8989     """Universal algorithm.
8990
8991     """
8992     vgs = {}
8993     for disk in disks:
8994       vgs[disk[constants.IDISK_VG]] = \
8995         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
8996
8997     return vgs
8998
8999   # Required free disk space as a function of disk and swap space
9000   req_size_dict = {
9001     constants.DT_DISKLESS: {},
9002     constants.DT_PLAIN: _compute(disks, 0),
9003     # 128 MB are added for drbd metadata for each disk
9004     constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9005     constants.DT_FILE: {},
9006     constants.DT_SHARED_FILE: {},
9007   }
9008
9009   if disk_template not in req_size_dict:
9010     raise errors.ProgrammerError("Disk template '%s' size requirement"
9011                                  " is unknown" % disk_template)
9012
9013   return req_size_dict[disk_template]
9014
9015
9016 def _ComputeDiskSize(disk_template, disks):
9017   """Compute disk size requirements in the volume group
9018
9019   """
9020   # Required free disk space as a function of disk and swap space
9021   req_size_dict = {
9022     constants.DT_DISKLESS: None,
9023     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9024     # 128 MB are added for drbd metadata for each disk
9025     constants.DT_DRBD8:
9026       sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9027     constants.DT_FILE: None,
9028     constants.DT_SHARED_FILE: 0,
9029     constants.DT_BLOCK: 0,
9030     constants.DT_RBD: 0,
9031   }
9032
9033   if disk_template not in req_size_dict:
9034     raise errors.ProgrammerError("Disk template '%s' size requirement"
9035                                  " is unknown" % disk_template)
9036
9037   return req_size_dict[disk_template]
9038
9039
9040 def _FilterVmNodes(lu, nodenames):
9041   """Filters out non-vm_capable nodes from a list.
9042
9043   @type lu: L{LogicalUnit}
9044   @param lu: the logical unit for which we check
9045   @type nodenames: list
9046   @param nodenames: the list of nodes on which we should check
9047   @rtype: list
9048   @return: the list of vm-capable nodes
9049
9050   """
9051   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9052   return [name for name in nodenames if name not in vm_nodes]
9053
9054
9055 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9056   """Hypervisor parameter validation.
9057
9058   This function abstract the hypervisor parameter validation to be
9059   used in both instance create and instance modify.
9060
9061   @type lu: L{LogicalUnit}
9062   @param lu: the logical unit for which we check
9063   @type nodenames: list
9064   @param nodenames: the list of nodes on which we should check
9065   @type hvname: string
9066   @param hvname: the name of the hypervisor we should use
9067   @type hvparams: dict
9068   @param hvparams: the parameters which we need to check
9069   @raise errors.OpPrereqError: if the parameters are not valid
9070
9071   """
9072   nodenames = _FilterVmNodes(lu, nodenames)
9073
9074   cluster = lu.cfg.GetClusterInfo()
9075   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9076
9077   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9078   for node in nodenames:
9079     info = hvinfo[node]
9080     if info.offline:
9081       continue
9082     info.Raise("Hypervisor parameter validation failed on node %s" % node)
9083
9084
9085 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9086   """OS parameters validation.
9087
9088   @type lu: L{LogicalUnit}
9089   @param lu: the logical unit for which we check
9090   @type required: boolean
9091   @param required: whether the validation should fail if the OS is not
9092       found
9093   @type nodenames: list
9094   @param nodenames: the list of nodes on which we should check
9095   @type osname: string
9096   @param osname: the name of the hypervisor we should use
9097   @type osparams: dict
9098   @param osparams: the parameters which we need to check
9099   @raise errors.OpPrereqError: if the parameters are not valid
9100
9101   """
9102   nodenames = _FilterVmNodes(lu, nodenames)
9103   result = lu.rpc.call_os_validate(nodenames, required, osname,
9104                                    [constants.OS_VALIDATE_PARAMETERS],
9105                                    osparams)
9106   for node, nres in result.items():
9107     # we don't check for offline cases since this should be run only
9108     # against the master node and/or an instance's nodes
9109     nres.Raise("OS Parameters validation failed on node %s" % node)
9110     if not nres.payload:
9111       lu.LogInfo("OS %s not found on node %s, validation skipped",
9112                  osname, node)
9113
9114
9115 class LUInstanceCreate(LogicalUnit):
9116   """Create an instance.
9117
9118   """
9119   HPATH = "instance-add"
9120   HTYPE = constants.HTYPE_INSTANCE
9121   REQ_BGL = False
9122
9123   def CheckArguments(self):
9124     """Check arguments.
9125
9126     """
9127     # do not require name_check to ease forward/backward compatibility
9128     # for tools
9129     if self.op.no_install and self.op.start:
9130       self.LogInfo("No-installation mode selected, disabling startup")
9131       self.op.start = False
9132     # validate/normalize the instance name
9133     self.op.instance_name = \
9134       netutils.Hostname.GetNormalizedName(self.op.instance_name)
9135
9136     if self.op.ip_check and not self.op.name_check:
9137       # TODO: make the ip check more flexible and not depend on the name check
9138       raise errors.OpPrereqError("Cannot do IP address check without a name"
9139                                  " check", errors.ECODE_INVAL)
9140
9141     # check nics' parameter names
9142     for nic in self.op.nics:
9143       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9144
9145     # check disks. parameter names and consistent adopt/no-adopt strategy
9146     has_adopt = has_no_adopt = False
9147     for disk in self.op.disks:
9148       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9149       if constants.IDISK_ADOPT in disk:
9150         has_adopt = True
9151       else:
9152         has_no_adopt = True
9153     if has_adopt and has_no_adopt:
9154       raise errors.OpPrereqError("Either all disks are adopted or none is",
9155                                  errors.ECODE_INVAL)
9156     if has_adopt:
9157       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9158         raise errors.OpPrereqError("Disk adoption is not supported for the"
9159                                    " '%s' disk template" %
9160                                    self.op.disk_template,
9161                                    errors.ECODE_INVAL)
9162       if self.op.iallocator is not None:
9163         raise errors.OpPrereqError("Disk adoption not allowed with an"
9164                                    " iallocator script", errors.ECODE_INVAL)
9165       if self.op.mode == constants.INSTANCE_IMPORT:
9166         raise errors.OpPrereqError("Disk adoption not allowed for"
9167                                    " instance import", errors.ECODE_INVAL)
9168     else:
9169       if self.op.disk_template in constants.DTS_MUST_ADOPT:
9170         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9171                                    " but no 'adopt' parameter given" %
9172                                    self.op.disk_template,
9173                                    errors.ECODE_INVAL)
9174
9175     self.adopt_disks = has_adopt
9176
9177     # instance name verification
9178     if self.op.name_check:
9179       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9180       self.op.instance_name = self.hostname1.name
9181       # used in CheckPrereq for ip ping check
9182       self.check_ip = self.hostname1.ip
9183     else:
9184       self.check_ip = None
9185
9186     # file storage checks
9187     if (self.op.file_driver and
9188         not self.op.file_driver in constants.FILE_DRIVER):
9189       raise errors.OpPrereqError("Invalid file driver name '%s'" %
9190                                  self.op.file_driver, errors.ECODE_INVAL)
9191
9192     if self.op.disk_template == constants.DT_FILE:
9193       opcodes.RequireFileStorage()
9194     elif self.op.disk_template == constants.DT_SHARED_FILE:
9195       opcodes.RequireSharedFileStorage()
9196
9197     ### Node/iallocator related checks
9198     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9199
9200     if self.op.pnode is not None:
9201       if self.op.disk_template in constants.DTS_INT_MIRROR:
9202         if self.op.snode is None:
9203           raise errors.OpPrereqError("The networked disk templates need"
9204                                      " a mirror node", errors.ECODE_INVAL)
9205       elif self.op.snode:
9206         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9207                         " template")
9208         self.op.snode = None
9209
9210     self._cds = _GetClusterDomainSecret()
9211
9212     if self.op.mode == constants.INSTANCE_IMPORT:
9213       # On import force_variant must be True, because if we forced it at
9214       # initial install, our only chance when importing it back is that it
9215       # works again!
9216       self.op.force_variant = True
9217
9218       if self.op.no_install:
9219         self.LogInfo("No-installation mode has no effect during import")
9220
9221     elif self.op.mode == constants.INSTANCE_CREATE:
9222       if self.op.os_type is None:
9223         raise errors.OpPrereqError("No guest OS specified",
9224                                    errors.ECODE_INVAL)
9225       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9226         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9227                                    " installation" % self.op.os_type,
9228                                    errors.ECODE_STATE)
9229       if self.op.disk_template is None:
9230         raise errors.OpPrereqError("No disk template specified",
9231                                    errors.ECODE_INVAL)
9232
9233     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9234       # Check handshake to ensure both clusters have the same domain secret
9235       src_handshake = self.op.source_handshake
9236       if not src_handshake:
9237         raise errors.OpPrereqError("Missing source handshake",
9238                                    errors.ECODE_INVAL)
9239
9240       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9241                                                            src_handshake)
9242       if errmsg:
9243         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9244                                    errors.ECODE_INVAL)
9245
9246       # Load and check source CA
9247       self.source_x509_ca_pem = self.op.source_x509_ca
9248       if not self.source_x509_ca_pem:
9249         raise errors.OpPrereqError("Missing source X509 CA",
9250                                    errors.ECODE_INVAL)
9251
9252       try:
9253         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9254                                                     self._cds)
9255       except OpenSSL.crypto.Error, err:
9256         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9257                                    (err, ), errors.ECODE_INVAL)
9258
9259       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9260       if errcode is not None:
9261         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9262                                    errors.ECODE_INVAL)
9263
9264       self.source_x509_ca = cert
9265
9266       src_instance_name = self.op.source_instance_name
9267       if not src_instance_name:
9268         raise errors.OpPrereqError("Missing source instance name",
9269                                    errors.ECODE_INVAL)
9270
9271       self.source_instance_name = \
9272           netutils.GetHostname(name=src_instance_name).name
9273
9274     else:
9275       raise errors.OpPrereqError("Invalid instance creation mode %r" %
9276                                  self.op.mode, errors.ECODE_INVAL)
9277
9278   def ExpandNames(self):
9279     """ExpandNames for CreateInstance.
9280
9281     Figure out the right locks for instance creation.
9282
9283     """
9284     self.needed_locks = {}
9285
9286     instance_name = self.op.instance_name
9287     # this is just a preventive check, but someone might still add this
9288     # instance in the meantime, and creation will fail at lock-add time
9289     if instance_name in self.cfg.GetInstanceList():
9290       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9291                                  instance_name, errors.ECODE_EXISTS)
9292
9293     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9294
9295     if self.op.iallocator:
9296       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9297       # specifying a group on instance creation and then selecting nodes from
9298       # that group
9299       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9300       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9301     else:
9302       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9303       nodelist = [self.op.pnode]
9304       if self.op.snode is not None:
9305         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9306         nodelist.append(self.op.snode)
9307       self.needed_locks[locking.LEVEL_NODE] = nodelist
9308       # Lock resources of instance's primary and secondary nodes (copy to
9309       # prevent accidential modification)
9310       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9311
9312     # in case of import lock the source node too
9313     if self.op.mode == constants.INSTANCE_IMPORT:
9314       src_node = self.op.src_node
9315       src_path = self.op.src_path
9316
9317       if src_path is None:
9318         self.op.src_path = src_path = self.op.instance_name
9319
9320       if src_node is None:
9321         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9322         self.op.src_node = None
9323         if os.path.isabs(src_path):
9324           raise errors.OpPrereqError("Importing an instance from a path"
9325                                      " requires a source node option",
9326                                      errors.ECODE_INVAL)
9327       else:
9328         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9329         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9330           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9331         if not os.path.isabs(src_path):
9332           self.op.src_path = src_path = \
9333             utils.PathJoin(constants.EXPORT_DIR, src_path)
9334
9335   def _RunAllocator(self):
9336     """Run the allocator based on input opcode.
9337
9338     """
9339     nics = [n.ToDict() for n in self.nics]
9340     ial = IAllocator(self.cfg, self.rpc,
9341                      mode=constants.IALLOCATOR_MODE_ALLOC,
9342                      name=self.op.instance_name,
9343                      disk_template=self.op.disk_template,
9344                      tags=self.op.tags,
9345                      os=self.op.os_type,
9346                      vcpus=self.be_full[constants.BE_VCPUS],
9347                      memory=self.be_full[constants.BE_MAXMEM],
9348                      disks=self.disks,
9349                      nics=nics,
9350                      hypervisor=self.op.hypervisor,
9351                      )
9352
9353     ial.Run(self.op.iallocator)
9354
9355     if not ial.success:
9356       raise errors.OpPrereqError("Can't compute nodes using"
9357                                  " iallocator '%s': %s" %
9358                                  (self.op.iallocator, ial.info),
9359                                  errors.ECODE_NORES)
9360     if len(ial.result) != ial.required_nodes:
9361       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9362                                  " of nodes (%s), required %s" %
9363                                  (self.op.iallocator, len(ial.result),
9364                                   ial.required_nodes), errors.ECODE_FAULT)
9365     self.op.pnode = ial.result[0]
9366     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9367                  self.op.instance_name, self.op.iallocator,
9368                  utils.CommaJoin(ial.result))
9369     if ial.required_nodes == 2:
9370       self.op.snode = ial.result[1]
9371
9372   def BuildHooksEnv(self):
9373     """Build hooks env.
9374
9375     This runs on master, primary and secondary nodes of the instance.
9376
9377     """
9378     env = {
9379       "ADD_MODE": self.op.mode,
9380       }
9381     if self.op.mode == constants.INSTANCE_IMPORT:
9382       env["SRC_NODE"] = self.op.src_node
9383       env["SRC_PATH"] = self.op.src_path
9384       env["SRC_IMAGES"] = self.src_images
9385
9386     env.update(_BuildInstanceHookEnv(
9387       name=self.op.instance_name,
9388       primary_node=self.op.pnode,
9389       secondary_nodes=self.secondaries,
9390       status=self.op.start,
9391       os_type=self.op.os_type,
9392       minmem=self.be_full[constants.BE_MINMEM],
9393       maxmem=self.be_full[constants.BE_MAXMEM],
9394       vcpus=self.be_full[constants.BE_VCPUS],
9395       nics=_NICListToTuple(self, self.nics),
9396       disk_template=self.op.disk_template,
9397       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9398              for d in self.disks],
9399       bep=self.be_full,
9400       hvp=self.hv_full,
9401       hypervisor_name=self.op.hypervisor,
9402       tags=self.op.tags,
9403     ))
9404
9405     return env
9406
9407   def BuildHooksNodes(self):
9408     """Build hooks nodes.
9409
9410     """
9411     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9412     return nl, nl
9413
9414   def _ReadExportInfo(self):
9415     """Reads the export information from disk.
9416
9417     It will override the opcode source node and path with the actual
9418     information, if these two were not specified before.
9419
9420     @return: the export information
9421
9422     """
9423     assert self.op.mode == constants.INSTANCE_IMPORT
9424
9425     src_node = self.op.src_node
9426     src_path = self.op.src_path
9427
9428     if src_node is None:
9429       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9430       exp_list = self.rpc.call_export_list(locked_nodes)
9431       found = False
9432       for node in exp_list:
9433         if exp_list[node].fail_msg:
9434           continue
9435         if src_path in exp_list[node].payload:
9436           found = True
9437           self.op.src_node = src_node = node
9438           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9439                                                        src_path)
9440           break
9441       if not found:
9442         raise errors.OpPrereqError("No export found for relative path %s" %
9443                                     src_path, errors.ECODE_INVAL)
9444
9445     _CheckNodeOnline(self, src_node)
9446     result = self.rpc.call_export_info(src_node, src_path)
9447     result.Raise("No export or invalid export found in dir %s" % src_path)
9448
9449     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9450     if not export_info.has_section(constants.INISECT_EXP):
9451       raise errors.ProgrammerError("Corrupted export config",
9452                                    errors.ECODE_ENVIRON)
9453
9454     ei_version = export_info.get(constants.INISECT_EXP, "version")
9455     if (int(ei_version) != constants.EXPORT_VERSION):
9456       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9457                                  (ei_version, constants.EXPORT_VERSION),
9458                                  errors.ECODE_ENVIRON)
9459     return export_info
9460
9461   def _ReadExportParams(self, einfo):
9462     """Use export parameters as defaults.
9463
9464     In case the opcode doesn't specify (as in override) some instance
9465     parameters, then try to use them from the export information, if
9466     that declares them.
9467
9468     """
9469     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9470
9471     if self.op.disk_template is None:
9472       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9473         self.op.disk_template = einfo.get(constants.INISECT_INS,
9474                                           "disk_template")
9475         if self.op.disk_template not in constants.DISK_TEMPLATES:
9476           raise errors.OpPrereqError("Disk template specified in configuration"
9477                                      " file is not one of the allowed values:"
9478                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9479       else:
9480         raise errors.OpPrereqError("No disk template specified and the export"
9481                                    " is missing the disk_template information",
9482                                    errors.ECODE_INVAL)
9483
9484     if not self.op.disks:
9485       disks = []
9486       # TODO: import the disk iv_name too
9487       for idx in range(constants.MAX_DISKS):
9488         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9489           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9490           disks.append({constants.IDISK_SIZE: disk_sz})
9491       self.op.disks = disks
9492       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9493         raise errors.OpPrereqError("No disk info specified and the export"
9494                                    " is missing the disk information",
9495                                    errors.ECODE_INVAL)
9496
9497     if not self.op.nics:
9498       nics = []
9499       for idx in range(constants.MAX_NICS):
9500         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9501           ndict = {}
9502           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9503             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9504             ndict[name] = v
9505           nics.append(ndict)
9506         else:
9507           break
9508       self.op.nics = nics
9509
9510     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9511       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9512
9513     if (self.op.hypervisor is None and
9514         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9515       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9516
9517     if einfo.has_section(constants.INISECT_HYP):
9518       # use the export parameters but do not override the ones
9519       # specified by the user
9520       for name, value in einfo.items(constants.INISECT_HYP):
9521         if name not in self.op.hvparams:
9522           self.op.hvparams[name] = value
9523
9524     if einfo.has_section(constants.INISECT_BEP):
9525       # use the parameters, without overriding
9526       for name, value in einfo.items(constants.INISECT_BEP):
9527         if name not in self.op.beparams:
9528           self.op.beparams[name] = value
9529         # Compatibility for the old "memory" be param
9530         if name == constants.BE_MEMORY:
9531           if constants.BE_MAXMEM not in self.op.beparams:
9532             self.op.beparams[constants.BE_MAXMEM] = value
9533           if constants.BE_MINMEM not in self.op.beparams:
9534             self.op.beparams[constants.BE_MINMEM] = value
9535     else:
9536       # try to read the parameters old style, from the main section
9537       for name in constants.BES_PARAMETERS:
9538         if (name not in self.op.beparams and
9539             einfo.has_option(constants.INISECT_INS, name)):
9540           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9541
9542     if einfo.has_section(constants.INISECT_OSP):
9543       # use the parameters, without overriding
9544       for name, value in einfo.items(constants.INISECT_OSP):
9545         if name not in self.op.osparams:
9546           self.op.osparams[name] = value
9547
9548   def _RevertToDefaults(self, cluster):
9549     """Revert the instance parameters to the default values.
9550
9551     """
9552     # hvparams
9553     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9554     for name in self.op.hvparams.keys():
9555       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9556         del self.op.hvparams[name]
9557     # beparams
9558     be_defs = cluster.SimpleFillBE({})
9559     for name in self.op.beparams.keys():
9560       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9561         del self.op.beparams[name]
9562     # nic params
9563     nic_defs = cluster.SimpleFillNIC({})
9564     for nic in self.op.nics:
9565       for name in constants.NICS_PARAMETERS:
9566         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9567           del nic[name]
9568     # osparams
9569     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9570     for name in self.op.osparams.keys():
9571       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9572         del self.op.osparams[name]
9573
9574   def _CalculateFileStorageDir(self):
9575     """Calculate final instance file storage dir.
9576
9577     """
9578     # file storage dir calculation/check
9579     self.instance_file_storage_dir = None
9580     if self.op.disk_template in constants.DTS_FILEBASED:
9581       # build the full file storage dir path
9582       joinargs = []
9583
9584       if self.op.disk_template == constants.DT_SHARED_FILE:
9585         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9586       else:
9587         get_fsd_fn = self.cfg.GetFileStorageDir
9588
9589       cfg_storagedir = get_fsd_fn()
9590       if not cfg_storagedir:
9591         raise errors.OpPrereqError("Cluster file storage dir not defined")
9592       joinargs.append(cfg_storagedir)
9593
9594       if self.op.file_storage_dir is not None:
9595         joinargs.append(self.op.file_storage_dir)
9596
9597       joinargs.append(self.op.instance_name)
9598
9599       # pylint: disable=W0142
9600       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9601
9602   def CheckPrereq(self): # pylint: disable=R0914
9603     """Check prerequisites.
9604
9605     """
9606     self._CalculateFileStorageDir()
9607
9608     if self.op.mode == constants.INSTANCE_IMPORT:
9609       export_info = self._ReadExportInfo()
9610       self._ReadExportParams(export_info)
9611
9612     if (not self.cfg.GetVGName() and
9613         self.op.disk_template not in constants.DTS_NOT_LVM):
9614       raise errors.OpPrereqError("Cluster does not support lvm-based"
9615                                  " instances", errors.ECODE_STATE)
9616
9617     if (self.op.hypervisor is None or
9618         self.op.hypervisor == constants.VALUE_AUTO):
9619       self.op.hypervisor = self.cfg.GetHypervisorType()
9620
9621     cluster = self.cfg.GetClusterInfo()
9622     enabled_hvs = cluster.enabled_hypervisors
9623     if self.op.hypervisor not in enabled_hvs:
9624       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9625                                  " cluster (%s)" % (self.op.hypervisor,
9626                                   ",".join(enabled_hvs)),
9627                                  errors.ECODE_STATE)
9628
9629     # Check tag validity
9630     for tag in self.op.tags:
9631       objects.TaggableObject.ValidateTag(tag)
9632
9633     # check hypervisor parameter syntax (locally)
9634     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9635     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9636                                       self.op.hvparams)
9637     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9638     hv_type.CheckParameterSyntax(filled_hvp)
9639     self.hv_full = filled_hvp
9640     # check that we don't specify global parameters on an instance
9641     _CheckGlobalHvParams(self.op.hvparams)
9642
9643     # fill and remember the beparams dict
9644     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9645     for param, value in self.op.beparams.iteritems():
9646       if value == constants.VALUE_AUTO:
9647         self.op.beparams[param] = default_beparams[param]
9648     objects.UpgradeBeParams(self.op.beparams)
9649     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9650     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9651
9652     # build os parameters
9653     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9654
9655     # now that hvp/bep are in final format, let's reset to defaults,
9656     # if told to do so
9657     if self.op.identify_defaults:
9658       self._RevertToDefaults(cluster)
9659
9660     # NIC buildup
9661     self.nics = []
9662     for idx, nic in enumerate(self.op.nics):
9663       nic_mode_req = nic.get(constants.INIC_MODE, None)
9664       nic_mode = nic_mode_req
9665       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9666         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9667
9668       # in routed mode, for the first nic, the default ip is 'auto'
9669       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9670         default_ip_mode = constants.VALUE_AUTO
9671       else:
9672         default_ip_mode = constants.VALUE_NONE
9673
9674       # ip validity checks
9675       ip = nic.get(constants.INIC_IP, default_ip_mode)
9676       if ip is None or ip.lower() == constants.VALUE_NONE:
9677         nic_ip = None
9678       elif ip.lower() == constants.VALUE_AUTO:
9679         if not self.op.name_check:
9680           raise errors.OpPrereqError("IP address set to auto but name checks"
9681                                      " have been skipped",
9682                                      errors.ECODE_INVAL)
9683         nic_ip = self.hostname1.ip
9684       else:
9685         if not netutils.IPAddress.IsValid(ip):
9686           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9687                                      errors.ECODE_INVAL)
9688         nic_ip = ip
9689
9690       # TODO: check the ip address for uniqueness
9691       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9692         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9693                                    errors.ECODE_INVAL)
9694
9695       # MAC address verification
9696       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9697       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9698         mac = utils.NormalizeAndValidateMac(mac)
9699
9700         try:
9701           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9702         except errors.ReservationError:
9703           raise errors.OpPrereqError("MAC address %s already in use"
9704                                      " in cluster" % mac,
9705                                      errors.ECODE_NOTUNIQUE)
9706
9707       #  Build nic parameters
9708       link = nic.get(constants.INIC_LINK, None)
9709       if link == constants.VALUE_AUTO:
9710         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9711       nicparams = {}
9712       if nic_mode_req:
9713         nicparams[constants.NIC_MODE] = nic_mode
9714       if link:
9715         nicparams[constants.NIC_LINK] = link
9716
9717       check_params = cluster.SimpleFillNIC(nicparams)
9718       objects.NIC.CheckParameterSyntax(check_params)
9719       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9720
9721     # disk checks/pre-build
9722     default_vg = self.cfg.GetVGName()
9723     self.disks = []
9724     for disk in self.op.disks:
9725       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9726       if mode not in constants.DISK_ACCESS_SET:
9727         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9728                                    mode, errors.ECODE_INVAL)
9729       size = disk.get(constants.IDISK_SIZE, None)
9730       if size is None:
9731         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9732       try:
9733         size = int(size)
9734       except (TypeError, ValueError):
9735         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9736                                    errors.ECODE_INVAL)
9737
9738       data_vg = disk.get(constants.IDISK_VG, default_vg)
9739       new_disk = {
9740         constants.IDISK_SIZE: size,
9741         constants.IDISK_MODE: mode,
9742         constants.IDISK_VG: data_vg,
9743         }
9744       if constants.IDISK_METAVG in disk:
9745         new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9746       if constants.IDISK_ADOPT in disk:
9747         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9748       self.disks.append(new_disk)
9749
9750     if self.op.mode == constants.INSTANCE_IMPORT:
9751       disk_images = []
9752       for idx in range(len(self.disks)):
9753         option = "disk%d_dump" % idx
9754         if export_info.has_option(constants.INISECT_INS, option):
9755           # FIXME: are the old os-es, disk sizes, etc. useful?
9756           export_name = export_info.get(constants.INISECT_INS, option)
9757           image = utils.PathJoin(self.op.src_path, export_name)
9758           disk_images.append(image)
9759         else:
9760           disk_images.append(False)
9761
9762       self.src_images = disk_images
9763
9764       old_name = export_info.get(constants.INISECT_INS, "name")
9765       if self.op.instance_name == old_name:
9766         for idx, nic in enumerate(self.nics):
9767           if nic.mac == constants.VALUE_AUTO:
9768             nic_mac_ini = "nic%d_mac" % idx
9769             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9770
9771     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9772
9773     # ip ping checks (we use the same ip that was resolved in ExpandNames)
9774     if self.op.ip_check:
9775       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9776         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9777                                    (self.check_ip, self.op.instance_name),
9778                                    errors.ECODE_NOTUNIQUE)
9779
9780     #### mac address generation
9781     # By generating here the mac address both the allocator and the hooks get
9782     # the real final mac address rather than the 'auto' or 'generate' value.
9783     # There is a race condition between the generation and the instance object
9784     # creation, which means that we know the mac is valid now, but we're not
9785     # sure it will be when we actually add the instance. If things go bad
9786     # adding the instance will abort because of a duplicate mac, and the
9787     # creation job will fail.
9788     for nic in self.nics:
9789       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9790         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9791
9792     #### allocator run
9793
9794     if self.op.iallocator is not None:
9795       self._RunAllocator()
9796
9797     # Release all unneeded node locks
9798     _ReleaseLocks(self, locking.LEVEL_NODE,
9799                   keep=filter(None, [self.op.pnode, self.op.snode,
9800                                      self.op.src_node]))
9801     _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9802                   keep=filter(None, [self.op.pnode, self.op.snode,
9803                                      self.op.src_node]))
9804
9805     #### node related checks
9806
9807     # check primary node
9808     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9809     assert self.pnode is not None, \
9810       "Cannot retrieve locked node %s" % self.op.pnode
9811     if pnode.offline:
9812       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9813                                  pnode.name, errors.ECODE_STATE)
9814     if pnode.drained:
9815       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9816                                  pnode.name, errors.ECODE_STATE)
9817     if not pnode.vm_capable:
9818       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9819                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9820
9821     self.secondaries = []
9822
9823     # mirror node verification
9824     if self.op.disk_template in constants.DTS_INT_MIRROR:
9825       if self.op.snode == pnode.name:
9826         raise errors.OpPrereqError("The secondary node cannot be the"
9827                                    " primary node", errors.ECODE_INVAL)
9828       _CheckNodeOnline(self, self.op.snode)
9829       _CheckNodeNotDrained(self, self.op.snode)
9830       _CheckNodeVmCapable(self, self.op.snode)
9831       self.secondaries.append(self.op.snode)
9832
9833       snode = self.cfg.GetNodeInfo(self.op.snode)
9834       if pnode.group != snode.group:
9835         self.LogWarning("The primary and secondary nodes are in two"
9836                         " different node groups; the disk parameters"
9837                         " from the first disk's node group will be"
9838                         " used")
9839
9840     nodenames = [pnode.name] + self.secondaries
9841
9842     # Verify instance specs
9843     ispec = {
9844       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9845       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9846       constants.ISPEC_DISK_COUNT: len(self.disks),
9847       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9848       constants.ISPEC_NIC_COUNT: len(self.nics),
9849       }
9850
9851     group_info = self.cfg.GetNodeGroup(pnode.group)
9852     ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9853     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9854     if not self.op.ignore_ipolicy and res:
9855       raise errors.OpPrereqError(("Instance allocation to group %s violates"
9856                                   " policy: %s") % (pnode.group,
9857                                                     utils.CommaJoin(res)),
9858                                   errors.ECODE_INVAL)
9859
9860     # disk parameters (not customizable at instance or node level)
9861     # just use the primary node parameters, ignoring the secondary.
9862     self.diskparams = group_info.diskparams
9863
9864     if not self.adopt_disks:
9865       if self.op.disk_template == constants.DT_RBD:
9866         # _CheckRADOSFreeSpace() is just a placeholder.
9867         # Any function that checks prerequisites can be placed here.
9868         # Check if there is enough space on the RADOS cluster.
9869         _CheckRADOSFreeSpace()
9870       else:
9871         # Check lv size requirements, if not adopting
9872         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9873         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9874
9875     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9876       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9877                                 disk[constants.IDISK_ADOPT])
9878                      for disk in self.disks])
9879       if len(all_lvs) != len(self.disks):
9880         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9881                                    errors.ECODE_INVAL)
9882       for lv_name in all_lvs:
9883         try:
9884           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9885           # to ReserveLV uses the same syntax
9886           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9887         except errors.ReservationError:
9888           raise errors.OpPrereqError("LV named %s used by another instance" %
9889                                      lv_name, errors.ECODE_NOTUNIQUE)
9890
9891       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9892       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9893
9894       node_lvs = self.rpc.call_lv_list([pnode.name],
9895                                        vg_names.payload.keys())[pnode.name]
9896       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9897       node_lvs = node_lvs.payload
9898
9899       delta = all_lvs.difference(node_lvs.keys())
9900       if delta:
9901         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9902                                    utils.CommaJoin(delta),
9903                                    errors.ECODE_INVAL)
9904       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9905       if online_lvs:
9906         raise errors.OpPrereqError("Online logical volumes found, cannot"
9907                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9908                                    errors.ECODE_STATE)
9909       # update the size of disk based on what is found
9910       for dsk in self.disks:
9911         dsk[constants.IDISK_SIZE] = \
9912           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9913                                         dsk[constants.IDISK_ADOPT])][0]))
9914
9915     elif self.op.disk_template == constants.DT_BLOCK:
9916       # Normalize and de-duplicate device paths
9917       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9918                        for disk in self.disks])
9919       if len(all_disks) != len(self.disks):
9920         raise errors.OpPrereqError("Duplicate disk names given for adoption",
9921                                    errors.ECODE_INVAL)
9922       baddisks = [d for d in all_disks
9923                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9924       if baddisks:
9925         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9926                                    " cannot be adopted" %
9927                                    (", ".join(baddisks),
9928                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
9929                                    errors.ECODE_INVAL)
9930
9931       node_disks = self.rpc.call_bdev_sizes([pnode.name],
9932                                             list(all_disks))[pnode.name]
9933       node_disks.Raise("Cannot get block device information from node %s" %
9934                        pnode.name)
9935       node_disks = node_disks.payload
9936       delta = all_disks.difference(node_disks.keys())
9937       if delta:
9938         raise errors.OpPrereqError("Missing block device(s): %s" %
9939                                    utils.CommaJoin(delta),
9940                                    errors.ECODE_INVAL)
9941       for dsk in self.disks:
9942         dsk[constants.IDISK_SIZE] = \
9943           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9944
9945     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9946
9947     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9948     # check OS parameters (remotely)
9949     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9950
9951     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9952
9953     # memory check on primary node
9954     #TODO(dynmem): use MINMEM for checking
9955     if self.op.start:
9956       _CheckNodeFreeMemory(self, self.pnode.name,
9957                            "creating instance %s" % self.op.instance_name,
9958                            self.be_full[constants.BE_MAXMEM],
9959                            self.op.hypervisor)
9960
9961     self.dry_run_result = list(nodenames)
9962
9963   def Exec(self, feedback_fn):
9964     """Create and add the instance to the cluster.
9965
9966     """
9967     instance = self.op.instance_name
9968     pnode_name = self.pnode.name
9969
9970     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9971                 self.owned_locks(locking.LEVEL_NODE)), \
9972       "Node locks differ from node resource locks"
9973
9974     ht_kind = self.op.hypervisor
9975     if ht_kind in constants.HTS_REQ_PORT:
9976       network_port = self.cfg.AllocatePort()
9977     else:
9978       network_port = None
9979
9980     disks = _GenerateDiskTemplate(self,
9981                                   self.op.disk_template,
9982                                   instance, pnode_name,
9983                                   self.secondaries,
9984                                   self.disks,
9985                                   self.instance_file_storage_dir,
9986                                   self.op.file_driver,
9987                                   0,
9988                                   feedback_fn,
9989                                   self.diskparams)
9990
9991     iobj = objects.Instance(name=instance, os=self.op.os_type,
9992                             primary_node=pnode_name,
9993                             nics=self.nics, disks=disks,
9994                             disk_template=self.op.disk_template,
9995                             admin_state=constants.ADMINST_DOWN,
9996                             network_port=network_port,
9997                             beparams=self.op.beparams,
9998                             hvparams=self.op.hvparams,
9999                             hypervisor=self.op.hypervisor,
10000                             osparams=self.op.osparams,
10001                             )
10002
10003     if self.op.tags:
10004       for tag in self.op.tags:
10005         iobj.AddTag(tag)
10006
10007     if self.adopt_disks:
10008       if self.op.disk_template == constants.DT_PLAIN:
10009         # rename LVs to the newly-generated names; we need to construct
10010         # 'fake' LV disks with the old data, plus the new unique_id
10011         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10012         rename_to = []
10013         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10014           rename_to.append(t_dsk.logical_id)
10015           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10016           self.cfg.SetDiskID(t_dsk, pnode_name)
10017         result = self.rpc.call_blockdev_rename(pnode_name,
10018                                                zip(tmp_disks, rename_to))
10019         result.Raise("Failed to rename adoped LVs")
10020     else:
10021       feedback_fn("* creating instance disks...")
10022       try:
10023         _CreateDisks(self, iobj)
10024       except errors.OpExecError:
10025         self.LogWarning("Device creation failed, reverting...")
10026         try:
10027           _RemoveDisks(self, iobj)
10028         finally:
10029           self.cfg.ReleaseDRBDMinors(instance)
10030           raise
10031
10032     feedback_fn("adding instance %s to cluster config" % instance)
10033
10034     self.cfg.AddInstance(iobj, self.proc.GetECId())
10035
10036     # Declare that we don't want to remove the instance lock anymore, as we've
10037     # added the instance to the config
10038     del self.remove_locks[locking.LEVEL_INSTANCE]
10039
10040     if self.op.mode == constants.INSTANCE_IMPORT:
10041       # Release unused nodes
10042       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10043     else:
10044       # Release all nodes
10045       _ReleaseLocks(self, locking.LEVEL_NODE)
10046
10047     disk_abort = False
10048     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10049       feedback_fn("* wiping instance disks...")
10050       try:
10051         _WipeDisks(self, iobj)
10052       except errors.OpExecError, err:
10053         logging.exception("Wiping disks failed")
10054         self.LogWarning("Wiping instance disks failed (%s)", err)
10055         disk_abort = True
10056
10057     if disk_abort:
10058       # Something is already wrong with the disks, don't do anything else
10059       pass
10060     elif self.op.wait_for_sync:
10061       disk_abort = not _WaitForSync(self, iobj)
10062     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10063       # make sure the disks are not degraded (still sync-ing is ok)
10064       feedback_fn("* checking mirrors status")
10065       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10066     else:
10067       disk_abort = False
10068
10069     if disk_abort:
10070       _RemoveDisks(self, iobj)
10071       self.cfg.RemoveInstance(iobj.name)
10072       # Make sure the instance lock gets removed
10073       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10074       raise errors.OpExecError("There are some degraded disks for"
10075                                " this instance")
10076
10077     # Release all node resource locks
10078     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10079
10080     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10081       if self.op.mode == constants.INSTANCE_CREATE:
10082         if not self.op.no_install:
10083           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10084                         not self.op.wait_for_sync)
10085           if pause_sync:
10086             feedback_fn("* pausing disk sync to install instance OS")
10087             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10088                                                               iobj.disks, True)
10089             for idx, success in enumerate(result.payload):
10090               if not success:
10091                 logging.warn("pause-sync of instance %s for disk %d failed",
10092                              instance, idx)
10093
10094           feedback_fn("* running the instance OS create scripts...")
10095           # FIXME: pass debug option from opcode to backend
10096           os_add_result = \
10097             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10098                                           self.op.debug_level)
10099           if pause_sync:
10100             feedback_fn("* resuming disk sync")
10101             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10102                                                               iobj.disks, False)
10103             for idx, success in enumerate(result.payload):
10104               if not success:
10105                 logging.warn("resume-sync of instance %s for disk %d failed",
10106                              instance, idx)
10107
10108           os_add_result.Raise("Could not add os for instance %s"
10109                               " on node %s" % (instance, pnode_name))
10110
10111       elif self.op.mode == constants.INSTANCE_IMPORT:
10112         feedback_fn("* running the instance OS import scripts...")
10113
10114         transfers = []
10115
10116         for idx, image in enumerate(self.src_images):
10117           if not image:
10118             continue
10119
10120           # FIXME: pass debug option from opcode to backend
10121           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10122                                              constants.IEIO_FILE, (image, ),
10123                                              constants.IEIO_SCRIPT,
10124                                              (iobj.disks[idx], idx),
10125                                              None)
10126           transfers.append(dt)
10127
10128         import_result = \
10129           masterd.instance.TransferInstanceData(self, feedback_fn,
10130                                                 self.op.src_node, pnode_name,
10131                                                 self.pnode.secondary_ip,
10132                                                 iobj, transfers)
10133         if not compat.all(import_result):
10134           self.LogWarning("Some disks for instance %s on node %s were not"
10135                           " imported successfully" % (instance, pnode_name))
10136
10137       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10138         feedback_fn("* preparing remote import...")
10139         # The source cluster will stop the instance before attempting to make a
10140         # connection. In some cases stopping an instance can take a long time,
10141         # hence the shutdown timeout is added to the connection timeout.
10142         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10143                            self.op.source_shutdown_timeout)
10144         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10145
10146         assert iobj.primary_node == self.pnode.name
10147         disk_results = \
10148           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10149                                         self.source_x509_ca,
10150                                         self._cds, timeouts)
10151         if not compat.all(disk_results):
10152           # TODO: Should the instance still be started, even if some disks
10153           # failed to import (valid for local imports, too)?
10154           self.LogWarning("Some disks for instance %s on node %s were not"
10155                           " imported successfully" % (instance, pnode_name))
10156
10157         # Run rename script on newly imported instance
10158         assert iobj.name == instance
10159         feedback_fn("Running rename script for %s" % instance)
10160         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10161                                                    self.source_instance_name,
10162                                                    self.op.debug_level)
10163         if result.fail_msg:
10164           self.LogWarning("Failed to run rename script for %s on node"
10165                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10166
10167       else:
10168         # also checked in the prereq part
10169         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10170                                      % self.op.mode)
10171
10172     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10173
10174     if self.op.start:
10175       iobj.admin_state = constants.ADMINST_UP
10176       self.cfg.Update(iobj, feedback_fn)
10177       logging.info("Starting instance %s on node %s", instance, pnode_name)
10178       feedback_fn("* starting instance...")
10179       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10180                                             False)
10181       result.Raise("Could not start instance")
10182
10183     return list(iobj.all_nodes)
10184
10185
10186 def _CheckRADOSFreeSpace():
10187   """Compute disk size requirements inside the RADOS cluster.
10188
10189   """
10190   # For the RADOS cluster we assume there is always enough space.
10191   pass
10192
10193
10194 class LUInstanceConsole(NoHooksLU):
10195   """Connect to an instance's console.
10196
10197   This is somewhat special in that it returns the command line that
10198   you need to run on the master node in order to connect to the
10199   console.
10200
10201   """
10202   REQ_BGL = False
10203
10204   def ExpandNames(self):
10205     self.share_locks = _ShareAll()
10206     self._ExpandAndLockInstance()
10207
10208   def CheckPrereq(self):
10209     """Check prerequisites.
10210
10211     This checks that the instance is in the cluster.
10212
10213     """
10214     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10215     assert self.instance is not None, \
10216       "Cannot retrieve locked instance %s" % self.op.instance_name
10217     _CheckNodeOnline(self, self.instance.primary_node)
10218
10219   def Exec(self, feedback_fn):
10220     """Connect to the console of an instance
10221
10222     """
10223     instance = self.instance
10224     node = instance.primary_node
10225
10226     node_insts = self.rpc.call_instance_list([node],
10227                                              [instance.hypervisor])[node]
10228     node_insts.Raise("Can't get node information from %s" % node)
10229
10230     if instance.name not in node_insts.payload:
10231       if instance.admin_state == constants.ADMINST_UP:
10232         state = constants.INSTST_ERRORDOWN
10233       elif instance.admin_state == constants.ADMINST_DOWN:
10234         state = constants.INSTST_ADMINDOWN
10235       else:
10236         state = constants.INSTST_ADMINOFFLINE
10237       raise errors.OpExecError("Instance %s is not running (state %s)" %
10238                                (instance.name, state))
10239
10240     logging.debug("Connecting to console of %s on %s", instance.name, node)
10241
10242     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10243
10244
10245 def _GetInstanceConsole(cluster, instance):
10246   """Returns console information for an instance.
10247
10248   @type cluster: L{objects.Cluster}
10249   @type instance: L{objects.Instance}
10250   @rtype: dict
10251
10252   """
10253   hyper = hypervisor.GetHypervisor(instance.hypervisor)
10254   # beparams and hvparams are passed separately, to avoid editing the
10255   # instance and then saving the defaults in the instance itself.
10256   hvparams = cluster.FillHV(instance)
10257   beparams = cluster.FillBE(instance)
10258   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10259
10260   assert console.instance == instance.name
10261   assert console.Validate()
10262
10263   return console.ToDict()
10264
10265
10266 class LUInstanceReplaceDisks(LogicalUnit):
10267   """Replace the disks of an instance.
10268
10269   """
10270   HPATH = "mirrors-replace"
10271   HTYPE = constants.HTYPE_INSTANCE
10272   REQ_BGL = False
10273
10274   def CheckArguments(self):
10275     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10276                                   self.op.iallocator)
10277
10278   def ExpandNames(self):
10279     self._ExpandAndLockInstance()
10280
10281     assert locking.LEVEL_NODE not in self.needed_locks
10282     assert locking.LEVEL_NODE_RES not in self.needed_locks
10283     assert locking.LEVEL_NODEGROUP not in self.needed_locks
10284
10285     assert self.op.iallocator is None or self.op.remote_node is None, \
10286       "Conflicting options"
10287
10288     if self.op.remote_node is not None:
10289       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10290
10291       # Warning: do not remove the locking of the new secondary here
10292       # unless DRBD8.AddChildren is changed to work in parallel;
10293       # currently it doesn't since parallel invocations of
10294       # FindUnusedMinor will conflict
10295       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10296       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10297     else:
10298       self.needed_locks[locking.LEVEL_NODE] = []
10299       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10300
10301       if self.op.iallocator is not None:
10302         # iallocator will select a new node in the same group
10303         self.needed_locks[locking.LEVEL_NODEGROUP] = []
10304
10305     self.needed_locks[locking.LEVEL_NODE_RES] = []
10306
10307     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10308                                    self.op.iallocator, self.op.remote_node,
10309                                    self.op.disks, False, self.op.early_release,
10310                                    self.op.ignore_ipolicy)
10311
10312     self.tasklets = [self.replacer]
10313
10314   def DeclareLocks(self, level):
10315     if level == locking.LEVEL_NODEGROUP:
10316       assert self.op.remote_node is None
10317       assert self.op.iallocator is not None
10318       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10319
10320       self.share_locks[locking.LEVEL_NODEGROUP] = 1
10321       # Lock all groups used by instance optimistically; this requires going
10322       # via the node before it's locked, requiring verification later on
10323       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10324         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10325
10326     elif level == locking.LEVEL_NODE:
10327       if self.op.iallocator is not None:
10328         assert self.op.remote_node is None
10329         assert not self.needed_locks[locking.LEVEL_NODE]
10330
10331         # Lock member nodes of all locked groups
10332         self.needed_locks[locking.LEVEL_NODE] = [node_name
10333           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10334           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10335       else:
10336         self._LockInstancesNodes()
10337     elif level == locking.LEVEL_NODE_RES:
10338       # Reuse node locks
10339       self.needed_locks[locking.LEVEL_NODE_RES] = \
10340         self.needed_locks[locking.LEVEL_NODE]
10341
10342   def BuildHooksEnv(self):
10343     """Build hooks env.
10344
10345     This runs on the master, the primary and all the secondaries.
10346
10347     """
10348     instance = self.replacer.instance
10349     env = {
10350       "MODE": self.op.mode,
10351       "NEW_SECONDARY": self.op.remote_node,
10352       "OLD_SECONDARY": instance.secondary_nodes[0],
10353       }
10354     env.update(_BuildInstanceHookEnvByObject(self, instance))
10355     return env
10356
10357   def BuildHooksNodes(self):
10358     """Build hooks nodes.
10359
10360     """
10361     instance = self.replacer.instance
10362     nl = [
10363       self.cfg.GetMasterNode(),
10364       instance.primary_node,
10365       ]
10366     if self.op.remote_node is not None:
10367       nl.append(self.op.remote_node)
10368     return nl, nl
10369
10370   def CheckPrereq(self):
10371     """Check prerequisites.
10372
10373     """
10374     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10375             self.op.iallocator is None)
10376
10377     # Verify if node group locks are still correct
10378     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10379     if owned_groups:
10380       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10381
10382     return LogicalUnit.CheckPrereq(self)
10383
10384
10385 class TLReplaceDisks(Tasklet):
10386   """Replaces disks for an instance.
10387
10388   Note: Locking is not within the scope of this class.
10389
10390   """
10391   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10392                disks, delay_iallocator, early_release, ignore_ipolicy):
10393     """Initializes this class.
10394
10395     """
10396     Tasklet.__init__(self, lu)
10397
10398     # Parameters
10399     self.instance_name = instance_name
10400     self.mode = mode
10401     self.iallocator_name = iallocator_name
10402     self.remote_node = remote_node
10403     self.disks = disks
10404     self.delay_iallocator = delay_iallocator
10405     self.early_release = early_release
10406     self.ignore_ipolicy = ignore_ipolicy
10407
10408     # Runtime data
10409     self.instance = None
10410     self.new_node = None
10411     self.target_node = None
10412     self.other_node = None
10413     self.remote_node_info = None
10414     self.node_secondary_ip = None
10415
10416   @staticmethod
10417   def CheckArguments(mode, remote_node, iallocator):
10418     """Helper function for users of this class.
10419
10420     """
10421     # check for valid parameter combination
10422     if mode == constants.REPLACE_DISK_CHG:
10423       if remote_node is None and iallocator is None:
10424         raise errors.OpPrereqError("When changing the secondary either an"
10425                                    " iallocator script must be used or the"
10426                                    " new node given", errors.ECODE_INVAL)
10427
10428       if remote_node is not None and iallocator is not None:
10429         raise errors.OpPrereqError("Give either the iallocator or the new"
10430                                    " secondary, not both", errors.ECODE_INVAL)
10431
10432     elif remote_node is not None or iallocator is not None:
10433       # Not replacing the secondary
10434       raise errors.OpPrereqError("The iallocator and new node options can"
10435                                  " only be used when changing the"
10436                                  " secondary node", errors.ECODE_INVAL)
10437
10438   @staticmethod
10439   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10440     """Compute a new secondary node using an IAllocator.
10441
10442     """
10443     ial = IAllocator(lu.cfg, lu.rpc,
10444                      mode=constants.IALLOCATOR_MODE_RELOC,
10445                      name=instance_name,
10446                      relocate_from=list(relocate_from))
10447
10448     ial.Run(iallocator_name)
10449
10450     if not ial.success:
10451       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10452                                  " %s" % (iallocator_name, ial.info),
10453                                  errors.ECODE_NORES)
10454
10455     if len(ial.result) != ial.required_nodes:
10456       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10457                                  " of nodes (%s), required %s" %
10458                                  (iallocator_name,
10459                                   len(ial.result), ial.required_nodes),
10460                                  errors.ECODE_FAULT)
10461
10462     remote_node_name = ial.result[0]
10463
10464     lu.LogInfo("Selected new secondary for instance '%s': %s",
10465                instance_name, remote_node_name)
10466
10467     return remote_node_name
10468
10469   def _FindFaultyDisks(self, node_name):
10470     """Wrapper for L{_FindFaultyInstanceDisks}.
10471
10472     """
10473     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10474                                     node_name, True)
10475
10476   def _CheckDisksActivated(self, instance):
10477     """Checks if the instance disks are activated.
10478
10479     @param instance: The instance to check disks
10480     @return: True if they are activated, False otherwise
10481
10482     """
10483     nodes = instance.all_nodes
10484
10485     for idx, dev in enumerate(instance.disks):
10486       for node in nodes:
10487         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10488         self.cfg.SetDiskID(dev, node)
10489
10490         result = self.rpc.call_blockdev_find(node, dev)
10491
10492         if result.offline:
10493           continue
10494         elif result.fail_msg or not result.payload:
10495           return False
10496
10497     return True
10498
10499   def CheckPrereq(self):
10500     """Check prerequisites.
10501
10502     This checks that the instance is in the cluster.
10503
10504     """
10505     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10506     assert instance is not None, \
10507       "Cannot retrieve locked instance %s" % self.instance_name
10508
10509     if instance.disk_template != constants.DT_DRBD8:
10510       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10511                                  " instances", errors.ECODE_INVAL)
10512
10513     if len(instance.secondary_nodes) != 1:
10514       raise errors.OpPrereqError("The instance has a strange layout,"
10515                                  " expected one secondary but found %d" %
10516                                  len(instance.secondary_nodes),
10517                                  errors.ECODE_FAULT)
10518
10519     if not self.delay_iallocator:
10520       self._CheckPrereq2()
10521
10522   def _CheckPrereq2(self):
10523     """Check prerequisites, second part.
10524
10525     This function should always be part of CheckPrereq. It was separated and is
10526     now called from Exec because during node evacuation iallocator was only
10527     called with an unmodified cluster model, not taking planned changes into
10528     account.
10529
10530     """
10531     instance = self.instance
10532     secondary_node = instance.secondary_nodes[0]
10533
10534     if self.iallocator_name is None:
10535       remote_node = self.remote_node
10536     else:
10537       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10538                                        instance.name, instance.secondary_nodes)
10539
10540     if remote_node is None:
10541       self.remote_node_info = None
10542     else:
10543       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10544              "Remote node '%s' is not locked" % remote_node
10545
10546       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10547       assert self.remote_node_info is not None, \
10548         "Cannot retrieve locked node %s" % remote_node
10549
10550     if remote_node == self.instance.primary_node:
10551       raise errors.OpPrereqError("The specified node is the primary node of"
10552                                  " the instance", errors.ECODE_INVAL)
10553
10554     if remote_node == secondary_node:
10555       raise errors.OpPrereqError("The specified node is already the"
10556                                  " secondary node of the instance",
10557                                  errors.ECODE_INVAL)
10558
10559     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10560                                     constants.REPLACE_DISK_CHG):
10561       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10562                                  errors.ECODE_INVAL)
10563
10564     if self.mode == constants.REPLACE_DISK_AUTO:
10565       if not self._CheckDisksActivated(instance):
10566         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10567                                    " first" % self.instance_name,
10568                                    errors.ECODE_STATE)
10569       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10570       faulty_secondary = self._FindFaultyDisks(secondary_node)
10571
10572       if faulty_primary and faulty_secondary:
10573         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10574                                    " one node and can not be repaired"
10575                                    " automatically" % self.instance_name,
10576                                    errors.ECODE_STATE)
10577
10578       if faulty_primary:
10579         self.disks = faulty_primary
10580         self.target_node = instance.primary_node
10581         self.other_node = secondary_node
10582         check_nodes = [self.target_node, self.other_node]
10583       elif faulty_secondary:
10584         self.disks = faulty_secondary
10585         self.target_node = secondary_node
10586         self.other_node = instance.primary_node
10587         check_nodes = [self.target_node, self.other_node]
10588       else:
10589         self.disks = []
10590         check_nodes = []
10591
10592     else:
10593       # Non-automatic modes
10594       if self.mode == constants.REPLACE_DISK_PRI:
10595         self.target_node = instance.primary_node
10596         self.other_node = secondary_node
10597         check_nodes = [self.target_node, self.other_node]
10598
10599       elif self.mode == constants.REPLACE_DISK_SEC:
10600         self.target_node = secondary_node
10601         self.other_node = instance.primary_node
10602         check_nodes = [self.target_node, self.other_node]
10603
10604       elif self.mode == constants.REPLACE_DISK_CHG:
10605         self.new_node = remote_node
10606         self.other_node = instance.primary_node
10607         self.target_node = secondary_node
10608         check_nodes = [self.new_node, self.other_node]
10609
10610         _CheckNodeNotDrained(self.lu, remote_node)
10611         _CheckNodeVmCapable(self.lu, remote_node)
10612
10613         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10614         assert old_node_info is not None
10615         if old_node_info.offline and not self.early_release:
10616           # doesn't make sense to delay the release
10617           self.early_release = True
10618           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10619                           " early-release mode", secondary_node)
10620
10621       else:
10622         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10623                                      self.mode)
10624
10625       # If not specified all disks should be replaced
10626       if not self.disks:
10627         self.disks = range(len(self.instance.disks))
10628
10629     # TODO: This is ugly, but right now we can't distinguish between internal
10630     # submitted opcode and external one. We should fix that.
10631     if self.remote_node_info:
10632       # We change the node, lets verify it still meets instance policy
10633       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10634       ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10635                                        new_group_info)
10636       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10637                               ignore=self.ignore_ipolicy)
10638
10639     # TODO: compute disk parameters
10640     primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10641     secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10642     if primary_node_info.group != secondary_node_info.group:
10643       self.lu.LogInfo("The instance primary and secondary nodes are in two"
10644                       " different node groups; the disk parameters of the"
10645                       " primary node's group will be applied.")
10646
10647     self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10648
10649     for node in check_nodes:
10650       _CheckNodeOnline(self.lu, node)
10651
10652     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10653                                                           self.other_node,
10654                                                           self.target_node]
10655                               if node_name is not None)
10656
10657     # Release unneeded node and node resource locks
10658     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10659     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10660
10661     # Release any owned node group
10662     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10663       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10664
10665     # Check whether disks are valid
10666     for disk_idx in self.disks:
10667       instance.FindDisk(disk_idx)
10668
10669     # Get secondary node IP addresses
10670     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10671                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10672
10673   def Exec(self, feedback_fn):
10674     """Execute disk replacement.
10675
10676     This dispatches the disk replacement to the appropriate handler.
10677
10678     """
10679     if self.delay_iallocator:
10680       self._CheckPrereq2()
10681
10682     if __debug__:
10683       # Verify owned locks before starting operation
10684       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10685       assert set(owned_nodes) == set(self.node_secondary_ip), \
10686           ("Incorrect node locks, owning %s, expected %s" %
10687            (owned_nodes, self.node_secondary_ip.keys()))
10688       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10689               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10690
10691       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10692       assert list(owned_instances) == [self.instance_name], \
10693           "Instance '%s' not locked" % self.instance_name
10694
10695       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10696           "Should not own any node group lock at this point"
10697
10698     if not self.disks:
10699       feedback_fn("No disks need replacement")
10700       return
10701
10702     feedback_fn("Replacing disk(s) %s for %s" %
10703                 (utils.CommaJoin(self.disks), self.instance.name))
10704
10705     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10706
10707     # Activate the instance disks if we're replacing them on a down instance
10708     if activate_disks:
10709       _StartInstanceDisks(self.lu, self.instance, True)
10710
10711     try:
10712       # Should we replace the secondary node?
10713       if self.new_node is not None:
10714         fn = self._ExecDrbd8Secondary
10715       else:
10716         fn = self._ExecDrbd8DiskOnly
10717
10718       result = fn(feedback_fn)
10719     finally:
10720       # Deactivate the instance disks if we're replacing them on a
10721       # down instance
10722       if activate_disks:
10723         _SafeShutdownInstanceDisks(self.lu, self.instance)
10724
10725     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10726
10727     if __debug__:
10728       # Verify owned locks
10729       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10730       nodes = frozenset(self.node_secondary_ip)
10731       assert ((self.early_release and not owned_nodes) or
10732               (not self.early_release and not (set(owned_nodes) - nodes))), \
10733         ("Not owning the correct locks, early_release=%s, owned=%r,"
10734          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10735
10736     return result
10737
10738   def _CheckVolumeGroup(self, nodes):
10739     self.lu.LogInfo("Checking volume groups")
10740
10741     vgname = self.cfg.GetVGName()
10742
10743     # Make sure volume group exists on all involved nodes
10744     results = self.rpc.call_vg_list(nodes)
10745     if not results:
10746       raise errors.OpExecError("Can't list volume groups on the nodes")
10747
10748     for node in nodes:
10749       res = results[node]
10750       res.Raise("Error checking node %s" % node)
10751       if vgname not in res.payload:
10752         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10753                                  (vgname, node))
10754
10755   def _CheckDisksExistence(self, nodes):
10756     # Check disk existence
10757     for idx, dev in enumerate(self.instance.disks):
10758       if idx not in self.disks:
10759         continue
10760
10761       for node in nodes:
10762         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10763         self.cfg.SetDiskID(dev, node)
10764
10765         result = self.rpc.call_blockdev_find(node, dev)
10766
10767         msg = result.fail_msg
10768         if msg or not result.payload:
10769           if not msg:
10770             msg = "disk not found"
10771           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10772                                    (idx, node, msg))
10773
10774   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10775     for idx, dev in enumerate(self.instance.disks):
10776       if idx not in self.disks:
10777         continue
10778
10779       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10780                       (idx, node_name))
10781
10782       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10783                                    ldisk=ldisk):
10784         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10785                                  " replace disks for instance %s" %
10786                                  (node_name, self.instance.name))
10787
10788   def _CreateNewStorage(self, node_name):
10789     """Create new storage on the primary or secondary node.
10790
10791     This is only used for same-node replaces, not for changing the
10792     secondary node, hence we don't want to modify the existing disk.
10793
10794     """
10795     iv_names = {}
10796
10797     for idx, dev in enumerate(self.instance.disks):
10798       if idx not in self.disks:
10799         continue
10800
10801       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10802
10803       self.cfg.SetDiskID(dev, node_name)
10804
10805       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10806       names = _GenerateUniqueNames(self.lu, lv_names)
10807
10808       _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10809
10810       vg_data = dev.children[0].logical_id[0]
10811       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10812                              logical_id=(vg_data, names[0]), params=data_p)
10813       vg_meta = dev.children[1].logical_id[0]
10814       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10815                              logical_id=(vg_meta, names[1]), params=meta_p)
10816
10817       new_lvs = [lv_data, lv_meta]
10818       old_lvs = [child.Copy() for child in dev.children]
10819       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10820
10821       # we pass force_create=True to force the LVM creation
10822       for new_lv in new_lvs:
10823         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10824                         _GetInstanceInfoText(self.instance), False)
10825
10826     return iv_names
10827
10828   def _CheckDevices(self, node_name, iv_names):
10829     for name, (dev, _, _) in iv_names.iteritems():
10830       self.cfg.SetDiskID(dev, node_name)
10831
10832       result = self.rpc.call_blockdev_find(node_name, dev)
10833
10834       msg = result.fail_msg
10835       if msg or not result.payload:
10836         if not msg:
10837           msg = "disk not found"
10838         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10839                                  (name, msg))
10840
10841       if result.payload.is_degraded:
10842         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10843
10844   def _RemoveOldStorage(self, node_name, iv_names):
10845     for name, (_, old_lvs, _) in iv_names.iteritems():
10846       self.lu.LogInfo("Remove logical volumes for %s" % name)
10847
10848       for lv in old_lvs:
10849         self.cfg.SetDiskID(lv, node_name)
10850
10851         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10852         if msg:
10853           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10854                              hint="remove unused LVs manually")
10855
10856   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10857     """Replace a disk on the primary or secondary for DRBD 8.
10858
10859     The algorithm for replace is quite complicated:
10860
10861       1. for each disk to be replaced:
10862
10863         1. create new LVs on the target node with unique names
10864         1. detach old LVs from the drbd device
10865         1. rename old LVs to name_replaced.<time_t>
10866         1. rename new LVs to old LVs
10867         1. attach the new LVs (with the old names now) to the drbd device
10868
10869       1. wait for sync across all devices
10870
10871       1. for each modified disk:
10872
10873         1. remove old LVs (which have the name name_replaces.<time_t>)
10874
10875     Failures are not very well handled.
10876
10877     """
10878     steps_total = 6
10879
10880     # Step: check device activation
10881     self.lu.LogStep(1, steps_total, "Check device existence")
10882     self._CheckDisksExistence([self.other_node, self.target_node])
10883     self._CheckVolumeGroup([self.target_node, self.other_node])
10884
10885     # Step: check other node consistency
10886     self.lu.LogStep(2, steps_total, "Check peer consistency")
10887     self._CheckDisksConsistency(self.other_node,
10888                                 self.other_node == self.instance.primary_node,
10889                                 False)
10890
10891     # Step: create new storage
10892     self.lu.LogStep(3, steps_total, "Allocate new storage")
10893     iv_names = self._CreateNewStorage(self.target_node)
10894
10895     # Step: for each lv, detach+rename*2+attach
10896     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10897     for dev, old_lvs, new_lvs in iv_names.itervalues():
10898       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10899
10900       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10901                                                      old_lvs)
10902       result.Raise("Can't detach drbd from local storage on node"
10903                    " %s for device %s" % (self.target_node, dev.iv_name))
10904       #dev.children = []
10905       #cfg.Update(instance)
10906
10907       # ok, we created the new LVs, so now we know we have the needed
10908       # storage; as such, we proceed on the target node to rename
10909       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10910       # using the assumption that logical_id == physical_id (which in
10911       # turn is the unique_id on that node)
10912
10913       # FIXME(iustin): use a better name for the replaced LVs
10914       temp_suffix = int(time.time())
10915       ren_fn = lambda d, suff: (d.physical_id[0],
10916                                 d.physical_id[1] + "_replaced-%s" % suff)
10917
10918       # Build the rename list based on what LVs exist on the node
10919       rename_old_to_new = []
10920       for to_ren in old_lvs:
10921         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10922         if not result.fail_msg and result.payload:
10923           # device exists
10924           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10925
10926       self.lu.LogInfo("Renaming the old LVs on the target node")
10927       result = self.rpc.call_blockdev_rename(self.target_node,
10928                                              rename_old_to_new)
10929       result.Raise("Can't rename old LVs on node %s" % self.target_node)
10930
10931       # Now we rename the new LVs to the old LVs
10932       self.lu.LogInfo("Renaming the new LVs on the target node")
10933       rename_new_to_old = [(new, old.physical_id)
10934                            for old, new in zip(old_lvs, new_lvs)]
10935       result = self.rpc.call_blockdev_rename(self.target_node,
10936                                              rename_new_to_old)
10937       result.Raise("Can't rename new LVs on node %s" % self.target_node)
10938
10939       # Intermediate steps of in memory modifications
10940       for old, new in zip(old_lvs, new_lvs):
10941         new.logical_id = old.logical_id
10942         self.cfg.SetDiskID(new, self.target_node)
10943
10944       # We need to modify old_lvs so that removal later removes the
10945       # right LVs, not the newly added ones; note that old_lvs is a
10946       # copy here
10947       for disk in old_lvs:
10948         disk.logical_id = ren_fn(disk, temp_suffix)
10949         self.cfg.SetDiskID(disk, self.target_node)
10950
10951       # Now that the new lvs have the old name, we can add them to the device
10952       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10953       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10954                                                   new_lvs)
10955       msg = result.fail_msg
10956       if msg:
10957         for new_lv in new_lvs:
10958           msg2 = self.rpc.call_blockdev_remove(self.target_node,
10959                                                new_lv).fail_msg
10960           if msg2:
10961             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10962                                hint=("cleanup manually the unused logical"
10963                                      "volumes"))
10964         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10965
10966     cstep = itertools.count(5)
10967
10968     if self.early_release:
10969       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10970       self._RemoveOldStorage(self.target_node, iv_names)
10971       # TODO: Check if releasing locks early still makes sense
10972       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10973     else:
10974       # Release all resource locks except those used by the instance
10975       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10976                     keep=self.node_secondary_ip.keys())
10977
10978     # Release all node locks while waiting for sync
10979     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10980
10981     # TODO: Can the instance lock be downgraded here? Take the optional disk
10982     # shutdown in the caller into consideration.
10983
10984     # Wait for sync
10985     # This can fail as the old devices are degraded and _WaitForSync
10986     # does a combined result over all disks, so we don't check its return value
10987     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10988     _WaitForSync(self.lu, self.instance)
10989
10990     # Check all devices manually
10991     self._CheckDevices(self.instance.primary_node, iv_names)
10992
10993     # Step: remove old storage
10994     if not self.early_release:
10995       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10996       self._RemoveOldStorage(self.target_node, iv_names)
10997
10998   def _ExecDrbd8Secondary(self, feedback_fn):
10999     """Replace the secondary node for DRBD 8.
11000
11001     The algorithm for replace is quite complicated:
11002       - for all disks of the instance:
11003         - create new LVs on the new node with same names
11004         - shutdown the drbd device on the old secondary
11005         - disconnect the drbd network on the primary
11006         - create the drbd device on the new secondary
11007         - network attach the drbd on the primary, using an artifice:
11008           the drbd code for Attach() will connect to the network if it
11009           finds a device which is connected to the good local disks but
11010           not network enabled
11011       - wait for sync across all devices
11012       - remove all disks from the old secondary
11013
11014     Failures are not very well handled.
11015
11016     """
11017     steps_total = 6
11018
11019     pnode = self.instance.primary_node
11020
11021     # Step: check device activation
11022     self.lu.LogStep(1, steps_total, "Check device existence")
11023     self._CheckDisksExistence([self.instance.primary_node])
11024     self._CheckVolumeGroup([self.instance.primary_node])
11025
11026     # Step: check other node consistency
11027     self.lu.LogStep(2, steps_total, "Check peer consistency")
11028     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11029
11030     # Step: create new storage
11031     self.lu.LogStep(3, steps_total, "Allocate new storage")
11032     for idx, dev in enumerate(self.instance.disks):
11033       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11034                       (self.new_node, idx))
11035       # we pass force_create=True to force LVM creation
11036       for new_lv in dev.children:
11037         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11038                         _GetInstanceInfoText(self.instance), False)
11039
11040     # Step 4: dbrd minors and drbd setups changes
11041     # after this, we must manually remove the drbd minors on both the
11042     # error and the success paths
11043     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11044     minors = self.cfg.AllocateDRBDMinor([self.new_node
11045                                          for dev in self.instance.disks],
11046                                         self.instance.name)
11047     logging.debug("Allocated minors %r", minors)
11048
11049     iv_names = {}
11050     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11051       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11052                       (self.new_node, idx))
11053       # create new devices on new_node; note that we create two IDs:
11054       # one without port, so the drbd will be activated without
11055       # networking information on the new node at this stage, and one
11056       # with network, for the latter activation in step 4
11057       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11058       if self.instance.primary_node == o_node1:
11059         p_minor = o_minor1
11060       else:
11061         assert self.instance.primary_node == o_node2, "Three-node instance?"
11062         p_minor = o_minor2
11063
11064       new_alone_id = (self.instance.primary_node, self.new_node, None,
11065                       p_minor, new_minor, o_secret)
11066       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11067                     p_minor, new_minor, o_secret)
11068
11069       iv_names[idx] = (dev, dev.children, new_net_id)
11070       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11071                     new_net_id)
11072       drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11073       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11074                               logical_id=new_alone_id,
11075                               children=dev.children,
11076                               size=dev.size,
11077                               params=drbd_params)
11078       try:
11079         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11080                               _GetInstanceInfoText(self.instance), False)
11081       except errors.GenericError:
11082         self.cfg.ReleaseDRBDMinors(self.instance.name)
11083         raise
11084
11085     # We have new devices, shutdown the drbd on the old secondary
11086     for idx, dev in enumerate(self.instance.disks):
11087       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11088       self.cfg.SetDiskID(dev, self.target_node)
11089       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11090       if msg:
11091         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11092                            "node: %s" % (idx, msg),
11093                            hint=("Please cleanup this device manually as"
11094                                  " soon as possible"))
11095
11096     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11097     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11098                                                self.instance.disks)[pnode]
11099
11100     msg = result.fail_msg
11101     if msg:
11102       # detaches didn't succeed (unlikely)
11103       self.cfg.ReleaseDRBDMinors(self.instance.name)
11104       raise errors.OpExecError("Can't detach the disks from the network on"
11105                                " old node: %s" % (msg,))
11106
11107     # if we managed to detach at least one, we update all the disks of
11108     # the instance to point to the new secondary
11109     self.lu.LogInfo("Updating instance configuration")
11110     for dev, _, new_logical_id in iv_names.itervalues():
11111       dev.logical_id = new_logical_id
11112       self.cfg.SetDiskID(dev, self.instance.primary_node)
11113
11114     self.cfg.Update(self.instance, feedback_fn)
11115
11116     # Release all node locks (the configuration has been updated)
11117     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11118
11119     # and now perform the drbd attach
11120     self.lu.LogInfo("Attaching primary drbds to new secondary"
11121                     " (standalone => connected)")
11122     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11123                                             self.new_node],
11124                                            self.node_secondary_ip,
11125                                            self.instance.disks,
11126                                            self.instance.name,
11127                                            False)
11128     for to_node, to_result in result.items():
11129       msg = to_result.fail_msg
11130       if msg:
11131         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11132                            to_node, msg,
11133                            hint=("please do a gnt-instance info to see the"
11134                                  " status of disks"))
11135
11136     cstep = itertools.count(5)
11137
11138     if self.early_release:
11139       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11140       self._RemoveOldStorage(self.target_node, iv_names)
11141       # TODO: Check if releasing locks early still makes sense
11142       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11143     else:
11144       # Release all resource locks except those used by the instance
11145       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11146                     keep=self.node_secondary_ip.keys())
11147
11148     # TODO: Can the instance lock be downgraded here? Take the optional disk
11149     # shutdown in the caller into consideration.
11150
11151     # Wait for sync
11152     # This can fail as the old devices are degraded and _WaitForSync
11153     # does a combined result over all disks, so we don't check its return value
11154     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11155     _WaitForSync(self.lu, self.instance)
11156
11157     # Check all devices manually
11158     self._CheckDevices(self.instance.primary_node, iv_names)
11159
11160     # Step: remove old storage
11161     if not self.early_release:
11162       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11163       self._RemoveOldStorage(self.target_node, iv_names)
11164
11165
11166 class LURepairNodeStorage(NoHooksLU):
11167   """Repairs the volume group on a node.
11168
11169   """
11170   REQ_BGL = False
11171
11172   def CheckArguments(self):
11173     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11174
11175     storage_type = self.op.storage_type
11176
11177     if (constants.SO_FIX_CONSISTENCY not in
11178         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11179       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11180                                  " repaired" % storage_type,
11181                                  errors.ECODE_INVAL)
11182
11183   def ExpandNames(self):
11184     self.needed_locks = {
11185       locking.LEVEL_NODE: [self.op.node_name],
11186       }
11187
11188   def _CheckFaultyDisks(self, instance, node_name):
11189     """Ensure faulty disks abort the opcode or at least warn."""
11190     try:
11191       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11192                                   node_name, True):
11193         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11194                                    " node '%s'" % (instance.name, node_name),
11195                                    errors.ECODE_STATE)
11196     except errors.OpPrereqError, err:
11197       if self.op.ignore_consistency:
11198         self.proc.LogWarning(str(err.args[0]))
11199       else:
11200         raise
11201
11202   def CheckPrereq(self):
11203     """Check prerequisites.
11204
11205     """
11206     # Check whether any instance on this node has faulty disks
11207     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11208       if inst.admin_state != constants.ADMINST_UP:
11209         continue
11210       check_nodes = set(inst.all_nodes)
11211       check_nodes.discard(self.op.node_name)
11212       for inst_node_name in check_nodes:
11213         self._CheckFaultyDisks(inst, inst_node_name)
11214
11215   def Exec(self, feedback_fn):
11216     feedback_fn("Repairing storage unit '%s' on %s ..." %
11217                 (self.op.name, self.op.node_name))
11218
11219     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11220     result = self.rpc.call_storage_execute(self.op.node_name,
11221                                            self.op.storage_type, st_args,
11222                                            self.op.name,
11223                                            constants.SO_FIX_CONSISTENCY)
11224     result.Raise("Failed to repair storage unit '%s' on %s" %
11225                  (self.op.name, self.op.node_name))
11226
11227
11228 class LUNodeEvacuate(NoHooksLU):
11229   """Evacuates instances off a list of nodes.
11230
11231   """
11232   REQ_BGL = False
11233
11234   _MODE2IALLOCATOR = {
11235     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11236     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11237     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11238     }
11239   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11240   assert (frozenset(_MODE2IALLOCATOR.values()) ==
11241           constants.IALLOCATOR_NEVAC_MODES)
11242
11243   def CheckArguments(self):
11244     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11245
11246   def ExpandNames(self):
11247     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11248
11249     if self.op.remote_node is not None:
11250       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11251       assert self.op.remote_node
11252
11253       if self.op.remote_node == self.op.node_name:
11254         raise errors.OpPrereqError("Can not use evacuated node as a new"
11255                                    " secondary node", errors.ECODE_INVAL)
11256
11257       if self.op.mode != constants.NODE_EVAC_SEC:
11258         raise errors.OpPrereqError("Without the use of an iallocator only"
11259                                    " secondary instances can be evacuated",
11260                                    errors.ECODE_INVAL)
11261
11262     # Declare locks
11263     self.share_locks = _ShareAll()
11264     self.needed_locks = {
11265       locking.LEVEL_INSTANCE: [],
11266       locking.LEVEL_NODEGROUP: [],
11267       locking.LEVEL_NODE: [],
11268       }
11269
11270     # Determine nodes (via group) optimistically, needs verification once locks
11271     # have been acquired
11272     self.lock_nodes = self._DetermineNodes()
11273
11274   def _DetermineNodes(self):
11275     """Gets the list of nodes to operate on.
11276
11277     """
11278     if self.op.remote_node is None:
11279       # Iallocator will choose any node(s) in the same group
11280       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11281     else:
11282       group_nodes = frozenset([self.op.remote_node])
11283
11284     # Determine nodes to be locked
11285     return set([self.op.node_name]) | group_nodes
11286
11287   def _DetermineInstances(self):
11288     """Builds list of instances to operate on.
11289
11290     """
11291     assert self.op.mode in constants.NODE_EVAC_MODES
11292
11293     if self.op.mode == constants.NODE_EVAC_PRI:
11294       # Primary instances only
11295       inst_fn = _GetNodePrimaryInstances
11296       assert self.op.remote_node is None, \
11297         "Evacuating primary instances requires iallocator"
11298     elif self.op.mode == constants.NODE_EVAC_SEC:
11299       # Secondary instances only
11300       inst_fn = _GetNodeSecondaryInstances
11301     else:
11302       # All instances
11303       assert self.op.mode == constants.NODE_EVAC_ALL
11304       inst_fn = _GetNodeInstances
11305       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11306       # per instance
11307       raise errors.OpPrereqError("Due to an issue with the iallocator"
11308                                  " interface it is not possible to evacuate"
11309                                  " all instances at once; specify explicitly"
11310                                  " whether to evacuate primary or secondary"
11311                                  " instances",
11312                                  errors.ECODE_INVAL)
11313
11314     return inst_fn(self.cfg, self.op.node_name)
11315
11316   def DeclareLocks(self, level):
11317     if level == locking.LEVEL_INSTANCE:
11318       # Lock instances optimistically, needs verification once node and group
11319       # locks have been acquired
11320       self.needed_locks[locking.LEVEL_INSTANCE] = \
11321         set(i.name for i in self._DetermineInstances())
11322
11323     elif level == locking.LEVEL_NODEGROUP:
11324       # Lock node groups for all potential target nodes optimistically, needs
11325       # verification once nodes have been acquired
11326       self.needed_locks[locking.LEVEL_NODEGROUP] = \
11327         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11328
11329     elif level == locking.LEVEL_NODE:
11330       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11331
11332   def CheckPrereq(self):
11333     # Verify locks
11334     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11335     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11336     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11337
11338     need_nodes = self._DetermineNodes()
11339
11340     if not owned_nodes.issuperset(need_nodes):
11341       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11342                                  " locks were acquired, current nodes are"
11343                                  " are '%s', used to be '%s'; retry the"
11344                                  " operation" %
11345                                  (self.op.node_name,
11346                                   utils.CommaJoin(need_nodes),
11347                                   utils.CommaJoin(owned_nodes)),
11348                                  errors.ECODE_STATE)
11349
11350     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11351     if owned_groups != wanted_groups:
11352       raise errors.OpExecError("Node groups changed since locks were acquired,"
11353                                " current groups are '%s', used to be '%s';"
11354                                " retry the operation" %
11355                                (utils.CommaJoin(wanted_groups),
11356                                 utils.CommaJoin(owned_groups)))
11357
11358     # Determine affected instances
11359     self.instances = self._DetermineInstances()
11360     self.instance_names = [i.name for i in self.instances]
11361
11362     if set(self.instance_names) != owned_instances:
11363       raise errors.OpExecError("Instances on node '%s' changed since locks"
11364                                " were acquired, current instances are '%s',"
11365                                " used to be '%s'; retry the operation" %
11366                                (self.op.node_name,
11367                                 utils.CommaJoin(self.instance_names),
11368                                 utils.CommaJoin(owned_instances)))
11369
11370     if self.instance_names:
11371       self.LogInfo("Evacuating instances from node '%s': %s",
11372                    self.op.node_name,
11373                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
11374     else:
11375       self.LogInfo("No instances to evacuate from node '%s'",
11376                    self.op.node_name)
11377
11378     if self.op.remote_node is not None:
11379       for i in self.instances:
11380         if i.primary_node == self.op.remote_node:
11381           raise errors.OpPrereqError("Node %s is the primary node of"
11382                                      " instance %s, cannot use it as"
11383                                      " secondary" %
11384                                      (self.op.remote_node, i.name),
11385                                      errors.ECODE_INVAL)
11386
11387   def Exec(self, feedback_fn):
11388     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11389
11390     if not self.instance_names:
11391       # No instances to evacuate
11392       jobs = []
11393
11394     elif self.op.iallocator is not None:
11395       # TODO: Implement relocation to other group
11396       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11397                        evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11398                        instances=list(self.instance_names))
11399
11400       ial.Run(self.op.iallocator)
11401
11402       if not ial.success:
11403         raise errors.OpPrereqError("Can't compute node evacuation using"
11404                                    " iallocator '%s': %s" %
11405                                    (self.op.iallocator, ial.info),
11406                                    errors.ECODE_NORES)
11407
11408       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11409
11410     elif self.op.remote_node is not None:
11411       assert self.op.mode == constants.NODE_EVAC_SEC
11412       jobs = [
11413         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11414                                         remote_node=self.op.remote_node,
11415                                         disks=[],
11416                                         mode=constants.REPLACE_DISK_CHG,
11417                                         early_release=self.op.early_release)]
11418         for instance_name in self.instance_names
11419         ]
11420
11421     else:
11422       raise errors.ProgrammerError("No iallocator or remote node")
11423
11424     return ResultWithJobs(jobs)
11425
11426
11427 def _SetOpEarlyRelease(early_release, op):
11428   """Sets C{early_release} flag on opcodes if available.
11429
11430   """
11431   try:
11432     op.early_release = early_release
11433   except AttributeError:
11434     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11435
11436   return op
11437
11438
11439 def _NodeEvacDest(use_nodes, group, nodes):
11440   """Returns group or nodes depending on caller's choice.
11441
11442   """
11443   if use_nodes:
11444     return utils.CommaJoin(nodes)
11445   else:
11446     return group
11447
11448
11449 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11450   """Unpacks the result of change-group and node-evacuate iallocator requests.
11451
11452   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11453   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11454
11455   @type lu: L{LogicalUnit}
11456   @param lu: Logical unit instance
11457   @type alloc_result: tuple/list
11458   @param alloc_result: Result from iallocator
11459   @type early_release: bool
11460   @param early_release: Whether to release locks early if possible
11461   @type use_nodes: bool
11462   @param use_nodes: Whether to display node names instead of groups
11463
11464   """
11465   (moved, failed, jobs) = alloc_result
11466
11467   if failed:
11468     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11469                                  for (name, reason) in failed)
11470     lu.LogWarning("Unable to evacuate instances %s", failreason)
11471     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11472
11473   if moved:
11474     lu.LogInfo("Instances to be moved: %s",
11475                utils.CommaJoin("%s (to %s)" %
11476                                (name, _NodeEvacDest(use_nodes, group, nodes))
11477                                for (name, group, nodes) in moved))
11478
11479   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11480               map(opcodes.OpCode.LoadOpCode, ops))
11481           for ops in jobs]
11482
11483
11484 class LUInstanceGrowDisk(LogicalUnit):
11485   """Grow a disk of an instance.
11486
11487   """
11488   HPATH = "disk-grow"
11489   HTYPE = constants.HTYPE_INSTANCE
11490   REQ_BGL = False
11491
11492   def ExpandNames(self):
11493     self._ExpandAndLockInstance()
11494     self.needed_locks[locking.LEVEL_NODE] = []
11495     self.needed_locks[locking.LEVEL_NODE_RES] = []
11496     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11497     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11498
11499   def DeclareLocks(self, level):
11500     if level == locking.LEVEL_NODE:
11501       self._LockInstancesNodes()
11502     elif level == locking.LEVEL_NODE_RES:
11503       # Copy node locks
11504       self.needed_locks[locking.LEVEL_NODE_RES] = \
11505         self.needed_locks[locking.LEVEL_NODE][:]
11506
11507   def BuildHooksEnv(self):
11508     """Build hooks env.
11509
11510     This runs on the master, the primary and all the secondaries.
11511
11512     """
11513     env = {
11514       "DISK": self.op.disk,
11515       "AMOUNT": self.op.amount,
11516       }
11517     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11518     return env
11519
11520   def BuildHooksNodes(self):
11521     """Build hooks nodes.
11522
11523     """
11524     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11525     return (nl, nl)
11526
11527   def CheckPrereq(self):
11528     """Check prerequisites.
11529
11530     This checks that the instance is in the cluster.
11531
11532     """
11533     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11534     assert instance is not None, \
11535       "Cannot retrieve locked instance %s" % self.op.instance_name
11536     nodenames = list(instance.all_nodes)
11537     for node in nodenames:
11538       _CheckNodeOnline(self, node)
11539
11540     self.instance = instance
11541
11542     if instance.disk_template not in constants.DTS_GROWABLE:
11543       raise errors.OpPrereqError("Instance's disk layout does not support"
11544                                  " growing", errors.ECODE_INVAL)
11545
11546     self.disk = instance.FindDisk(self.op.disk)
11547
11548     if instance.disk_template not in (constants.DT_FILE,
11549                                       constants.DT_SHARED_FILE,
11550                                       constants.DT_RBD):
11551       # TODO: check the free disk space for file, when that feature will be
11552       # supported
11553       _CheckNodesFreeDiskPerVG(self, nodenames,
11554                                self.disk.ComputeGrowth(self.op.amount))
11555
11556   def Exec(self, feedback_fn):
11557     """Execute disk grow.
11558
11559     """
11560     instance = self.instance
11561     disk = self.disk
11562
11563     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11564     assert (self.owned_locks(locking.LEVEL_NODE) ==
11565             self.owned_locks(locking.LEVEL_NODE_RES))
11566
11567     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11568     if not disks_ok:
11569       raise errors.OpExecError("Cannot activate block device to grow")
11570
11571     feedback_fn("Growing disk %s of instance '%s' by %s" %
11572                 (self.op.disk, instance.name,
11573                  utils.FormatUnit(self.op.amount, "h")))
11574
11575     # First run all grow ops in dry-run mode
11576     for node in instance.all_nodes:
11577       self.cfg.SetDiskID(disk, node)
11578       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11579       result.Raise("Grow request failed to node %s" % node)
11580
11581     # We know that (as far as we can test) operations across different
11582     # nodes will succeed, time to run it for real
11583     for node in instance.all_nodes:
11584       self.cfg.SetDiskID(disk, node)
11585       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11586       result.Raise("Grow request failed to node %s" % node)
11587
11588       # TODO: Rewrite code to work properly
11589       # DRBD goes into sync mode for a short amount of time after executing the
11590       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11591       # calling "resize" in sync mode fails. Sleeping for a short amount of
11592       # time is a work-around.
11593       time.sleep(5)
11594
11595     disk.RecordGrow(self.op.amount)
11596     self.cfg.Update(instance, feedback_fn)
11597
11598     # Changes have been recorded, release node lock
11599     _ReleaseLocks(self, locking.LEVEL_NODE)
11600
11601     # Downgrade lock while waiting for sync
11602     self.glm.downgrade(locking.LEVEL_INSTANCE)
11603
11604     if self.op.wait_for_sync:
11605       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11606       if disk_abort:
11607         self.proc.LogWarning("Disk sync-ing has not returned a good"
11608                              " status; please check the instance")
11609       if instance.admin_state != constants.ADMINST_UP:
11610         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11611     elif instance.admin_state != constants.ADMINST_UP:
11612       self.proc.LogWarning("Not shutting down the disk even if the instance is"
11613                            " not supposed to be running because no wait for"
11614                            " sync mode was requested")
11615
11616     assert self.owned_locks(locking.LEVEL_NODE_RES)
11617     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11618
11619
11620 class LUInstanceQueryData(NoHooksLU):
11621   """Query runtime instance data.
11622
11623   """
11624   REQ_BGL = False
11625
11626   def ExpandNames(self):
11627     self.needed_locks = {}
11628
11629     # Use locking if requested or when non-static information is wanted
11630     if not (self.op.static or self.op.use_locking):
11631       self.LogWarning("Non-static data requested, locks need to be acquired")
11632       self.op.use_locking = True
11633
11634     if self.op.instances or not self.op.use_locking:
11635       # Expand instance names right here
11636       self.wanted_names = _GetWantedInstances(self, self.op.instances)
11637     else:
11638       # Will use acquired locks
11639       self.wanted_names = None
11640
11641     if self.op.use_locking:
11642       self.share_locks = _ShareAll()
11643
11644       if self.wanted_names is None:
11645         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11646       else:
11647         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11648
11649       self.needed_locks[locking.LEVEL_NODE] = []
11650       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11651
11652   def DeclareLocks(self, level):
11653     if self.op.use_locking and level == locking.LEVEL_NODE:
11654       self._LockInstancesNodes()
11655
11656   def CheckPrereq(self):
11657     """Check prerequisites.
11658
11659     This only checks the optional instance list against the existing names.
11660
11661     """
11662     if self.wanted_names is None:
11663       assert self.op.use_locking, "Locking was not used"
11664       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11665
11666     self.wanted_instances = \
11667         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11668
11669   def _ComputeBlockdevStatus(self, node, instance_name, dev):
11670     """Returns the status of a block device
11671
11672     """
11673     if self.op.static or not node:
11674       return None
11675
11676     self.cfg.SetDiskID(dev, node)
11677
11678     result = self.rpc.call_blockdev_find(node, dev)
11679     if result.offline:
11680       return None
11681
11682     result.Raise("Can't compute disk status for %s" % instance_name)
11683
11684     status = result.payload
11685     if status is None:
11686       return None
11687
11688     return (status.dev_path, status.major, status.minor,
11689             status.sync_percent, status.estimated_time,
11690             status.is_degraded, status.ldisk_status)
11691
11692   def _ComputeDiskStatus(self, instance, snode, dev):
11693     """Compute block device status.
11694
11695     """
11696     if dev.dev_type in constants.LDS_DRBD:
11697       # we change the snode then (otherwise we use the one passed in)
11698       if dev.logical_id[0] == instance.primary_node:
11699         snode = dev.logical_id[1]
11700       else:
11701         snode = dev.logical_id[0]
11702
11703     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11704                                               instance.name, dev)
11705     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11706
11707     if dev.children:
11708       dev_children = map(compat.partial(self._ComputeDiskStatus,
11709                                         instance, snode),
11710                          dev.children)
11711     else:
11712       dev_children = []
11713
11714     return {
11715       "iv_name": dev.iv_name,
11716       "dev_type": dev.dev_type,
11717       "logical_id": dev.logical_id,
11718       "physical_id": dev.physical_id,
11719       "pstatus": dev_pstatus,
11720       "sstatus": dev_sstatus,
11721       "children": dev_children,
11722       "mode": dev.mode,
11723       "size": dev.size,
11724       }
11725
11726   def Exec(self, feedback_fn):
11727     """Gather and return data"""
11728     result = {}
11729
11730     cluster = self.cfg.GetClusterInfo()
11731
11732     pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11733                                           for i in self.wanted_instances)
11734     for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11735       if self.op.static or pnode.offline:
11736         remote_state = None
11737         if pnode.offline:
11738           self.LogWarning("Primary node %s is marked offline, returning static"
11739                           " information only for instance %s" %
11740                           (pnode.name, instance.name))
11741       else:
11742         remote_info = self.rpc.call_instance_info(instance.primary_node,
11743                                                   instance.name,
11744                                                   instance.hypervisor)
11745         remote_info.Raise("Error checking node %s" % instance.primary_node)
11746         remote_info = remote_info.payload
11747         if remote_info and "state" in remote_info:
11748           remote_state = "up"
11749         else:
11750           if instance.admin_state == constants.ADMINST_UP:
11751             remote_state = "down"
11752           else:
11753             remote_state = instance.admin_state
11754
11755       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11756                   instance.disks)
11757
11758       result[instance.name] = {
11759         "name": instance.name,
11760         "config_state": instance.admin_state,
11761         "run_state": remote_state,
11762         "pnode": instance.primary_node,
11763         "snodes": instance.secondary_nodes,
11764         "os": instance.os,
11765         # this happens to be the same format used for hooks
11766         "nics": _NICListToTuple(self, instance.nics),
11767         "disk_template": instance.disk_template,
11768         "disks": disks,
11769         "hypervisor": instance.hypervisor,
11770         "network_port": instance.network_port,
11771         "hv_instance": instance.hvparams,
11772         "hv_actual": cluster.FillHV(instance, skip_globals=True),
11773         "be_instance": instance.beparams,
11774         "be_actual": cluster.FillBE(instance),
11775         "os_instance": instance.osparams,
11776         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11777         "serial_no": instance.serial_no,
11778         "mtime": instance.mtime,
11779         "ctime": instance.ctime,
11780         "uuid": instance.uuid,
11781         }
11782
11783     return result
11784
11785
11786 def PrepareContainerMods(mods, private_fn):
11787   """Prepares a list of container modifications by adding a private data field.
11788
11789   @type mods: list of tuples; (operation, index, parameters)
11790   @param mods: List of modifications
11791   @type private_fn: callable or None
11792   @param private_fn: Callable for constructing a private data field for a
11793     modification
11794   @rtype: list
11795
11796   """
11797   if private_fn is None:
11798     fn = lambda: None
11799   else:
11800     fn = private_fn
11801
11802   return [(op, idx, params, fn()) for (op, idx, params) in mods]
11803
11804
11805 #: Type description for changes as returned by L{ApplyContainerMods}'s
11806 #: callbacks
11807 _TApplyContModsCbChanges = \
11808   ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11809     ht.TNonEmptyString,
11810     ht.TAny,
11811     ])))
11812
11813
11814 def ApplyContainerMods(kind, container, chgdesc, mods,
11815                        create_fn, modify_fn, remove_fn):
11816   """Applies descriptions in C{mods} to C{container}.
11817
11818   @type kind: string
11819   @param kind: One-word item description
11820   @type container: list
11821   @param container: Container to modify
11822   @type chgdesc: None or list
11823   @param chgdesc: List of applied changes
11824   @type mods: list
11825   @param mods: Modifications as returned by L{PrepareContainerMods}
11826   @type create_fn: callable
11827   @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11828     receives absolute item index, parameters and private data object as added
11829     by L{PrepareContainerMods}, returns tuple containing new item and changes
11830     as list
11831   @type modify_fn: callable
11832   @param modify_fn: Callback for modifying an existing item
11833     (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11834     and private data object as added by L{PrepareContainerMods}, returns
11835     changes as list
11836   @type remove_fn: callable
11837   @param remove_fn: Callback on removing item; receives absolute item index,
11838     item and private data object as added by L{PrepareContainerMods}
11839
11840   """
11841   for (op, idx, params, private) in mods:
11842     if idx == -1:
11843       # Append
11844       absidx = len(container) - 1
11845     elif idx < 0:
11846       raise IndexError("Not accepting negative indices other than -1")
11847     elif idx > len(container):
11848       raise IndexError("Got %s index %s, but there are only %s" %
11849                        (kind, idx, len(container)))
11850     else:
11851       absidx = idx
11852
11853     changes = None
11854
11855     if op == constants.DDM_ADD:
11856       # Calculate where item will be added
11857       if idx == -1:
11858         addidx = len(container)
11859       else:
11860         addidx = idx
11861
11862       if create_fn is None:
11863         item = params
11864       else:
11865         (item, changes) = create_fn(addidx, params, private)
11866
11867       if idx == -1:
11868         container.append(item)
11869       else:
11870         assert idx >= 0
11871         assert idx <= len(container)
11872         # list.insert does so before the specified index
11873         container.insert(idx, item)
11874     else:
11875       # Retrieve existing item
11876       try:
11877         item = container[absidx]
11878       except IndexError:
11879         raise IndexError("Invalid %s index %s" % (kind, idx))
11880
11881       if op == constants.DDM_REMOVE:
11882         assert not params
11883
11884         if remove_fn is not None:
11885           remove_fn(absidx, item, private)
11886
11887         changes = [("%s/%s" % (kind, absidx), "remove")]
11888
11889         assert container[absidx] == item
11890         del container[absidx]
11891       elif op == constants.DDM_MODIFY:
11892         if modify_fn is not None:
11893           changes = modify_fn(absidx, item, params, private)
11894       else:
11895         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11896
11897     assert _TApplyContModsCbChanges(changes)
11898
11899     if not (chgdesc is None or changes is None):
11900       chgdesc.extend(changes)
11901
11902
11903 def _UpdateIvNames(base_index, disks):
11904   """Updates the C{iv_name} attribute of disks.
11905
11906   @type disks: list of L{objects.Disk}
11907
11908   """
11909   for (idx, disk) in enumerate(disks):
11910     disk.iv_name = "disk/%s" % (base_index + idx, )
11911
11912
11913 class _InstNicModPrivate:
11914   """Data structure for network interface modifications.
11915
11916   Used by L{LUInstanceSetParams}.
11917
11918   """
11919   def __init__(self):
11920     self.params = None
11921     self.filled = None
11922
11923
11924 class LUInstanceSetParams(LogicalUnit):
11925   """Modifies an instances's parameters.
11926
11927   """
11928   HPATH = "instance-modify"
11929   HTYPE = constants.HTYPE_INSTANCE
11930   REQ_BGL = False
11931
11932   @staticmethod
11933   def _UpgradeDiskNicMods(kind, mods, verify_fn):
11934     assert ht.TList(mods)
11935     assert not mods or len(mods[0]) in (2, 3)
11936
11937     if mods and len(mods[0]) == 2:
11938       result = []
11939
11940       addremove = 0
11941       for op, params in mods:
11942         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
11943           result.append((op, -1, params))
11944           addremove += 1
11945
11946           if addremove > 1:
11947             raise errors.OpPrereqError("Only one %s add or remove operation is"
11948                                        " supported at a time" % kind,
11949                                        errors.ECODE_INVAL)
11950         else:
11951           result.append((constants.DDM_MODIFY, op, params))
11952
11953       assert verify_fn(result)
11954     else:
11955       result = mods
11956
11957     return result
11958
11959   @staticmethod
11960   def _CheckMods(kind, mods, key_types, item_fn):
11961     """Ensures requested disk/NIC modifications are valid.
11962
11963     """
11964     for (op, _, params) in mods:
11965       assert ht.TDict(params)
11966
11967       utils.ForceDictType(params, key_types)
11968
11969       if op == constants.DDM_REMOVE:
11970         if params:
11971           raise errors.OpPrereqError("No settings should be passed when"
11972                                      " removing a %s" % kind,
11973                                      errors.ECODE_INVAL)
11974       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
11975         item_fn(op, params)
11976       else:
11977         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11978
11979   @staticmethod
11980   def _VerifyDiskModification(op, params):
11981     """Verifies a disk modification.
11982
11983     """
11984     if op == constants.DDM_ADD:
11985       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11986       if mode not in constants.DISK_ACCESS_SET:
11987         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11988                                    errors.ECODE_INVAL)
11989
11990       size = params.get(constants.IDISK_SIZE, None)
11991       if size is None:
11992         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
11993                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
11994
11995       try:
11996         size = int(size)
11997       except (TypeError, ValueError), err:
11998         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
11999                                    errors.ECODE_INVAL)
12000
12001       params[constants.IDISK_SIZE] = size
12002
12003     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12004       raise errors.OpPrereqError("Disk size change not possible, use"
12005                                  " grow-disk", errors.ECODE_INVAL)
12006
12007   @staticmethod
12008   def _VerifyNicModification(op, params):
12009     """Verifies a network interface modification.
12010
12011     """
12012     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12013       ip = params.get(constants.INIC_IP, None)
12014       if ip is None:
12015         pass
12016       elif ip.lower() == constants.VALUE_NONE:
12017         params[constants.INIC_IP] = None
12018       elif not netutils.IPAddress.IsValid(ip):
12019         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12020                                    errors.ECODE_INVAL)
12021
12022       bridge = params.get("bridge", None)
12023       link = params.get(constants.INIC_LINK, None)
12024       if bridge and link:
12025         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12026                                    " at the same time", errors.ECODE_INVAL)
12027       elif bridge and bridge.lower() == constants.VALUE_NONE:
12028         params["bridge"] = None
12029       elif link and link.lower() == constants.VALUE_NONE:
12030         params[constants.INIC_LINK] = None
12031
12032       if op == constants.DDM_ADD:
12033         macaddr = params.get(constants.INIC_MAC, None)
12034         if macaddr is None:
12035           params[constants.INIC_MAC] = constants.VALUE_AUTO
12036
12037       if constants.INIC_MAC in params:
12038         macaddr = params[constants.INIC_MAC]
12039         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12040           macaddr = utils.NormalizeAndValidateMac(macaddr)
12041
12042         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12043           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12044                                      " modifying an existing NIC",
12045                                      errors.ECODE_INVAL)
12046
12047   def CheckArguments(self):
12048     if not (self.op.nics or self.op.disks or self.op.disk_template or
12049             self.op.hvparams or self.op.beparams or self.op.os_name or
12050             self.op.offline is not None or self.op.runtime_mem):
12051       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12052
12053     if self.op.hvparams:
12054       _CheckGlobalHvParams(self.op.hvparams)
12055
12056     self.op.disks = \
12057       self._UpgradeDiskNicMods("disk", self.op.disks,
12058         opcodes.OpInstanceSetParams.TestDiskModifications)
12059     self.op.nics = \
12060       self._UpgradeDiskNicMods("NIC", self.op.nics,
12061         opcodes.OpInstanceSetParams.TestNicModifications)
12062
12063     # Check disk modifications
12064     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12065                     self._VerifyDiskModification)
12066
12067     if self.op.disks and self.op.disk_template is not None:
12068       raise errors.OpPrereqError("Disk template conversion and other disk"
12069                                  " changes not supported at the same time",
12070                                  errors.ECODE_INVAL)
12071
12072     if (self.op.disk_template and
12073         self.op.disk_template in constants.DTS_INT_MIRROR and
12074         self.op.remote_node is None):
12075       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12076                                  " one requires specifying a secondary node",
12077                                  errors.ECODE_INVAL)
12078
12079     # Check NIC modifications
12080     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12081                     self._VerifyNicModification)
12082
12083   def ExpandNames(self):
12084     self._ExpandAndLockInstance()
12085     # Can't even acquire node locks in shared mode as upcoming changes in
12086     # Ganeti 2.6 will start to modify the node object on disk conversion
12087     self.needed_locks[locking.LEVEL_NODE] = []
12088     self.needed_locks[locking.LEVEL_NODE_RES] = []
12089     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12090
12091   def DeclareLocks(self, level):
12092     # TODO: Acquire group lock in shared mode (disk parameters)
12093     if level == locking.LEVEL_NODE:
12094       self._LockInstancesNodes()
12095       if self.op.disk_template and self.op.remote_node:
12096         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12097         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12098     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12099       # Copy node locks
12100       self.needed_locks[locking.LEVEL_NODE_RES] = \
12101         self.needed_locks[locking.LEVEL_NODE][:]
12102
12103   def BuildHooksEnv(self):
12104     """Build hooks env.
12105
12106     This runs on the master, primary and secondaries.
12107
12108     """
12109     args = dict()
12110     if constants.BE_MINMEM in self.be_new:
12111       args["minmem"] = self.be_new[constants.BE_MINMEM]
12112     if constants.BE_MAXMEM in self.be_new:
12113       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12114     if constants.BE_VCPUS in self.be_new:
12115       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12116     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12117     # information at all.
12118
12119     if self._new_nics is not None:
12120       nics = []
12121
12122       for nic in self._new_nics:
12123         nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12124         mode = nicparams[constants.NIC_MODE]
12125         link = nicparams[constants.NIC_LINK]
12126         nics.append((nic.ip, nic.mac, mode, link))
12127
12128       args["nics"] = nics
12129
12130     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12131     if self.op.disk_template:
12132       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12133     if self.op.runtime_mem:
12134       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12135
12136     return env
12137
12138   def BuildHooksNodes(self):
12139     """Build hooks nodes.
12140
12141     """
12142     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12143     return (nl, nl)
12144
12145   def _PrepareNicModification(self, params, private, old_ip, old_params,
12146                               cluster, pnode):
12147     update_params_dict = dict([(key, params[key])
12148                                for key in constants.NICS_PARAMETERS
12149                                if key in params])
12150
12151     if "bridge" in params:
12152       update_params_dict[constants.NIC_LINK] = params["bridge"]
12153
12154     new_params = _GetUpdatedParams(old_params, update_params_dict)
12155     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12156
12157     new_filled_params = cluster.SimpleFillNIC(new_params)
12158     objects.NIC.CheckParameterSyntax(new_filled_params)
12159
12160     new_mode = new_filled_params[constants.NIC_MODE]
12161     if new_mode == constants.NIC_MODE_BRIDGED:
12162       bridge = new_filled_params[constants.NIC_LINK]
12163       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12164       if msg:
12165         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12166         if self.op.force:
12167           self.warn.append(msg)
12168         else:
12169           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12170
12171     elif new_mode == constants.NIC_MODE_ROUTED:
12172       ip = params.get(constants.INIC_IP, old_ip)
12173       if ip is None:
12174         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12175                                    " on a routed NIC", errors.ECODE_INVAL)
12176
12177     if constants.INIC_MAC in params:
12178       mac = params[constants.INIC_MAC]
12179       if mac is None:
12180         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12181                                    errors.ECODE_INVAL)
12182       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12183         # otherwise generate the MAC address
12184         params[constants.INIC_MAC] = \
12185           self.cfg.GenerateMAC(self.proc.GetECId())
12186       else:
12187         # or validate/reserve the current one
12188         try:
12189           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12190         except errors.ReservationError:
12191           raise errors.OpPrereqError("MAC address '%s' already in use"
12192                                      " in cluster" % mac,
12193                                      errors.ECODE_NOTUNIQUE)
12194
12195     private.params = new_params
12196     private.filled = new_filled_params
12197
12198     return (None, None)
12199
12200   def CheckPrereq(self):
12201     """Check prerequisites.
12202
12203     This only checks the instance list against the existing names.
12204
12205     """
12206     # checking the new params on the primary/secondary nodes
12207
12208     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12209     cluster = self.cluster = self.cfg.GetClusterInfo()
12210     assert self.instance is not None, \
12211       "Cannot retrieve locked instance %s" % self.op.instance_name
12212     pnode = instance.primary_node
12213     nodelist = list(instance.all_nodes)
12214     pnode_info = self.cfg.GetNodeInfo(pnode)
12215     self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12216
12217     # Prepare disk/NIC modifications
12218     self.diskmod = PrepareContainerMods(self.op.disks, None)
12219     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12220
12221     # OS change
12222     if self.op.os_name and not self.op.force:
12223       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12224                       self.op.force_variant)
12225       instance_os = self.op.os_name
12226     else:
12227       instance_os = instance.os
12228
12229     assert not (self.op.disk_template and self.op.disks), \
12230       "Can't modify disk template and apply disk changes at the same time"
12231
12232     if self.op.disk_template:
12233       if instance.disk_template == self.op.disk_template:
12234         raise errors.OpPrereqError("Instance already has disk template %s" %
12235                                    instance.disk_template, errors.ECODE_INVAL)
12236
12237       if (instance.disk_template,
12238           self.op.disk_template) not in self._DISK_CONVERSIONS:
12239         raise errors.OpPrereqError("Unsupported disk template conversion from"
12240                                    " %s to %s" % (instance.disk_template,
12241                                                   self.op.disk_template),
12242                                    errors.ECODE_INVAL)
12243       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12244                           msg="cannot change disk template")
12245       if self.op.disk_template in constants.DTS_INT_MIRROR:
12246         if self.op.remote_node == pnode:
12247           raise errors.OpPrereqError("Given new secondary node %s is the same"
12248                                      " as the primary node of the instance" %
12249                                      self.op.remote_node, errors.ECODE_STATE)
12250         _CheckNodeOnline(self, self.op.remote_node)
12251         _CheckNodeNotDrained(self, self.op.remote_node)
12252         # FIXME: here we assume that the old instance type is DT_PLAIN
12253         assert instance.disk_template == constants.DT_PLAIN
12254         disks = [{constants.IDISK_SIZE: d.size,
12255                   constants.IDISK_VG: d.logical_id[0]}
12256                  for d in instance.disks]
12257         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12258         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12259
12260         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12261         snode_group = self.cfg.GetNodeGroup(snode_info.group)
12262         ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12263         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12264                                 ignore=self.op.ignore_ipolicy)
12265         if pnode_info.group != snode_info.group:
12266           self.LogWarning("The primary and secondary nodes are in two"
12267                           " different node groups; the disk parameters"
12268                           " from the first disk's node group will be"
12269                           " used")
12270
12271     # hvparams processing
12272     if self.op.hvparams:
12273       hv_type = instance.hypervisor
12274       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12275       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12276       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12277
12278       # local check
12279       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12280       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12281       self.hv_proposed = self.hv_new = hv_new # the new actual values
12282       self.hv_inst = i_hvdict # the new dict (without defaults)
12283     else:
12284       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12285                                               instance.hvparams)
12286       self.hv_new = self.hv_inst = {}
12287
12288     # beparams processing
12289     if self.op.beparams:
12290       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12291                                    use_none=True)
12292       objects.UpgradeBeParams(i_bedict)
12293       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12294       be_new = cluster.SimpleFillBE(i_bedict)
12295       self.be_proposed = self.be_new = be_new # the new actual values
12296       self.be_inst = i_bedict # the new dict (without defaults)
12297     else:
12298       self.be_new = self.be_inst = {}
12299       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12300     be_old = cluster.FillBE(instance)
12301
12302     # CPU param validation -- checking every time a paramtere is
12303     # changed to cover all cases where either CPU mask or vcpus have
12304     # changed
12305     if (constants.BE_VCPUS in self.be_proposed and
12306         constants.HV_CPU_MASK in self.hv_proposed):
12307       cpu_list = \
12308         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12309       # Verify mask is consistent with number of vCPUs. Can skip this
12310       # test if only 1 entry in the CPU mask, which means same mask
12311       # is applied to all vCPUs.
12312       if (len(cpu_list) > 1 and
12313           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12314         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12315                                    " CPU mask [%s]" %
12316                                    (self.be_proposed[constants.BE_VCPUS],
12317                                     self.hv_proposed[constants.HV_CPU_MASK]),
12318                                    errors.ECODE_INVAL)
12319
12320       # Only perform this test if a new CPU mask is given
12321       if constants.HV_CPU_MASK in self.hv_new:
12322         # Calculate the largest CPU number requested
12323         max_requested_cpu = max(map(max, cpu_list))
12324         # Check that all of the instance's nodes have enough physical CPUs to
12325         # satisfy the requested CPU mask
12326         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12327                                 max_requested_cpu + 1, instance.hypervisor)
12328
12329     # osparams processing
12330     if self.op.osparams:
12331       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12332       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12333       self.os_inst = i_osdict # the new dict (without defaults)
12334     else:
12335       self.os_inst = {}
12336
12337     self.warn = []
12338
12339     #TODO(dynmem): do the appropriate check involving MINMEM
12340     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12341         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12342       mem_check_list = [pnode]
12343       if be_new[constants.BE_AUTO_BALANCE]:
12344         # either we changed auto_balance to yes or it was from before
12345         mem_check_list.extend(instance.secondary_nodes)
12346       instance_info = self.rpc.call_instance_info(pnode, instance.name,
12347                                                   instance.hypervisor)
12348       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12349                                          [instance.hypervisor])
12350       pninfo = nodeinfo[pnode]
12351       msg = pninfo.fail_msg
12352       if msg:
12353         # Assume the primary node is unreachable and go ahead
12354         self.warn.append("Can't get info from primary node %s: %s" %
12355                          (pnode, msg))
12356       else:
12357         (_, _, (pnhvinfo, )) = pninfo.payload
12358         if not isinstance(pnhvinfo.get("memory_free", None), int):
12359           self.warn.append("Node data from primary node %s doesn't contain"
12360                            " free memory information" % pnode)
12361         elif instance_info.fail_msg:
12362           self.warn.append("Can't get instance runtime information: %s" %
12363                           instance_info.fail_msg)
12364         else:
12365           if instance_info.payload:
12366             current_mem = int(instance_info.payload["memory"])
12367           else:
12368             # Assume instance not running
12369             # (there is a slight race condition here, but it's not very
12370             # probable, and we have no other way to check)
12371             # TODO: Describe race condition
12372             current_mem = 0
12373           #TODO(dynmem): do the appropriate check involving MINMEM
12374           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12375                       pnhvinfo["memory_free"])
12376           if miss_mem > 0:
12377             raise errors.OpPrereqError("This change will prevent the instance"
12378                                        " from starting, due to %d MB of memory"
12379                                        " missing on its primary node" %
12380                                        miss_mem,
12381                                        errors.ECODE_NORES)
12382
12383       if be_new[constants.BE_AUTO_BALANCE]:
12384         for node, nres in nodeinfo.items():
12385           if node not in instance.secondary_nodes:
12386             continue
12387           nres.Raise("Can't get info from secondary node %s" % node,
12388                      prereq=True, ecode=errors.ECODE_STATE)
12389           (_, _, (nhvinfo, )) = nres.payload
12390           if not isinstance(nhvinfo.get("memory_free", None), int):
12391             raise errors.OpPrereqError("Secondary node %s didn't return free"
12392                                        " memory information" % node,
12393                                        errors.ECODE_STATE)
12394           #TODO(dynmem): do the appropriate check involving MINMEM
12395           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12396             raise errors.OpPrereqError("This change will prevent the instance"
12397                                        " from failover to its secondary node"
12398                                        " %s, due to not enough memory" % node,
12399                                        errors.ECODE_STATE)
12400
12401     if self.op.runtime_mem:
12402       remote_info = self.rpc.call_instance_info(instance.primary_node,
12403                                                 instance.name,
12404                                                 instance.hypervisor)
12405       remote_info.Raise("Error checking node %s" % instance.primary_node)
12406       if not remote_info.payload: # not running already
12407         raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12408                                    errors.ECODE_STATE)
12409
12410       current_memory = remote_info.payload["memory"]
12411       if (not self.op.force and
12412            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12413             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12414         raise errors.OpPrereqError("Instance %s must have memory between %d"
12415                                    " and %d MB of memory unless --force is"
12416                                    " given" % (instance.name,
12417                                     self.be_proposed[constants.BE_MINMEM],
12418                                     self.be_proposed[constants.BE_MAXMEM]),
12419                                    errors.ECODE_INVAL)
12420
12421       if self.op.runtime_mem > current_memory:
12422         _CheckNodeFreeMemory(self, instance.primary_node,
12423                              "ballooning memory for instance %s" %
12424                              instance.name,
12425                              self.op.memory - current_memory,
12426                              instance.hypervisor)
12427
12428     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12429       raise errors.OpPrereqError("Disk operations not supported for"
12430                                  " diskless instances",
12431                                  errors.ECODE_INVAL)
12432
12433     def _PrepareNicCreate(_, params, private):
12434       return self._PrepareNicModification(params, private, None, {},
12435                                           cluster, pnode)
12436
12437     def _PrepareNicMod(_, nic, params, private):
12438       return self._PrepareNicModification(params, private, nic.ip,
12439                                           nic.nicparams, cluster, pnode)
12440
12441     # Verify NIC changes (operating on copy)
12442     nics = instance.nics[:]
12443     ApplyContainerMods("NIC", nics, None, self.nicmod,
12444                        _PrepareNicCreate, _PrepareNicMod, None)
12445     if len(nics) > constants.MAX_NICS:
12446       raise errors.OpPrereqError("Instance has too many network interfaces"
12447                                  " (%d), cannot add more" % constants.MAX_NICS,
12448                                  errors.ECODE_STATE)
12449
12450     # Verify disk changes (operating on a copy)
12451     disks = instance.disks[:]
12452     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12453     if len(disks) > constants.MAX_DISKS:
12454       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12455                                  " more" % constants.MAX_DISKS,
12456                                  errors.ECODE_STATE)
12457
12458     if self.op.offline is not None:
12459       if self.op.offline:
12460         msg = "can't change to offline"
12461       else:
12462         msg = "can't change to online"
12463       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12464
12465     # Pre-compute NIC changes (necessary to use result in hooks)
12466     self._nic_chgdesc = []
12467     if self.nicmod:
12468       # Operate on copies as this is still in prereq
12469       nics = [nic.Copy() for nic in instance.nics]
12470       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12471                          self._CreateNewNic, self._ApplyNicMods, None)
12472       self._new_nics = nics
12473     else:
12474       self._new_nics = None
12475
12476   def _ConvertPlainToDrbd(self, feedback_fn):
12477     """Converts an instance from plain to drbd.
12478
12479     """
12480     feedback_fn("Converting template to drbd")
12481     instance = self.instance
12482     pnode = instance.primary_node
12483     snode = self.op.remote_node
12484
12485     assert instance.disk_template == constants.DT_PLAIN
12486
12487     # create a fake disk info for _GenerateDiskTemplate
12488     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12489                   constants.IDISK_VG: d.logical_id[0]}
12490                  for d in instance.disks]
12491     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12492                                       instance.name, pnode, [snode],
12493                                       disk_info, None, None, 0, feedback_fn,
12494                                       self.diskparams)
12495     info = _GetInstanceInfoText(instance)
12496     feedback_fn("Creating aditional volumes...")
12497     # first, create the missing data and meta devices
12498     for disk in new_disks:
12499       # unfortunately this is... not too nice
12500       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12501                             info, True)
12502       for child in disk.children:
12503         _CreateSingleBlockDev(self, snode, instance, child, info, True)
12504     # at this stage, all new LVs have been created, we can rename the
12505     # old ones
12506     feedback_fn("Renaming original volumes...")
12507     rename_list = [(o, n.children[0].logical_id)
12508                    for (o, n) in zip(instance.disks, new_disks)]
12509     result = self.rpc.call_blockdev_rename(pnode, rename_list)
12510     result.Raise("Failed to rename original LVs")
12511
12512     feedback_fn("Initializing DRBD devices...")
12513     # all child devices are in place, we can now create the DRBD devices
12514     for disk in new_disks:
12515       for node in [pnode, snode]:
12516         f_create = node == pnode
12517         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12518
12519     # at this point, the instance has been modified
12520     instance.disk_template = constants.DT_DRBD8
12521     instance.disks = new_disks
12522     self.cfg.Update(instance, feedback_fn)
12523
12524     # Release node locks while waiting for sync
12525     _ReleaseLocks(self, locking.LEVEL_NODE)
12526
12527     # disks are created, waiting for sync
12528     disk_abort = not _WaitForSync(self, instance,
12529                                   oneshot=not self.op.wait_for_sync)
12530     if disk_abort:
12531       raise errors.OpExecError("There are some degraded disks for"
12532                                " this instance, please cleanup manually")
12533
12534     # Node resource locks will be released by caller
12535
12536   def _ConvertDrbdToPlain(self, feedback_fn):
12537     """Converts an instance from drbd to plain.
12538
12539     """
12540     instance = self.instance
12541
12542     assert len(instance.secondary_nodes) == 1
12543     assert instance.disk_template == constants.DT_DRBD8
12544
12545     pnode = instance.primary_node
12546     snode = instance.secondary_nodes[0]
12547     feedback_fn("Converting template to plain")
12548
12549     old_disks = instance.disks
12550     new_disks = [d.children[0] for d in old_disks]
12551
12552     # copy over size and mode
12553     for parent, child in zip(old_disks, new_disks):
12554       child.size = parent.size
12555       child.mode = parent.mode
12556
12557     # update instance structure
12558     instance.disks = new_disks
12559     instance.disk_template = constants.DT_PLAIN
12560     self.cfg.Update(instance, feedback_fn)
12561
12562     # Release locks in case removing disks takes a while
12563     _ReleaseLocks(self, locking.LEVEL_NODE)
12564
12565     feedback_fn("Removing volumes on the secondary node...")
12566     for disk in old_disks:
12567       self.cfg.SetDiskID(disk, snode)
12568       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12569       if msg:
12570         self.LogWarning("Could not remove block device %s on node %s,"
12571                         " continuing anyway: %s", disk.iv_name, snode, msg)
12572
12573     feedback_fn("Removing unneeded volumes on the primary node...")
12574     for idx, disk in enumerate(old_disks):
12575       meta = disk.children[1]
12576       self.cfg.SetDiskID(meta, pnode)
12577       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12578       if msg:
12579         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12580                         " continuing anyway: %s", idx, pnode, msg)
12581
12582     # this is a DRBD disk, return its port to the pool
12583     for disk in old_disks:
12584       tcp_port = disk.logical_id[2]
12585       self.cfg.AddTcpUdpPort(tcp_port)
12586
12587     # Node resource locks will be released by caller
12588
12589   def _CreateNewDisk(self, idx, params, _):
12590     """Creates a new disk.
12591
12592     """
12593     instance = self.instance
12594
12595     # add a new disk
12596     if instance.disk_template in constants.DTS_FILEBASED:
12597       (file_driver, file_path) = instance.disks[0].logical_id
12598       file_path = os.path.dirname(file_path)
12599     else:
12600       file_driver = file_path = None
12601
12602     disk = \
12603       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12604                             instance.primary_node, instance.secondary_nodes,
12605                             [params], file_path, file_driver, idx,
12606                             self.Log, self.diskparams)[0]
12607
12608     info = _GetInstanceInfoText(instance)
12609
12610     logging.info("Creating volume %s for instance %s",
12611                  disk.iv_name, instance.name)
12612     # Note: this needs to be kept in sync with _CreateDisks
12613     #HARDCODE
12614     for node in instance.all_nodes:
12615       f_create = (node == instance.primary_node)
12616       try:
12617         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12618       except errors.OpExecError, err:
12619         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12620                         disk.iv_name, disk, node, err)
12621
12622     return (disk, [
12623       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12624       ])
12625
12626   @staticmethod
12627   def _ModifyDisk(idx, disk, params, _):
12628     """Modifies a disk.
12629
12630     """
12631     disk.mode = params[constants.IDISK_MODE]
12632
12633     return [
12634       ("disk.mode/%d" % idx, disk.mode),
12635       ]
12636
12637   def _RemoveDisk(self, idx, root, _):
12638     """Removes a disk.
12639
12640     """
12641     for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12642       self.cfg.SetDiskID(disk, node)
12643       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12644       if msg:
12645         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12646                         " continuing anyway", idx, node, msg)
12647
12648     # if this is a DRBD disk, return its port to the pool
12649     if root.dev_type in constants.LDS_DRBD:
12650       self.cfg.AddTcpUdpPort(root.logical_id[2])
12651
12652   @staticmethod
12653   def _CreateNewNic(idx, params, private):
12654     """Creates data structure for a new network interface.
12655
12656     """
12657     mac = params[constants.INIC_MAC]
12658     ip = params.get(constants.INIC_IP, None)
12659     nicparams = private.params
12660
12661     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12662       ("nic.%d" % idx,
12663        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12664        (mac, ip, private.filled[constants.NIC_MODE],
12665        private.filled[constants.NIC_LINK])),
12666       ])
12667
12668   @staticmethod
12669   def _ApplyNicMods(idx, nic, params, private):
12670     """Modifies a network interface.
12671
12672     """
12673     changes = []
12674
12675     for key in [constants.INIC_MAC, constants.INIC_IP]:
12676       if key in params:
12677         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12678         setattr(nic, key, params[key])
12679
12680     if private.params:
12681       nic.nicparams = private.params
12682
12683       for (key, val) in params.items():
12684         changes.append(("nic.%s/%d" % (key, idx), val))
12685
12686     return changes
12687
12688   def Exec(self, feedback_fn):
12689     """Modifies an instance.
12690
12691     All parameters take effect only at the next restart of the instance.
12692
12693     """
12694     # Process here the warnings from CheckPrereq, as we don't have a
12695     # feedback_fn there.
12696     # TODO: Replace with self.LogWarning
12697     for warn in self.warn:
12698       feedback_fn("WARNING: %s" % warn)
12699
12700     assert ((self.op.disk_template is None) ^
12701             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12702       "Not owning any node resource locks"
12703
12704     result = []
12705     instance = self.instance
12706
12707     # runtime memory
12708     if self.op.runtime_mem:
12709       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12710                                                      instance,
12711                                                      self.op.runtime_mem)
12712       rpcres.Raise("Cannot modify instance runtime memory")
12713       result.append(("runtime_memory", self.op.runtime_mem))
12714
12715     # Apply disk changes
12716     ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12717                        self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12718     _UpdateIvNames(0, instance.disks)
12719
12720     if self.op.disk_template:
12721       if __debug__:
12722         check_nodes = set(instance.all_nodes)
12723         if self.op.remote_node:
12724           check_nodes.add(self.op.remote_node)
12725         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12726           owned = self.owned_locks(level)
12727           assert not (check_nodes - owned), \
12728             ("Not owning the correct locks, owning %r, expected at least %r" %
12729              (owned, check_nodes))
12730
12731       r_shut = _ShutdownInstanceDisks(self, instance)
12732       if not r_shut:
12733         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12734                                  " proceed with disk template conversion")
12735       mode = (instance.disk_template, self.op.disk_template)
12736       try:
12737         self._DISK_CONVERSIONS[mode](self, feedback_fn)
12738       except:
12739         self.cfg.ReleaseDRBDMinors(instance.name)
12740         raise
12741       result.append(("disk_template", self.op.disk_template))
12742
12743       assert instance.disk_template == self.op.disk_template, \
12744         ("Expected disk template '%s', found '%s'" %
12745          (self.op.disk_template, instance.disk_template))
12746
12747     # Release node and resource locks if there are any (they might already have
12748     # been released during disk conversion)
12749     _ReleaseLocks(self, locking.LEVEL_NODE)
12750     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12751
12752     # Apply NIC changes
12753     if self._new_nics is not None:
12754       instance.nics = self._new_nics
12755       result.extend(self._nic_chgdesc)
12756
12757     # hvparams changes
12758     if self.op.hvparams:
12759       instance.hvparams = self.hv_inst
12760       for key, val in self.op.hvparams.iteritems():
12761         result.append(("hv/%s" % key, val))
12762
12763     # beparams changes
12764     if self.op.beparams:
12765       instance.beparams = self.be_inst
12766       for key, val in self.op.beparams.iteritems():
12767         result.append(("be/%s" % key, val))
12768
12769     # OS change
12770     if self.op.os_name:
12771       instance.os = self.op.os_name
12772
12773     # osparams changes
12774     if self.op.osparams:
12775       instance.osparams = self.os_inst
12776       for key, val in self.op.osparams.iteritems():
12777         result.append(("os/%s" % key, val))
12778
12779     if self.op.offline is None:
12780       # Ignore
12781       pass
12782     elif self.op.offline:
12783       # Mark instance as offline
12784       self.cfg.MarkInstanceOffline(instance.name)
12785       result.append(("admin_state", constants.ADMINST_OFFLINE))
12786     else:
12787       # Mark instance as online, but stopped
12788       self.cfg.MarkInstanceDown(instance.name)
12789       result.append(("admin_state", constants.ADMINST_DOWN))
12790
12791     self.cfg.Update(instance, feedback_fn)
12792
12793     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12794                 self.owned_locks(locking.LEVEL_NODE)), \
12795       "All node locks should have been released by now"
12796
12797     return result
12798
12799   _DISK_CONVERSIONS = {
12800     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12801     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12802     }
12803
12804
12805 class LUInstanceChangeGroup(LogicalUnit):
12806   HPATH = "instance-change-group"
12807   HTYPE = constants.HTYPE_INSTANCE
12808   REQ_BGL = False
12809
12810   def ExpandNames(self):
12811     self.share_locks = _ShareAll()
12812     self.needed_locks = {
12813       locking.LEVEL_NODEGROUP: [],
12814       locking.LEVEL_NODE: [],
12815       }
12816
12817     self._ExpandAndLockInstance()
12818
12819     if self.op.target_groups:
12820       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12821                                   self.op.target_groups)
12822     else:
12823       self.req_target_uuids = None
12824
12825     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12826
12827   def DeclareLocks(self, level):
12828     if level == locking.LEVEL_NODEGROUP:
12829       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12830
12831       if self.req_target_uuids:
12832         lock_groups = set(self.req_target_uuids)
12833
12834         # Lock all groups used by instance optimistically; this requires going
12835         # via the node before it's locked, requiring verification later on
12836         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12837         lock_groups.update(instance_groups)
12838       else:
12839         # No target groups, need to lock all of them
12840         lock_groups = locking.ALL_SET
12841
12842       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12843
12844     elif level == locking.LEVEL_NODE:
12845       if self.req_target_uuids:
12846         # Lock all nodes used by instances
12847         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12848         self._LockInstancesNodes()
12849
12850         # Lock all nodes in all potential target groups
12851         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12852                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12853         member_nodes = [node_name
12854                         for group in lock_groups
12855                         for node_name in self.cfg.GetNodeGroup(group).members]
12856         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12857       else:
12858         # Lock all nodes as all groups are potential targets
12859         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12860
12861   def CheckPrereq(self):
12862     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12863     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12864     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12865
12866     assert (self.req_target_uuids is None or
12867             owned_groups.issuperset(self.req_target_uuids))
12868     assert owned_instances == set([self.op.instance_name])
12869
12870     # Get instance information
12871     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12872
12873     # Check if node groups for locked instance are still correct
12874     assert owned_nodes.issuperset(self.instance.all_nodes), \
12875       ("Instance %s's nodes changed while we kept the lock" %
12876        self.op.instance_name)
12877
12878     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12879                                            owned_groups)
12880
12881     if self.req_target_uuids:
12882       # User requested specific target groups
12883       self.target_uuids = self.req_target_uuids
12884     else:
12885       # All groups except those used by the instance are potential targets
12886       self.target_uuids = owned_groups - inst_groups
12887
12888     conflicting_groups = self.target_uuids & inst_groups
12889     if conflicting_groups:
12890       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12891                                  " used by the instance '%s'" %
12892                                  (utils.CommaJoin(conflicting_groups),
12893                                   self.op.instance_name),
12894                                  errors.ECODE_INVAL)
12895
12896     if not self.target_uuids:
12897       raise errors.OpPrereqError("There are no possible target groups",
12898                                  errors.ECODE_INVAL)
12899
12900   def BuildHooksEnv(self):
12901     """Build hooks env.
12902
12903     """
12904     assert self.target_uuids
12905
12906     env = {
12907       "TARGET_GROUPS": " ".join(self.target_uuids),
12908       }
12909
12910     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12911
12912     return env
12913
12914   def BuildHooksNodes(self):
12915     """Build hooks nodes.
12916
12917     """
12918     mn = self.cfg.GetMasterNode()
12919     return ([mn], [mn])
12920
12921   def Exec(self, feedback_fn):
12922     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12923
12924     assert instances == [self.op.instance_name], "Instance not locked"
12925
12926     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12927                      instances=instances, target_groups=list(self.target_uuids))
12928
12929     ial.Run(self.op.iallocator)
12930
12931     if not ial.success:
12932       raise errors.OpPrereqError("Can't compute solution for changing group of"
12933                                  " instance '%s' using iallocator '%s': %s" %
12934                                  (self.op.instance_name, self.op.iallocator,
12935                                   ial.info),
12936                                  errors.ECODE_NORES)
12937
12938     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12939
12940     self.LogInfo("Iallocator returned %s job(s) for changing group of"
12941                  " instance '%s'", len(jobs), self.op.instance_name)
12942
12943     return ResultWithJobs(jobs)
12944
12945
12946 class LUBackupQuery(NoHooksLU):
12947   """Query the exports list
12948
12949   """
12950   REQ_BGL = False
12951
12952   def ExpandNames(self):
12953     self.needed_locks = {}
12954     self.share_locks[locking.LEVEL_NODE] = 1
12955     if not self.op.nodes:
12956       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12957     else:
12958       self.needed_locks[locking.LEVEL_NODE] = \
12959         _GetWantedNodes(self, self.op.nodes)
12960
12961   def Exec(self, feedback_fn):
12962     """Compute the list of all the exported system images.
12963
12964     @rtype: dict
12965     @return: a dictionary with the structure node->(export-list)
12966         where export-list is a list of the instances exported on
12967         that node.
12968
12969     """
12970     self.nodes = self.owned_locks(locking.LEVEL_NODE)
12971     rpcresult = self.rpc.call_export_list(self.nodes)
12972     result = {}
12973     for node in rpcresult:
12974       if rpcresult[node].fail_msg:
12975         result[node] = False
12976       else:
12977         result[node] = rpcresult[node].payload
12978
12979     return result
12980
12981
12982 class LUBackupPrepare(NoHooksLU):
12983   """Prepares an instance for an export and returns useful information.
12984
12985   """
12986   REQ_BGL = False
12987
12988   def ExpandNames(self):
12989     self._ExpandAndLockInstance()
12990
12991   def CheckPrereq(self):
12992     """Check prerequisites.
12993
12994     """
12995     instance_name = self.op.instance_name
12996
12997     self.instance = self.cfg.GetInstanceInfo(instance_name)
12998     assert self.instance is not None, \
12999           "Cannot retrieve locked instance %s" % self.op.instance_name
13000     _CheckNodeOnline(self, self.instance.primary_node)
13001
13002     self._cds = _GetClusterDomainSecret()
13003
13004   def Exec(self, feedback_fn):
13005     """Prepares an instance for an export.
13006
13007     """
13008     instance = self.instance
13009
13010     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13011       salt = utils.GenerateSecret(8)
13012
13013       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13014       result = self.rpc.call_x509_cert_create(instance.primary_node,
13015                                               constants.RIE_CERT_VALIDITY)
13016       result.Raise("Can't create X509 key and certificate on %s" % result.node)
13017
13018       (name, cert_pem) = result.payload
13019
13020       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13021                                              cert_pem)
13022
13023       return {
13024         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13025         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13026                           salt),
13027         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13028         }
13029
13030     return None
13031
13032
13033 class LUBackupExport(LogicalUnit):
13034   """Export an instance to an image in the cluster.
13035
13036   """
13037   HPATH = "instance-export"
13038   HTYPE = constants.HTYPE_INSTANCE
13039   REQ_BGL = False
13040
13041   def CheckArguments(self):
13042     """Check the arguments.
13043
13044     """
13045     self.x509_key_name = self.op.x509_key_name
13046     self.dest_x509_ca_pem = self.op.destination_x509_ca
13047
13048     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13049       if not self.x509_key_name:
13050         raise errors.OpPrereqError("Missing X509 key name for encryption",
13051                                    errors.ECODE_INVAL)
13052
13053       if not self.dest_x509_ca_pem:
13054         raise errors.OpPrereqError("Missing destination X509 CA",
13055                                    errors.ECODE_INVAL)
13056
13057   def ExpandNames(self):
13058     self._ExpandAndLockInstance()
13059
13060     # Lock all nodes for local exports
13061     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13062       # FIXME: lock only instance primary and destination node
13063       #
13064       # Sad but true, for now we have do lock all nodes, as we don't know where
13065       # the previous export might be, and in this LU we search for it and
13066       # remove it from its current node. In the future we could fix this by:
13067       #  - making a tasklet to search (share-lock all), then create the
13068       #    new one, then one to remove, after
13069       #  - removing the removal operation altogether
13070       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13071
13072   def DeclareLocks(self, level):
13073     """Last minute lock declaration."""
13074     # All nodes are locked anyway, so nothing to do here.
13075
13076   def BuildHooksEnv(self):
13077     """Build hooks env.
13078
13079     This will run on the master, primary node and target node.
13080
13081     """
13082     env = {
13083       "EXPORT_MODE": self.op.mode,
13084       "EXPORT_NODE": self.op.target_node,
13085       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13086       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13087       # TODO: Generic function for boolean env variables
13088       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13089       }
13090
13091     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13092
13093     return env
13094
13095   def BuildHooksNodes(self):
13096     """Build hooks nodes.
13097
13098     """
13099     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13100
13101     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13102       nl.append(self.op.target_node)
13103
13104     return (nl, nl)
13105
13106   def CheckPrereq(self):
13107     """Check prerequisites.
13108
13109     This checks that the instance and node names are valid.
13110
13111     """
13112     instance_name = self.op.instance_name
13113
13114     self.instance = self.cfg.GetInstanceInfo(instance_name)
13115     assert self.instance is not None, \
13116           "Cannot retrieve locked instance %s" % self.op.instance_name
13117     _CheckNodeOnline(self, self.instance.primary_node)
13118
13119     if (self.op.remove_instance and
13120         self.instance.admin_state == constants.ADMINST_UP and
13121         not self.op.shutdown):
13122       raise errors.OpPrereqError("Can not remove instance without shutting it"
13123                                  " down before")
13124
13125     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13126       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13127       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13128       assert self.dst_node is not None
13129
13130       _CheckNodeOnline(self, self.dst_node.name)
13131       _CheckNodeNotDrained(self, self.dst_node.name)
13132
13133       self._cds = None
13134       self.dest_disk_info = None
13135       self.dest_x509_ca = None
13136
13137     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13138       self.dst_node = None
13139
13140       if len(self.op.target_node) != len(self.instance.disks):
13141         raise errors.OpPrereqError(("Received destination information for %s"
13142                                     " disks, but instance %s has %s disks") %
13143                                    (len(self.op.target_node), instance_name,
13144                                     len(self.instance.disks)),
13145                                    errors.ECODE_INVAL)
13146
13147       cds = _GetClusterDomainSecret()
13148
13149       # Check X509 key name
13150       try:
13151         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13152       except (TypeError, ValueError), err:
13153         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13154
13155       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13156         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13157                                    errors.ECODE_INVAL)
13158
13159       # Load and verify CA
13160       try:
13161         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13162       except OpenSSL.crypto.Error, err:
13163         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13164                                    (err, ), errors.ECODE_INVAL)
13165
13166       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13167       if errcode is not None:
13168         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13169                                    (msg, ), errors.ECODE_INVAL)
13170
13171       self.dest_x509_ca = cert
13172
13173       # Verify target information
13174       disk_info = []
13175       for idx, disk_data in enumerate(self.op.target_node):
13176         try:
13177           (host, port, magic) = \
13178             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13179         except errors.GenericError, err:
13180           raise errors.OpPrereqError("Target info for disk %s: %s" %
13181                                      (idx, err), errors.ECODE_INVAL)
13182
13183         disk_info.append((host, port, magic))
13184
13185       assert len(disk_info) == len(self.op.target_node)
13186       self.dest_disk_info = disk_info
13187
13188     else:
13189       raise errors.ProgrammerError("Unhandled export mode %r" %
13190                                    self.op.mode)
13191
13192     # instance disk type verification
13193     # TODO: Implement export support for file-based disks
13194     for disk in self.instance.disks:
13195       if disk.dev_type == constants.LD_FILE:
13196         raise errors.OpPrereqError("Export not supported for instances with"
13197                                    " file-based disks", errors.ECODE_INVAL)
13198
13199   def _CleanupExports(self, feedback_fn):
13200     """Removes exports of current instance from all other nodes.
13201
13202     If an instance in a cluster with nodes A..D was exported to node C, its
13203     exports will be removed from the nodes A, B and D.
13204
13205     """
13206     assert self.op.mode != constants.EXPORT_MODE_REMOTE
13207
13208     nodelist = self.cfg.GetNodeList()
13209     nodelist.remove(self.dst_node.name)
13210
13211     # on one-node clusters nodelist will be empty after the removal
13212     # if we proceed the backup would be removed because OpBackupQuery
13213     # substitutes an empty list with the full cluster node list.
13214     iname = self.instance.name
13215     if nodelist:
13216       feedback_fn("Removing old exports for instance %s" % iname)
13217       exportlist = self.rpc.call_export_list(nodelist)
13218       for node in exportlist:
13219         if exportlist[node].fail_msg:
13220           continue
13221         if iname in exportlist[node].payload:
13222           msg = self.rpc.call_export_remove(node, iname).fail_msg
13223           if msg:
13224             self.LogWarning("Could not remove older export for instance %s"
13225                             " on node %s: %s", iname, node, msg)
13226
13227   def Exec(self, feedback_fn):
13228     """Export an instance to an image in the cluster.
13229
13230     """
13231     assert self.op.mode in constants.EXPORT_MODES
13232
13233     instance = self.instance
13234     src_node = instance.primary_node
13235
13236     if self.op.shutdown:
13237       # shutdown the instance, but not the disks
13238       feedback_fn("Shutting down instance %s" % instance.name)
13239       result = self.rpc.call_instance_shutdown(src_node, instance,
13240                                                self.op.shutdown_timeout)
13241       # TODO: Maybe ignore failures if ignore_remove_failures is set
13242       result.Raise("Could not shutdown instance %s on"
13243                    " node %s" % (instance.name, src_node))
13244
13245     # set the disks ID correctly since call_instance_start needs the
13246     # correct drbd minor to create the symlinks
13247     for disk in instance.disks:
13248       self.cfg.SetDiskID(disk, src_node)
13249
13250     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13251
13252     if activate_disks:
13253       # Activate the instance disks if we'exporting a stopped instance
13254       feedback_fn("Activating disks for %s" % instance.name)
13255       _StartInstanceDisks(self, instance, None)
13256
13257     try:
13258       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13259                                                      instance)
13260
13261       helper.CreateSnapshots()
13262       try:
13263         if (self.op.shutdown and
13264             instance.admin_state == constants.ADMINST_UP and
13265             not self.op.remove_instance):
13266           assert not activate_disks
13267           feedback_fn("Starting instance %s" % instance.name)
13268           result = self.rpc.call_instance_start(src_node,
13269                                                 (instance, None, None), False)
13270           msg = result.fail_msg
13271           if msg:
13272             feedback_fn("Failed to start instance: %s" % msg)
13273             _ShutdownInstanceDisks(self, instance)
13274             raise errors.OpExecError("Could not start instance: %s" % msg)
13275
13276         if self.op.mode == constants.EXPORT_MODE_LOCAL:
13277           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13278         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13279           connect_timeout = constants.RIE_CONNECT_TIMEOUT
13280           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13281
13282           (key_name, _, _) = self.x509_key_name
13283
13284           dest_ca_pem = \
13285             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13286                                             self.dest_x509_ca)
13287
13288           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13289                                                      key_name, dest_ca_pem,
13290                                                      timeouts)
13291       finally:
13292         helper.Cleanup()
13293
13294       # Check for backwards compatibility
13295       assert len(dresults) == len(instance.disks)
13296       assert compat.all(isinstance(i, bool) for i in dresults), \
13297              "Not all results are boolean: %r" % dresults
13298
13299     finally:
13300       if activate_disks:
13301         feedback_fn("Deactivating disks for %s" % instance.name)
13302         _ShutdownInstanceDisks(self, instance)
13303
13304     if not (compat.all(dresults) and fin_resu):
13305       failures = []
13306       if not fin_resu:
13307         failures.append("export finalization")
13308       if not compat.all(dresults):
13309         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13310                                if not dsk)
13311         failures.append("disk export: disk(s) %s" % fdsk)
13312
13313       raise errors.OpExecError("Export failed, errors in %s" %
13314                                utils.CommaJoin(failures))
13315
13316     # At this point, the export was successful, we can cleanup/finish
13317
13318     # Remove instance if requested
13319     if self.op.remove_instance:
13320       feedback_fn("Removing instance %s" % instance.name)
13321       _RemoveInstance(self, feedback_fn, instance,
13322                       self.op.ignore_remove_failures)
13323
13324     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13325       self._CleanupExports(feedback_fn)
13326
13327     return fin_resu, dresults
13328
13329
13330 class LUBackupRemove(NoHooksLU):
13331   """Remove exports related to the named instance.
13332
13333   """
13334   REQ_BGL = False
13335
13336   def ExpandNames(self):
13337     self.needed_locks = {}
13338     # We need all nodes to be locked in order for RemoveExport to work, but we
13339     # don't need to lock the instance itself, as nothing will happen to it (and
13340     # we can remove exports also for a removed instance)
13341     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13342
13343   def Exec(self, feedback_fn):
13344     """Remove any export.
13345
13346     """
13347     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13348     # If the instance was not found we'll try with the name that was passed in.
13349     # This will only work if it was an FQDN, though.
13350     fqdn_warn = False
13351     if not instance_name:
13352       fqdn_warn = True
13353       instance_name = self.op.instance_name
13354
13355     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13356     exportlist = self.rpc.call_export_list(locked_nodes)
13357     found = False
13358     for node in exportlist:
13359       msg = exportlist[node].fail_msg
13360       if msg:
13361         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13362         continue
13363       if instance_name in exportlist[node].payload:
13364         found = True
13365         result = self.rpc.call_export_remove(node, instance_name)
13366         msg = result.fail_msg
13367         if msg:
13368           logging.error("Could not remove export for instance %s"
13369                         " on node %s: %s", instance_name, node, msg)
13370
13371     if fqdn_warn and not found:
13372       feedback_fn("Export not found. If trying to remove an export belonging"
13373                   " to a deleted instance please use its Fully Qualified"
13374                   " Domain Name.")
13375
13376
13377 class LUGroupAdd(LogicalUnit):
13378   """Logical unit for creating node groups.
13379
13380   """
13381   HPATH = "group-add"
13382   HTYPE = constants.HTYPE_GROUP
13383   REQ_BGL = False
13384
13385   def ExpandNames(self):
13386     # We need the new group's UUID here so that we can create and acquire the
13387     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13388     # that it should not check whether the UUID exists in the configuration.
13389     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13390     self.needed_locks = {}
13391     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13392
13393   def CheckPrereq(self):
13394     """Check prerequisites.
13395
13396     This checks that the given group name is not an existing node group
13397     already.
13398
13399     """
13400     try:
13401       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13402     except errors.OpPrereqError:
13403       pass
13404     else:
13405       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13406                                  " node group (UUID: %s)" %
13407                                  (self.op.group_name, existing_uuid),
13408                                  errors.ECODE_EXISTS)
13409
13410     if self.op.ndparams:
13411       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13412
13413     if self.op.hv_state:
13414       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13415     else:
13416       self.new_hv_state = None
13417
13418     if self.op.disk_state:
13419       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13420     else:
13421       self.new_disk_state = None
13422
13423     if self.op.diskparams:
13424       for templ in constants.DISK_TEMPLATES:
13425         if templ not in self.op.diskparams:
13426           self.op.diskparams[templ] = {}
13427         utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13428     else:
13429       self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13430
13431     if self.op.ipolicy:
13432       cluster = self.cfg.GetClusterInfo()
13433       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13434       try:
13435         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13436       except errors.ConfigurationError, err:
13437         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13438                                    errors.ECODE_INVAL)
13439
13440   def BuildHooksEnv(self):
13441     """Build hooks env.
13442
13443     """
13444     return {
13445       "GROUP_NAME": self.op.group_name,
13446       }
13447
13448   def BuildHooksNodes(self):
13449     """Build hooks nodes.
13450
13451     """
13452     mn = self.cfg.GetMasterNode()
13453     return ([mn], [mn])
13454
13455   def Exec(self, feedback_fn):
13456     """Add the node group to the cluster.
13457
13458     """
13459     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13460                                   uuid=self.group_uuid,
13461                                   alloc_policy=self.op.alloc_policy,
13462                                   ndparams=self.op.ndparams,
13463                                   diskparams=self.op.diskparams,
13464                                   ipolicy=self.op.ipolicy,
13465                                   hv_state_static=self.new_hv_state,
13466                                   disk_state_static=self.new_disk_state)
13467
13468     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13469     del self.remove_locks[locking.LEVEL_NODEGROUP]
13470
13471
13472 class LUGroupAssignNodes(NoHooksLU):
13473   """Logical unit for assigning nodes to groups.
13474
13475   """
13476   REQ_BGL = False
13477
13478   def ExpandNames(self):
13479     # These raise errors.OpPrereqError on their own:
13480     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13481     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13482
13483     # We want to lock all the affected nodes and groups. We have readily
13484     # available the list of nodes, and the *destination* group. To gather the
13485     # list of "source" groups, we need to fetch node information later on.
13486     self.needed_locks = {
13487       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13488       locking.LEVEL_NODE: self.op.nodes,
13489       }
13490
13491   def DeclareLocks(self, level):
13492     if level == locking.LEVEL_NODEGROUP:
13493       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13494
13495       # Try to get all affected nodes' groups without having the group or node
13496       # lock yet. Needs verification later in the code flow.
13497       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13498
13499       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13500
13501   def CheckPrereq(self):
13502     """Check prerequisites.
13503
13504     """
13505     assert self.needed_locks[locking.LEVEL_NODEGROUP]
13506     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13507             frozenset(self.op.nodes))
13508
13509     expected_locks = (set([self.group_uuid]) |
13510                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13511     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13512     if actual_locks != expected_locks:
13513       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13514                                " current groups are '%s', used to be '%s'" %
13515                                (utils.CommaJoin(expected_locks),
13516                                 utils.CommaJoin(actual_locks)))
13517
13518     self.node_data = self.cfg.GetAllNodesInfo()
13519     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13520     instance_data = self.cfg.GetAllInstancesInfo()
13521
13522     if self.group is None:
13523       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13524                                (self.op.group_name, self.group_uuid))
13525
13526     (new_splits, previous_splits) = \
13527       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13528                                              for node in self.op.nodes],
13529                                             self.node_data, instance_data)
13530
13531     if new_splits:
13532       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13533
13534       if not self.op.force:
13535         raise errors.OpExecError("The following instances get split by this"
13536                                  " change and --force was not given: %s" %
13537                                  fmt_new_splits)
13538       else:
13539         self.LogWarning("This operation will split the following instances: %s",
13540                         fmt_new_splits)
13541
13542         if previous_splits:
13543           self.LogWarning("In addition, these already-split instances continue"
13544                           " to be split across groups: %s",
13545                           utils.CommaJoin(utils.NiceSort(previous_splits)))
13546
13547   def Exec(self, feedback_fn):
13548     """Assign nodes to a new group.
13549
13550     """
13551     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13552
13553     self.cfg.AssignGroupNodes(mods)
13554
13555   @staticmethod
13556   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13557     """Check for split instances after a node assignment.
13558
13559     This method considers a series of node assignments as an atomic operation,
13560     and returns information about split instances after applying the set of
13561     changes.
13562
13563     In particular, it returns information about newly split instances, and
13564     instances that were already split, and remain so after the change.
13565
13566     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13567     considered.
13568
13569     @type changes: list of (node_name, new_group_uuid) pairs.
13570     @param changes: list of node assignments to consider.
13571     @param node_data: a dict with data for all nodes
13572     @param instance_data: a dict with all instances to consider
13573     @rtype: a two-tuple
13574     @return: a list of instances that were previously okay and result split as a
13575       consequence of this change, and a list of instances that were previously
13576       split and this change does not fix.
13577
13578     """
13579     changed_nodes = dict((node, group) for node, group in changes
13580                          if node_data[node].group != group)
13581
13582     all_split_instances = set()
13583     previously_split_instances = set()
13584
13585     def InstanceNodes(instance):
13586       return [instance.primary_node] + list(instance.secondary_nodes)
13587
13588     for inst in instance_data.values():
13589       if inst.disk_template not in constants.DTS_INT_MIRROR:
13590         continue
13591
13592       instance_nodes = InstanceNodes(inst)
13593
13594       if len(set(node_data[node].group for node in instance_nodes)) > 1:
13595         previously_split_instances.add(inst.name)
13596
13597       if len(set(changed_nodes.get(node, node_data[node].group)
13598                  for node in instance_nodes)) > 1:
13599         all_split_instances.add(inst.name)
13600
13601     return (list(all_split_instances - previously_split_instances),
13602             list(previously_split_instances & all_split_instances))
13603
13604
13605 class _GroupQuery(_QueryBase):
13606   FIELDS = query.GROUP_FIELDS
13607
13608   def ExpandNames(self, lu):
13609     lu.needed_locks = {}
13610
13611     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13612     self._cluster = lu.cfg.GetClusterInfo()
13613     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13614
13615     if not self.names:
13616       self.wanted = [name_to_uuid[name]
13617                      for name in utils.NiceSort(name_to_uuid.keys())]
13618     else:
13619       # Accept names to be either names or UUIDs.
13620       missing = []
13621       self.wanted = []
13622       all_uuid = frozenset(self._all_groups.keys())
13623
13624       for name in self.names:
13625         if name in all_uuid:
13626           self.wanted.append(name)
13627         elif name in name_to_uuid:
13628           self.wanted.append(name_to_uuid[name])
13629         else:
13630           missing.append(name)
13631
13632       if missing:
13633         raise errors.OpPrereqError("Some groups do not exist: %s" %
13634                                    utils.CommaJoin(missing),
13635                                    errors.ECODE_NOENT)
13636
13637   def DeclareLocks(self, lu, level):
13638     pass
13639
13640   def _GetQueryData(self, lu):
13641     """Computes the list of node groups and their attributes.
13642
13643     """
13644     do_nodes = query.GQ_NODE in self.requested_data
13645     do_instances = query.GQ_INST in self.requested_data
13646
13647     group_to_nodes = None
13648     group_to_instances = None
13649
13650     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13651     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13652     # latter GetAllInstancesInfo() is not enough, for we have to go through
13653     # instance->node. Hence, we will need to process nodes even if we only need
13654     # instance information.
13655     if do_nodes or do_instances:
13656       all_nodes = lu.cfg.GetAllNodesInfo()
13657       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13658       node_to_group = {}
13659
13660       for node in all_nodes.values():
13661         if node.group in group_to_nodes:
13662           group_to_nodes[node.group].append(node.name)
13663           node_to_group[node.name] = node.group
13664
13665       if do_instances:
13666         all_instances = lu.cfg.GetAllInstancesInfo()
13667         group_to_instances = dict((uuid, []) for uuid in self.wanted)
13668
13669         for instance in all_instances.values():
13670           node = instance.primary_node
13671           if node in node_to_group:
13672             group_to_instances[node_to_group[node]].append(instance.name)
13673
13674         if not do_nodes:
13675           # Do not pass on node information if it was not requested.
13676           group_to_nodes = None
13677
13678     return query.GroupQueryData(self._cluster,
13679                                 [self._all_groups[uuid]
13680                                  for uuid in self.wanted],
13681                                 group_to_nodes, group_to_instances)
13682
13683
13684 class LUGroupQuery(NoHooksLU):
13685   """Logical unit for querying node groups.
13686
13687   """
13688   REQ_BGL = False
13689
13690   def CheckArguments(self):
13691     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13692                           self.op.output_fields, False)
13693
13694   def ExpandNames(self):
13695     self.gq.ExpandNames(self)
13696
13697   def DeclareLocks(self, level):
13698     self.gq.DeclareLocks(self, level)
13699
13700   def Exec(self, feedback_fn):
13701     return self.gq.OldStyleQuery(self)
13702
13703
13704 class LUGroupSetParams(LogicalUnit):
13705   """Modifies the parameters of a node group.
13706
13707   """
13708   HPATH = "group-modify"
13709   HTYPE = constants.HTYPE_GROUP
13710   REQ_BGL = False
13711
13712   def CheckArguments(self):
13713     all_changes = [
13714       self.op.ndparams,
13715       self.op.diskparams,
13716       self.op.alloc_policy,
13717       self.op.hv_state,
13718       self.op.disk_state,
13719       self.op.ipolicy,
13720       ]
13721
13722     if all_changes.count(None) == len(all_changes):
13723       raise errors.OpPrereqError("Please pass at least one modification",
13724                                  errors.ECODE_INVAL)
13725
13726   def ExpandNames(self):
13727     # This raises errors.OpPrereqError on its own:
13728     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13729
13730     self.needed_locks = {
13731       locking.LEVEL_INSTANCE: [],
13732       locking.LEVEL_NODEGROUP: [self.group_uuid],
13733       }
13734
13735     self.share_locks[locking.LEVEL_INSTANCE] = 1
13736
13737   def DeclareLocks(self, level):
13738     if level == locking.LEVEL_INSTANCE:
13739       assert not self.needed_locks[locking.LEVEL_INSTANCE]
13740
13741       # Lock instances optimistically, needs verification once group lock has
13742       # been acquired
13743       self.needed_locks[locking.LEVEL_INSTANCE] = \
13744           self.cfg.GetNodeGroupInstances(self.group_uuid)
13745
13746   def CheckPrereq(self):
13747     """Check prerequisites.
13748
13749     """
13750     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13751
13752     # Check if locked instances are still correct
13753     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13754
13755     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13756     cluster = self.cfg.GetClusterInfo()
13757
13758     if self.group is None:
13759       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13760                                (self.op.group_name, self.group_uuid))
13761
13762     if self.op.ndparams:
13763       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13764       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13765       self.new_ndparams = new_ndparams
13766
13767     if self.op.diskparams:
13768       self.new_diskparams = dict()
13769       for templ in constants.DISK_TEMPLATES:
13770         if templ not in self.op.diskparams:
13771           self.op.diskparams[templ] = {}
13772         new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13773                                              self.op.diskparams[templ])
13774         utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13775         self.new_diskparams[templ] = new_templ_params
13776
13777     if self.op.hv_state:
13778       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13779                                                  self.group.hv_state_static)
13780
13781     if self.op.disk_state:
13782       self.new_disk_state = \
13783         _MergeAndVerifyDiskState(self.op.disk_state,
13784                                  self.group.disk_state_static)
13785
13786     if self.op.ipolicy:
13787       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13788                                             self.op.ipolicy,
13789                                             group_policy=True)
13790
13791       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13792       inst_filter = lambda inst: inst.name in owned_instances
13793       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13794       violations = \
13795           _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13796                                                                self.group),
13797                                         new_ipolicy, instances)
13798
13799       if violations:
13800         self.LogWarning("After the ipolicy change the following instances"
13801                         " violate them: %s",
13802                         utils.CommaJoin(violations))
13803
13804   def BuildHooksEnv(self):
13805     """Build hooks env.
13806
13807     """
13808     return {
13809       "GROUP_NAME": self.op.group_name,
13810       "NEW_ALLOC_POLICY": self.op.alloc_policy,
13811       }
13812
13813   def BuildHooksNodes(self):
13814     """Build hooks nodes.
13815
13816     """
13817     mn = self.cfg.GetMasterNode()
13818     return ([mn], [mn])
13819
13820   def Exec(self, feedback_fn):
13821     """Modifies the node group.
13822
13823     """
13824     result = []
13825
13826     if self.op.ndparams:
13827       self.group.ndparams = self.new_ndparams
13828       result.append(("ndparams", str(self.group.ndparams)))
13829
13830     if self.op.diskparams:
13831       self.group.diskparams = self.new_diskparams
13832       result.append(("diskparams", str(self.group.diskparams)))
13833
13834     if self.op.alloc_policy:
13835       self.group.alloc_policy = self.op.alloc_policy
13836
13837     if self.op.hv_state:
13838       self.group.hv_state_static = self.new_hv_state
13839
13840     if self.op.disk_state:
13841       self.group.disk_state_static = self.new_disk_state
13842
13843     if self.op.ipolicy:
13844       self.group.ipolicy = self.new_ipolicy
13845
13846     self.cfg.Update(self.group, feedback_fn)
13847     return result
13848
13849
13850 class LUGroupRemove(LogicalUnit):
13851   HPATH = "group-remove"
13852   HTYPE = constants.HTYPE_GROUP
13853   REQ_BGL = False
13854
13855   def ExpandNames(self):
13856     # This will raises errors.OpPrereqError on its own:
13857     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13858     self.needed_locks = {
13859       locking.LEVEL_NODEGROUP: [self.group_uuid],
13860       }
13861
13862   def CheckPrereq(self):
13863     """Check prerequisites.
13864
13865     This checks that the given group name exists as a node group, that is
13866     empty (i.e., contains no nodes), and that is not the last group of the
13867     cluster.
13868
13869     """
13870     # Verify that the group is empty.
13871     group_nodes = [node.name
13872                    for node in self.cfg.GetAllNodesInfo().values()
13873                    if node.group == self.group_uuid]
13874
13875     if group_nodes:
13876       raise errors.OpPrereqError("Group '%s' not empty, has the following"
13877                                  " nodes: %s" %
13878                                  (self.op.group_name,
13879                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
13880                                  errors.ECODE_STATE)
13881
13882     # Verify the cluster would not be left group-less.
13883     if len(self.cfg.GetNodeGroupList()) == 1:
13884       raise errors.OpPrereqError("Group '%s' is the only group,"
13885                                  " cannot be removed" %
13886                                  self.op.group_name,
13887                                  errors.ECODE_STATE)
13888
13889   def BuildHooksEnv(self):
13890     """Build hooks env.
13891
13892     """
13893     return {
13894       "GROUP_NAME": self.op.group_name,
13895       }
13896
13897   def BuildHooksNodes(self):
13898     """Build hooks nodes.
13899
13900     """
13901     mn = self.cfg.GetMasterNode()
13902     return ([mn], [mn])
13903
13904   def Exec(self, feedback_fn):
13905     """Remove the node group.
13906
13907     """
13908     try:
13909       self.cfg.RemoveNodeGroup(self.group_uuid)
13910     except errors.ConfigurationError:
13911       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13912                                (self.op.group_name, self.group_uuid))
13913
13914     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13915
13916
13917 class LUGroupRename(LogicalUnit):
13918   HPATH = "group-rename"
13919   HTYPE = constants.HTYPE_GROUP
13920   REQ_BGL = False
13921
13922   def ExpandNames(self):
13923     # This raises errors.OpPrereqError on its own:
13924     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13925
13926     self.needed_locks = {
13927       locking.LEVEL_NODEGROUP: [self.group_uuid],
13928       }
13929
13930   def CheckPrereq(self):
13931     """Check prerequisites.
13932
13933     Ensures requested new name is not yet used.
13934
13935     """
13936     try:
13937       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13938     except errors.OpPrereqError:
13939       pass
13940     else:
13941       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13942                                  " node group (UUID: %s)" %
13943                                  (self.op.new_name, new_name_uuid),
13944                                  errors.ECODE_EXISTS)
13945
13946   def BuildHooksEnv(self):
13947     """Build hooks env.
13948
13949     """
13950     return {
13951       "OLD_NAME": self.op.group_name,
13952       "NEW_NAME": self.op.new_name,
13953       }
13954
13955   def BuildHooksNodes(self):
13956     """Build hooks nodes.
13957
13958     """
13959     mn = self.cfg.GetMasterNode()
13960
13961     all_nodes = self.cfg.GetAllNodesInfo()
13962     all_nodes.pop(mn, None)
13963
13964     run_nodes = [mn]
13965     run_nodes.extend(node.name for node in all_nodes.values()
13966                      if node.group == self.group_uuid)
13967
13968     return (run_nodes, run_nodes)
13969
13970   def Exec(self, feedback_fn):
13971     """Rename the node group.
13972
13973     """
13974     group = self.cfg.GetNodeGroup(self.group_uuid)
13975
13976     if group is None:
13977       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13978                                (self.op.group_name, self.group_uuid))
13979
13980     group.name = self.op.new_name
13981     self.cfg.Update(group, feedback_fn)
13982
13983     return self.op.new_name
13984
13985
13986 class LUGroupEvacuate(LogicalUnit):
13987   HPATH = "group-evacuate"
13988   HTYPE = constants.HTYPE_GROUP
13989   REQ_BGL = False
13990
13991   def ExpandNames(self):
13992     # This raises errors.OpPrereqError on its own:
13993     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13994
13995     if self.op.target_groups:
13996       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13997                                   self.op.target_groups)
13998     else:
13999       self.req_target_uuids = []
14000
14001     if self.group_uuid in self.req_target_uuids:
14002       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14003                                  " as a target group (targets are %s)" %
14004                                  (self.group_uuid,
14005                                   utils.CommaJoin(self.req_target_uuids)),
14006                                  errors.ECODE_INVAL)
14007
14008     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14009
14010     self.share_locks = _ShareAll()
14011     self.needed_locks = {
14012       locking.LEVEL_INSTANCE: [],
14013       locking.LEVEL_NODEGROUP: [],
14014       locking.LEVEL_NODE: [],
14015       }
14016
14017   def DeclareLocks(self, level):
14018     if level == locking.LEVEL_INSTANCE:
14019       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14020
14021       # Lock instances optimistically, needs verification once node and group
14022       # locks have been acquired
14023       self.needed_locks[locking.LEVEL_INSTANCE] = \
14024         self.cfg.GetNodeGroupInstances(self.group_uuid)
14025
14026     elif level == locking.LEVEL_NODEGROUP:
14027       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14028
14029       if self.req_target_uuids:
14030         lock_groups = set([self.group_uuid] + self.req_target_uuids)
14031
14032         # Lock all groups used by instances optimistically; this requires going
14033         # via the node before it's locked, requiring verification later on
14034         lock_groups.update(group_uuid
14035                            for instance_name in
14036                              self.owned_locks(locking.LEVEL_INSTANCE)
14037                            for group_uuid in
14038                              self.cfg.GetInstanceNodeGroups(instance_name))
14039       else:
14040         # No target groups, need to lock all of them
14041         lock_groups = locking.ALL_SET
14042
14043       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14044
14045     elif level == locking.LEVEL_NODE:
14046       # This will only lock the nodes in the group to be evacuated which
14047       # contain actual instances
14048       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14049       self._LockInstancesNodes()
14050
14051       # Lock all nodes in group to be evacuated and target groups
14052       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14053       assert self.group_uuid in owned_groups
14054       member_nodes = [node_name
14055                       for group in owned_groups
14056                       for node_name in self.cfg.GetNodeGroup(group).members]
14057       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14058
14059   def CheckPrereq(self):
14060     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14061     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14062     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14063
14064     assert owned_groups.issuperset(self.req_target_uuids)
14065     assert self.group_uuid in owned_groups
14066
14067     # Check if locked instances are still correct
14068     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14069
14070     # Get instance information
14071     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14072
14073     # Check if node groups for locked instances are still correct
14074     for instance_name in owned_instances:
14075       inst = self.instances[instance_name]
14076       assert owned_nodes.issuperset(inst.all_nodes), \
14077         "Instance %s's nodes changed while we kept the lock" % instance_name
14078
14079       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
14080                                              owned_groups)
14081
14082       assert self.group_uuid in inst_groups, \
14083         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
14084
14085     if self.req_target_uuids:
14086       # User requested specific target groups
14087       self.target_uuids = self.req_target_uuids
14088     else:
14089       # All groups except the one to be evacuated are potential targets
14090       self.target_uuids = [group_uuid for group_uuid in owned_groups
14091                            if group_uuid != self.group_uuid]
14092
14093       if not self.target_uuids:
14094         raise errors.OpPrereqError("There are no possible target groups",
14095                                    errors.ECODE_INVAL)
14096
14097   def BuildHooksEnv(self):
14098     """Build hooks env.
14099
14100     """
14101     return {
14102       "GROUP_NAME": self.op.group_name,
14103       "TARGET_GROUPS": " ".join(self.target_uuids),
14104       }
14105
14106   def BuildHooksNodes(self):
14107     """Build hooks nodes.
14108
14109     """
14110     mn = self.cfg.GetMasterNode()
14111
14112     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14113
14114     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14115
14116     return (run_nodes, run_nodes)
14117
14118   def Exec(self, feedback_fn):
14119     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14120
14121     assert self.group_uuid not in self.target_uuids
14122
14123     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14124                      instances=instances, target_groups=self.target_uuids)
14125
14126     ial.Run(self.op.iallocator)
14127
14128     if not ial.success:
14129       raise errors.OpPrereqError("Can't compute group evacuation using"
14130                                  " iallocator '%s': %s" %
14131                                  (self.op.iallocator, ial.info),
14132                                  errors.ECODE_NORES)
14133
14134     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14135
14136     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14137                  len(jobs), self.op.group_name)
14138
14139     return ResultWithJobs(jobs)
14140
14141
14142 class TagsLU(NoHooksLU): # pylint: disable=W0223
14143   """Generic tags LU.
14144
14145   This is an abstract class which is the parent of all the other tags LUs.
14146
14147   """
14148   def ExpandNames(self):
14149     self.group_uuid = None
14150     self.needed_locks = {}
14151     if self.op.kind == constants.TAG_NODE:
14152       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14153       self.needed_locks[locking.LEVEL_NODE] = self.op.name
14154     elif self.op.kind == constants.TAG_INSTANCE:
14155       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14156       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
14157     elif self.op.kind == constants.TAG_NODEGROUP:
14158       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14159
14160     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14161     # not possible to acquire the BGL based on opcode parameters)
14162
14163   def CheckPrereq(self):
14164     """Check prerequisites.
14165
14166     """
14167     if self.op.kind == constants.TAG_CLUSTER:
14168       self.target = self.cfg.GetClusterInfo()
14169     elif self.op.kind == constants.TAG_NODE:
14170       self.target = self.cfg.GetNodeInfo(self.op.name)
14171     elif self.op.kind == constants.TAG_INSTANCE:
14172       self.target = self.cfg.GetInstanceInfo(self.op.name)
14173     elif self.op.kind == constants.TAG_NODEGROUP:
14174       self.target = self.cfg.GetNodeGroup(self.group_uuid)
14175     else:
14176       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14177                                  str(self.op.kind), errors.ECODE_INVAL)
14178
14179
14180 class LUTagsGet(TagsLU):
14181   """Returns the tags of a given object.
14182
14183   """
14184   REQ_BGL = False
14185
14186   def ExpandNames(self):
14187     TagsLU.ExpandNames(self)
14188
14189     # Share locks as this is only a read operation
14190     self.share_locks = _ShareAll()
14191
14192   def Exec(self, feedback_fn):
14193     """Returns the tag list.
14194
14195     """
14196     return list(self.target.GetTags())
14197
14198
14199 class LUTagsSearch(NoHooksLU):
14200   """Searches the tags for a given pattern.
14201
14202   """
14203   REQ_BGL = False
14204
14205   def ExpandNames(self):
14206     self.needed_locks = {}
14207
14208   def CheckPrereq(self):
14209     """Check prerequisites.
14210
14211     This checks the pattern passed for validity by compiling it.
14212
14213     """
14214     try:
14215       self.re = re.compile(self.op.pattern)
14216     except re.error, err:
14217       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14218                                  (self.op.pattern, err), errors.ECODE_INVAL)
14219
14220   def Exec(self, feedback_fn):
14221     """Returns the tag list.
14222
14223     """
14224     cfg = self.cfg
14225     tgts = [("/cluster", cfg.GetClusterInfo())]
14226     ilist = cfg.GetAllInstancesInfo().values()
14227     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14228     nlist = cfg.GetAllNodesInfo().values()
14229     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14230     tgts.extend(("/nodegroup/%s" % n.name, n)
14231                 for n in cfg.GetAllNodeGroupsInfo().values())
14232     results = []
14233     for path, target in tgts:
14234       for tag in target.GetTags():
14235         if self.re.search(tag):
14236           results.append((path, tag))
14237     return results
14238
14239
14240 class LUTagsSet(TagsLU):
14241   """Sets a tag on a given object.
14242
14243   """
14244   REQ_BGL = False
14245
14246   def CheckPrereq(self):
14247     """Check prerequisites.
14248
14249     This checks the type and length of the tag name and value.
14250
14251     """
14252     TagsLU.CheckPrereq(self)
14253     for tag in self.op.tags:
14254       objects.TaggableObject.ValidateTag(tag)
14255
14256   def Exec(self, feedback_fn):
14257     """Sets the tag.
14258
14259     """
14260     try:
14261       for tag in self.op.tags:
14262         self.target.AddTag(tag)
14263     except errors.TagError, err:
14264       raise errors.OpExecError("Error while setting tag: %s" % str(err))
14265     self.cfg.Update(self.target, feedback_fn)
14266
14267
14268 class LUTagsDel(TagsLU):
14269   """Delete a list of tags from a given object.
14270
14271   """
14272   REQ_BGL = False
14273
14274   def CheckPrereq(self):
14275     """Check prerequisites.
14276
14277     This checks that we have the given tag.
14278
14279     """
14280     TagsLU.CheckPrereq(self)
14281     for tag in self.op.tags:
14282       objects.TaggableObject.ValidateTag(tag)
14283     del_tags = frozenset(self.op.tags)
14284     cur_tags = self.target.GetTags()
14285
14286     diff_tags = del_tags - cur_tags
14287     if diff_tags:
14288       diff_names = ("'%s'" % i for i in sorted(diff_tags))
14289       raise errors.OpPrereqError("Tag(s) %s not found" %
14290                                  (utils.CommaJoin(diff_names), ),
14291                                  errors.ECODE_NOENT)
14292
14293   def Exec(self, feedback_fn):
14294     """Remove the tag from the object.
14295
14296     """
14297     for tag in self.op.tags:
14298       self.target.RemoveTag(tag)
14299     self.cfg.Update(self.target, feedback_fn)
14300
14301
14302 class LUTestDelay(NoHooksLU):
14303   """Sleep for a specified amount of time.
14304
14305   This LU sleeps on the master and/or nodes for a specified amount of
14306   time.
14307
14308   """
14309   REQ_BGL = False
14310
14311   def ExpandNames(self):
14312     """Expand names and set required locks.
14313
14314     This expands the node list, if any.
14315
14316     """
14317     self.needed_locks = {}
14318     if self.op.on_nodes:
14319       # _GetWantedNodes can be used here, but is not always appropriate to use
14320       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14321       # more information.
14322       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14323       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14324
14325   def _TestDelay(self):
14326     """Do the actual sleep.
14327
14328     """
14329     if self.op.on_master:
14330       if not utils.TestDelay(self.op.duration):
14331         raise errors.OpExecError("Error during master delay test")
14332     if self.op.on_nodes:
14333       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14334       for node, node_result in result.items():
14335         node_result.Raise("Failure during rpc call to node %s" % node)
14336
14337   def Exec(self, feedback_fn):
14338     """Execute the test delay opcode, with the wanted repetitions.
14339
14340     """
14341     if self.op.repeat == 0:
14342       self._TestDelay()
14343     else:
14344       top_value = self.op.repeat - 1
14345       for i in range(self.op.repeat):
14346         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14347         self._TestDelay()
14348
14349
14350 class LUTestJqueue(NoHooksLU):
14351   """Utility LU to test some aspects of the job queue.
14352
14353   """
14354   REQ_BGL = False
14355
14356   # Must be lower than default timeout for WaitForJobChange to see whether it
14357   # notices changed jobs
14358   _CLIENT_CONNECT_TIMEOUT = 20.0
14359   _CLIENT_CONFIRM_TIMEOUT = 60.0
14360
14361   @classmethod
14362   def _NotifyUsingSocket(cls, cb, errcls):
14363     """Opens a Unix socket and waits for another program to connect.
14364
14365     @type cb: callable
14366     @param cb: Callback to send socket name to client
14367     @type errcls: class
14368     @param errcls: Exception class to use for errors
14369
14370     """
14371     # Using a temporary directory as there's no easy way to create temporary
14372     # sockets without writing a custom loop around tempfile.mktemp and
14373     # socket.bind
14374     tmpdir = tempfile.mkdtemp()
14375     try:
14376       tmpsock = utils.PathJoin(tmpdir, "sock")
14377
14378       logging.debug("Creating temporary socket at %s", tmpsock)
14379       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14380       try:
14381         sock.bind(tmpsock)
14382         sock.listen(1)
14383
14384         # Send details to client
14385         cb(tmpsock)
14386
14387         # Wait for client to connect before continuing
14388         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14389         try:
14390           (conn, _) = sock.accept()
14391         except socket.error, err:
14392           raise errcls("Client didn't connect in time (%s)" % err)
14393       finally:
14394         sock.close()
14395     finally:
14396       # Remove as soon as client is connected
14397       shutil.rmtree(tmpdir)
14398
14399     # Wait for client to close
14400     try:
14401       try:
14402         # pylint: disable=E1101
14403         # Instance of '_socketobject' has no ... member
14404         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14405         conn.recv(1)
14406       except socket.error, err:
14407         raise errcls("Client failed to confirm notification (%s)" % err)
14408     finally:
14409       conn.close()
14410
14411   def _SendNotification(self, test, arg, sockname):
14412     """Sends a notification to the client.
14413
14414     @type test: string
14415     @param test: Test name
14416     @param arg: Test argument (depends on test)
14417     @type sockname: string
14418     @param sockname: Socket path
14419
14420     """
14421     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14422
14423   def _Notify(self, prereq, test, arg):
14424     """Notifies the client of a test.
14425
14426     @type prereq: bool
14427     @param prereq: Whether this is a prereq-phase test
14428     @type test: string
14429     @param test: Test name
14430     @param arg: Test argument (depends on test)
14431
14432     """
14433     if prereq:
14434       errcls = errors.OpPrereqError
14435     else:
14436       errcls = errors.OpExecError
14437
14438     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14439                                                   test, arg),
14440                                    errcls)
14441
14442   def CheckArguments(self):
14443     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14444     self.expandnames_calls = 0
14445
14446   def ExpandNames(self):
14447     checkargs_calls = getattr(self, "checkargs_calls", 0)
14448     if checkargs_calls < 1:
14449       raise errors.ProgrammerError("CheckArguments was not called")
14450
14451     self.expandnames_calls += 1
14452
14453     if self.op.notify_waitlock:
14454       self._Notify(True, constants.JQT_EXPANDNAMES, None)
14455
14456     self.LogInfo("Expanding names")
14457
14458     # Get lock on master node (just to get a lock, not for a particular reason)
14459     self.needed_locks = {
14460       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14461       }
14462
14463   def Exec(self, feedback_fn):
14464     if self.expandnames_calls < 1:
14465       raise errors.ProgrammerError("ExpandNames was not called")
14466
14467     if self.op.notify_exec:
14468       self._Notify(False, constants.JQT_EXEC, None)
14469
14470     self.LogInfo("Executing")
14471
14472     if self.op.log_messages:
14473       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14474       for idx, msg in enumerate(self.op.log_messages):
14475         self.LogInfo("Sending log message %s", idx + 1)
14476         feedback_fn(constants.JQT_MSGPREFIX + msg)
14477         # Report how many test messages have been sent
14478         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14479
14480     if self.op.fail:
14481       raise errors.OpExecError("Opcode failure was requested")
14482
14483     return True
14484
14485
14486 class IAllocator(object):
14487   """IAllocator framework.
14488
14489   An IAllocator instance has three sets of attributes:
14490     - cfg that is needed to query the cluster
14491     - input data (all members of the _KEYS class attribute are required)
14492     - four buffer attributes (in|out_data|text), that represent the
14493       input (to the external script) in text and data structure format,
14494       and the output from it, again in two formats
14495     - the result variables from the script (success, info, nodes) for
14496       easy usage
14497
14498   """
14499   # pylint: disable=R0902
14500   # lots of instance attributes
14501
14502   def __init__(self, cfg, rpc_runner, mode, **kwargs):
14503     self.cfg = cfg
14504     self.rpc = rpc_runner
14505     # init buffer variables
14506     self.in_text = self.out_text = self.in_data = self.out_data = None
14507     # init all input fields so that pylint is happy
14508     self.mode = mode
14509     self.memory = self.disks = self.disk_template = None
14510     self.os = self.tags = self.nics = self.vcpus = None
14511     self.hypervisor = None
14512     self.relocate_from = None
14513     self.name = None
14514     self.instances = None
14515     self.evac_mode = None
14516     self.target_groups = []
14517     # computed fields
14518     self.required_nodes = None
14519     # init result fields
14520     self.success = self.info = self.result = None
14521
14522     try:
14523       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14524     except KeyError:
14525       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14526                                    " IAllocator" % self.mode)
14527
14528     keyset = [n for (n, _) in keydata]
14529
14530     for key in kwargs:
14531       if key not in keyset:
14532         raise errors.ProgrammerError("Invalid input parameter '%s' to"
14533                                      " IAllocator" % key)
14534       setattr(self, key, kwargs[key])
14535
14536     for key in keyset:
14537       if key not in kwargs:
14538         raise errors.ProgrammerError("Missing input parameter '%s' to"
14539                                      " IAllocator" % key)
14540     self._BuildInputData(compat.partial(fn, self), keydata)
14541
14542   def _ComputeClusterData(self):
14543     """Compute the generic allocator input data.
14544
14545     This is the data that is independent of the actual operation.
14546
14547     """
14548     cfg = self.cfg
14549     cluster_info = cfg.GetClusterInfo()
14550     # cluster data
14551     data = {
14552       "version": constants.IALLOCATOR_VERSION,
14553       "cluster_name": cfg.GetClusterName(),
14554       "cluster_tags": list(cluster_info.GetTags()),
14555       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14556       "ipolicy": cluster_info.ipolicy,
14557       }
14558     ninfo = cfg.GetAllNodesInfo()
14559     iinfo = cfg.GetAllInstancesInfo().values()
14560     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14561
14562     # node data
14563     node_list = [n.name for n in ninfo.values() if n.vm_capable]
14564
14565     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14566       hypervisor_name = self.hypervisor
14567     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14568       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14569     else:
14570       hypervisor_name = cluster_info.primary_hypervisor
14571
14572     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14573                                         [hypervisor_name])
14574     node_iinfo = \
14575       self.rpc.call_all_instances_info(node_list,
14576                                        cluster_info.enabled_hypervisors)
14577
14578     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14579
14580     config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14581     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14582                                                  i_list, config_ndata)
14583     assert len(data["nodes"]) == len(ninfo), \
14584         "Incomplete node data computed"
14585
14586     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14587
14588     self.in_data = data
14589
14590   @staticmethod
14591   def _ComputeNodeGroupData(cfg):
14592     """Compute node groups data.
14593
14594     """
14595     cluster = cfg.GetClusterInfo()
14596     ng = dict((guuid, {
14597       "name": gdata.name,
14598       "alloc_policy": gdata.alloc_policy,
14599       "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14600       })
14601       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14602
14603     return ng
14604
14605   @staticmethod
14606   def _ComputeBasicNodeData(cfg, node_cfg):
14607     """Compute global node data.
14608
14609     @rtype: dict
14610     @returns: a dict of name: (node dict, node config)
14611
14612     """
14613     # fill in static (config-based) values
14614     node_results = dict((ninfo.name, {
14615       "tags": list(ninfo.GetTags()),
14616       "primary_ip": ninfo.primary_ip,
14617       "secondary_ip": ninfo.secondary_ip,
14618       "offline": ninfo.offline,
14619       "drained": ninfo.drained,
14620       "master_candidate": ninfo.master_candidate,
14621       "group": ninfo.group,
14622       "master_capable": ninfo.master_capable,
14623       "vm_capable": ninfo.vm_capable,
14624       "ndparams": cfg.GetNdParams(ninfo),
14625       })
14626       for ninfo in node_cfg.values())
14627
14628     return node_results
14629
14630   @staticmethod
14631   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14632                               node_results):
14633     """Compute global node data.
14634
14635     @param node_results: the basic node structures as filled from the config
14636
14637     """
14638     #TODO(dynmem): compute the right data on MAX and MIN memory
14639     # make a copy of the current dict
14640     node_results = dict(node_results)
14641     for nname, nresult in node_data.items():
14642       assert nname in node_results, "Missing basic data for node %s" % nname
14643       ninfo = node_cfg[nname]
14644
14645       if not (ninfo.offline or ninfo.drained):
14646         nresult.Raise("Can't get data for node %s" % nname)
14647         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14648                                 nname)
14649         remote_info = _MakeLegacyNodeInfo(nresult.payload)
14650
14651         for attr in ["memory_total", "memory_free", "memory_dom0",
14652                      "vg_size", "vg_free", "cpu_total"]:
14653           if attr not in remote_info:
14654             raise errors.OpExecError("Node '%s' didn't return attribute"
14655                                      " '%s'" % (nname, attr))
14656           if not isinstance(remote_info[attr], int):
14657             raise errors.OpExecError("Node '%s' returned invalid value"
14658                                      " for '%s': %s" %
14659                                      (nname, attr, remote_info[attr]))
14660         # compute memory used by primary instances
14661         i_p_mem = i_p_up_mem = 0
14662         for iinfo, beinfo in i_list:
14663           if iinfo.primary_node == nname:
14664             i_p_mem += beinfo[constants.BE_MAXMEM]
14665             if iinfo.name not in node_iinfo[nname].payload:
14666               i_used_mem = 0
14667             else:
14668               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14669             i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14670             remote_info["memory_free"] -= max(0, i_mem_diff)
14671
14672             if iinfo.admin_state == constants.ADMINST_UP:
14673               i_p_up_mem += beinfo[constants.BE_MAXMEM]
14674
14675         # compute memory used by instances
14676         pnr_dyn = {
14677           "total_memory": remote_info["memory_total"],
14678           "reserved_memory": remote_info["memory_dom0"],
14679           "free_memory": remote_info["memory_free"],
14680           "total_disk": remote_info["vg_size"],
14681           "free_disk": remote_info["vg_free"],
14682           "total_cpus": remote_info["cpu_total"],
14683           "i_pri_memory": i_p_mem,
14684           "i_pri_up_memory": i_p_up_mem,
14685           }
14686         pnr_dyn.update(node_results[nname])
14687         node_results[nname] = pnr_dyn
14688
14689     return node_results
14690
14691   @staticmethod
14692   def _ComputeInstanceData(cluster_info, i_list):
14693     """Compute global instance data.
14694
14695     """
14696     instance_data = {}
14697     for iinfo, beinfo in i_list:
14698       nic_data = []
14699       for nic in iinfo.nics:
14700         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14701         nic_dict = {
14702           "mac": nic.mac,
14703           "ip": nic.ip,
14704           "mode": filled_params[constants.NIC_MODE],
14705           "link": filled_params[constants.NIC_LINK],
14706           }
14707         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14708           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14709         nic_data.append(nic_dict)
14710       pir = {
14711         "tags": list(iinfo.GetTags()),
14712         "admin_state": iinfo.admin_state,
14713         "vcpus": beinfo[constants.BE_VCPUS],
14714         "memory": beinfo[constants.BE_MAXMEM],
14715         "os": iinfo.os,
14716         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14717         "nics": nic_data,
14718         "disks": [{constants.IDISK_SIZE: dsk.size,
14719                    constants.IDISK_MODE: dsk.mode}
14720                   for dsk in iinfo.disks],
14721         "disk_template": iinfo.disk_template,
14722         "hypervisor": iinfo.hypervisor,
14723         }
14724       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14725                                                  pir["disks"])
14726       instance_data[iinfo.name] = pir
14727
14728     return instance_data
14729
14730   def _AddNewInstance(self):
14731     """Add new instance data to allocator structure.
14732
14733     This in combination with _AllocatorGetClusterData will create the
14734     correct structure needed as input for the allocator.
14735
14736     The checks for the completeness of the opcode must have already been
14737     done.
14738
14739     """
14740     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14741
14742     if self.disk_template in constants.DTS_INT_MIRROR:
14743       self.required_nodes = 2
14744     else:
14745       self.required_nodes = 1
14746
14747     request = {
14748       "name": self.name,
14749       "disk_template": self.disk_template,
14750       "tags": self.tags,
14751       "os": self.os,
14752       "vcpus": self.vcpus,
14753       "memory": self.memory,
14754       "disks": self.disks,
14755       "disk_space_total": disk_space,
14756       "nics": self.nics,
14757       "required_nodes": self.required_nodes,
14758       "hypervisor": self.hypervisor,
14759       }
14760
14761     return request
14762
14763   def _AddRelocateInstance(self):
14764     """Add relocate instance data to allocator structure.
14765
14766     This in combination with _IAllocatorGetClusterData will create the
14767     correct structure needed as input for the allocator.
14768
14769     The checks for the completeness of the opcode must have already been
14770     done.
14771
14772     """
14773     instance = self.cfg.GetInstanceInfo(self.name)
14774     if instance is None:
14775       raise errors.ProgrammerError("Unknown instance '%s' passed to"
14776                                    " IAllocator" % self.name)
14777
14778     if instance.disk_template not in constants.DTS_MIRRORED:
14779       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14780                                  errors.ECODE_INVAL)
14781
14782     if instance.disk_template in constants.DTS_INT_MIRROR and \
14783         len(instance.secondary_nodes) != 1:
14784       raise errors.OpPrereqError("Instance has not exactly one secondary node",
14785                                  errors.ECODE_STATE)
14786
14787     self.required_nodes = 1
14788     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14789     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14790
14791     request = {
14792       "name": self.name,
14793       "disk_space_total": disk_space,
14794       "required_nodes": self.required_nodes,
14795       "relocate_from": self.relocate_from,
14796       }
14797     return request
14798
14799   def _AddNodeEvacuate(self):
14800     """Get data for node-evacuate requests.
14801
14802     """
14803     return {
14804       "instances": self.instances,
14805       "evac_mode": self.evac_mode,
14806       }
14807
14808   def _AddChangeGroup(self):
14809     """Get data for node-evacuate requests.
14810
14811     """
14812     return {
14813       "instances": self.instances,
14814       "target_groups": self.target_groups,
14815       }
14816
14817   def _BuildInputData(self, fn, keydata):
14818     """Build input data structures.
14819
14820     """
14821     self._ComputeClusterData()
14822
14823     request = fn()
14824     request["type"] = self.mode
14825     for keyname, keytype in keydata:
14826       if keyname not in request:
14827         raise errors.ProgrammerError("Request parameter %s is missing" %
14828                                      keyname)
14829       val = request[keyname]
14830       if not keytype(val):
14831         raise errors.ProgrammerError("Request parameter %s doesn't pass"
14832                                      " validation, value %s, expected"
14833                                      " type %s" % (keyname, val, keytype))
14834     self.in_data["request"] = request
14835
14836     self.in_text = serializer.Dump(self.in_data)
14837
14838   _STRING_LIST = ht.TListOf(ht.TString)
14839   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14840      # pylint: disable=E1101
14841      # Class '...' has no 'OP_ID' member
14842      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14843                           opcodes.OpInstanceMigrate.OP_ID,
14844                           opcodes.OpInstanceReplaceDisks.OP_ID])
14845      })))
14846
14847   _NEVAC_MOVED = \
14848     ht.TListOf(ht.TAnd(ht.TIsLength(3),
14849                        ht.TItems([ht.TNonEmptyString,
14850                                   ht.TNonEmptyString,
14851                                   ht.TListOf(ht.TNonEmptyString),
14852                                  ])))
14853   _NEVAC_FAILED = \
14854     ht.TListOf(ht.TAnd(ht.TIsLength(2),
14855                        ht.TItems([ht.TNonEmptyString,
14856                                   ht.TMaybeString,
14857                                  ])))
14858   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14859                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14860
14861   _MODE_DATA = {
14862     constants.IALLOCATOR_MODE_ALLOC:
14863       (_AddNewInstance,
14864        [
14865         ("name", ht.TString),
14866         ("memory", ht.TInt),
14867         ("disks", ht.TListOf(ht.TDict)),
14868         ("disk_template", ht.TString),
14869         ("os", ht.TString),
14870         ("tags", _STRING_LIST),
14871         ("nics", ht.TListOf(ht.TDict)),
14872         ("vcpus", ht.TInt),
14873         ("hypervisor", ht.TString),
14874         ], ht.TList),
14875     constants.IALLOCATOR_MODE_RELOC:
14876       (_AddRelocateInstance,
14877        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14878        ht.TList),
14879      constants.IALLOCATOR_MODE_NODE_EVAC:
14880       (_AddNodeEvacuate, [
14881         ("instances", _STRING_LIST),
14882         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14883         ], _NEVAC_RESULT),
14884      constants.IALLOCATOR_MODE_CHG_GROUP:
14885       (_AddChangeGroup, [
14886         ("instances", _STRING_LIST),
14887         ("target_groups", _STRING_LIST),
14888         ], _NEVAC_RESULT),
14889     }
14890
14891   def Run(self, name, validate=True, call_fn=None):
14892     """Run an instance allocator and return the results.
14893
14894     """
14895     if call_fn is None:
14896       call_fn = self.rpc.call_iallocator_runner
14897
14898     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14899     result.Raise("Failure while running the iallocator script")
14900
14901     self.out_text = result.payload
14902     if validate:
14903       self._ValidateResult()
14904
14905   def _ValidateResult(self):
14906     """Process the allocator results.
14907
14908     This will process and if successful save the result in
14909     self.out_data and the other parameters.
14910
14911     """
14912     try:
14913       rdict = serializer.Load(self.out_text)
14914     except Exception, err:
14915       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14916
14917     if not isinstance(rdict, dict):
14918       raise errors.OpExecError("Can't parse iallocator results: not a dict")
14919
14920     # TODO: remove backwards compatiblity in later versions
14921     if "nodes" in rdict and "result" not in rdict:
14922       rdict["result"] = rdict["nodes"]
14923       del rdict["nodes"]
14924
14925     for key in "success", "info", "result":
14926       if key not in rdict:
14927         raise errors.OpExecError("Can't parse iallocator results:"
14928                                  " missing key '%s'" % key)
14929       setattr(self, key, rdict[key])
14930
14931     if not self._result_check(self.result):
14932       raise errors.OpExecError("Iallocator returned invalid result,"
14933                                " expected %s, got %s" %
14934                                (self._result_check, self.result),
14935                                errors.ECODE_INVAL)
14936
14937     if self.mode == constants.IALLOCATOR_MODE_RELOC:
14938       assert self.relocate_from is not None
14939       assert self.required_nodes == 1
14940
14941       node2group = dict((name, ndata["group"])
14942                         for (name, ndata) in self.in_data["nodes"].items())
14943
14944       fn = compat.partial(self._NodesToGroups, node2group,
14945                           self.in_data["nodegroups"])
14946
14947       instance = self.cfg.GetInstanceInfo(self.name)
14948       request_groups = fn(self.relocate_from + [instance.primary_node])
14949       result_groups = fn(rdict["result"] + [instance.primary_node])
14950
14951       if self.success and not set(result_groups).issubset(request_groups):
14952         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14953                                  " differ from original groups (%s)" %
14954                                  (utils.CommaJoin(result_groups),
14955                                   utils.CommaJoin(request_groups)))
14956
14957     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14958       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14959
14960     self.out_data = rdict
14961
14962   @staticmethod
14963   def _NodesToGroups(node2group, groups, nodes):
14964     """Returns a list of unique group names for a list of nodes.
14965
14966     @type node2group: dict
14967     @param node2group: Map from node name to group UUID
14968     @type groups: dict
14969     @param groups: Group information
14970     @type nodes: list
14971     @param nodes: Node names
14972
14973     """
14974     result = set()
14975
14976     for node in nodes:
14977       try:
14978         group_uuid = node2group[node]
14979       except KeyError:
14980         # Ignore unknown node
14981         pass
14982       else:
14983         try:
14984           group = groups[group_uuid]
14985         except KeyError:
14986           # Can't find group, let's use UUID
14987           group_name = group_uuid
14988         else:
14989           group_name = group["name"]
14990
14991         result.add(group_name)
14992
14993     return sorted(result)
14994
14995
14996 class LUTestAllocator(NoHooksLU):
14997   """Run allocator tests.
14998
14999   This LU runs the allocator tests
15000
15001   """
15002   def CheckPrereq(self):
15003     """Check prerequisites.
15004
15005     This checks the opcode parameters depending on the director and mode test.
15006
15007     """
15008     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15009       for attr in ["memory", "disks", "disk_template",
15010                    "os", "tags", "nics", "vcpus"]:
15011         if not hasattr(self.op, attr):
15012           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15013                                      attr, errors.ECODE_INVAL)
15014       iname = self.cfg.ExpandInstanceName(self.op.name)
15015       if iname is not None:
15016         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15017                                    iname, errors.ECODE_EXISTS)
15018       if not isinstance(self.op.nics, list):
15019         raise errors.OpPrereqError("Invalid parameter 'nics'",
15020                                    errors.ECODE_INVAL)
15021       if not isinstance(self.op.disks, list):
15022         raise errors.OpPrereqError("Invalid parameter 'disks'",
15023                                    errors.ECODE_INVAL)
15024       for row in self.op.disks:
15025         if (not isinstance(row, dict) or
15026             constants.IDISK_SIZE not in row or
15027             not isinstance(row[constants.IDISK_SIZE], int) or
15028             constants.IDISK_MODE not in row or
15029             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15030           raise errors.OpPrereqError("Invalid contents of the 'disks'"
15031                                      " parameter", errors.ECODE_INVAL)
15032       if self.op.hypervisor is None:
15033         self.op.hypervisor = self.cfg.GetHypervisorType()
15034     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15035       fname = _ExpandInstanceName(self.cfg, self.op.name)
15036       self.op.name = fname
15037       self.relocate_from = \
15038           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15039     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15040                           constants.IALLOCATOR_MODE_NODE_EVAC):
15041       if not self.op.instances:
15042         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15043       self.op.instances = _GetWantedInstances(self, self.op.instances)
15044     else:
15045       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15046                                  self.op.mode, errors.ECODE_INVAL)
15047
15048     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15049       if self.op.allocator is None:
15050         raise errors.OpPrereqError("Missing allocator name",
15051                                    errors.ECODE_INVAL)
15052     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15053       raise errors.OpPrereqError("Wrong allocator test '%s'" %
15054                                  self.op.direction, errors.ECODE_INVAL)
15055
15056   def Exec(self, feedback_fn):
15057     """Run the allocator test.
15058
15059     """
15060     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15061       ial = IAllocator(self.cfg, self.rpc,
15062                        mode=self.op.mode,
15063                        name=self.op.name,
15064                        memory=self.op.memory,
15065                        disks=self.op.disks,
15066                        disk_template=self.op.disk_template,
15067                        os=self.op.os,
15068                        tags=self.op.tags,
15069                        nics=self.op.nics,
15070                        vcpus=self.op.vcpus,
15071                        hypervisor=self.op.hypervisor,
15072                        )
15073     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15074       ial = IAllocator(self.cfg, self.rpc,
15075                        mode=self.op.mode,
15076                        name=self.op.name,
15077                        relocate_from=list(self.relocate_from),
15078                        )
15079     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15080       ial = IAllocator(self.cfg, self.rpc,
15081                        mode=self.op.mode,
15082                        instances=self.op.instances,
15083                        target_groups=self.op.target_groups)
15084     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15085       ial = IAllocator(self.cfg, self.rpc,
15086                        mode=self.op.mode,
15087                        instances=self.op.instances,
15088                        evac_mode=self.op.evac_mode)
15089     else:
15090       raise errors.ProgrammerError("Uncatched mode %s in"
15091                                    " LUTestAllocator.Exec", self.op.mode)
15092
15093     if self.op.direction == constants.IALLOCATOR_DIR_IN:
15094       result = ial.in_text
15095     else:
15096       ial.Run(self.op.allocator, validate=False)
15097       result = ial.out_text
15098     return result
15099
15100
15101 #: Query type implementations
15102 _QUERY_IMPL = {
15103   constants.QR_INSTANCE: _InstanceQuery,
15104   constants.QR_NODE: _NodeQuery,
15105   constants.QR_GROUP: _GroupQuery,
15106   constants.QR_OS: _OsQuery,
15107   }
15108
15109 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15110
15111
15112 def _GetQueryImplementation(name):
15113   """Returns the implemtnation for a query type.
15114
15115   @param name: Query type, must be one of L{constants.QR_VIA_OP}
15116
15117   """
15118   try:
15119     return _QUERY_IMPL[name]
15120   except KeyError:
15121     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15122                                errors.ECODE_INVAL)