code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62 from ganeti import rpc
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141     # Dicts used to declare locking needs to mcpu
 142     self.needed_locks = None
 143     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 144     self.add_locks = {}
 145     self.remove_locks = {}
 146     # Used to force good behavior when calling helper functions
 147     self.recalculate_locks = {}
 148     # logging
 149     self.Log = processor.Log # pylint: disable=C0103
 150     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 151     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 152     self.LogStep = processor.LogStep # pylint: disable=C0103
 153     # support for dry-run
 154     self.dry_run_result = None
 155     # support for generic debug attribute
 156     if (not hasattr(self.op, "debug_level") or
 157         not isinstance(self.op.debug_level, int)):
 158       self.op.debug_level = 0
 159
 160     # Tasklets
 161     self.tasklets = None
 162
 163     # Validate opcode parameters and set defaults
 164     self.op.Validate(True)
 165
 166     self.CheckArguments()
 167
 168   def CheckArguments(self):
 169     """Check syntactic validity for the opcode arguments.
 170
 171     This method is for doing a simple syntactic check and ensure
 172     validity of opcode parameters, without any cluster-related
 173     checks. While the same can be accomplished in ExpandNames and/or
 174     CheckPrereq, doing these separate is better because:
 175
 176       - ExpandNames is left as as purely a lock-related function
 177       - CheckPrereq is run after we have acquired locks (and possible
 178         waited for them)
 179
 180     The function is allowed to change the self.op attribute so that
 181     later methods can no longer worry about missing parameters.
 182
 183     """
 184     pass
 185
 186   def ExpandNames(self):
 187     """Expand names for this LU.
 188
 189     This method is called before starting to execute the opcode, and it should
 190     update all the parameters of the opcode to their canonical form (e.g. a
 191     short node name must be fully expanded after this method has successfully
 192     completed). This way locking, hooks, logging, etc. can work correctly.
 193
 194     LUs which implement this method must also populate the self.needed_locks
 195     member, as a dict with lock levels as keys, and a list of needed lock names
 196     as values. Rules:
 197
 198       - use an empty dict if you don't need any lock
 199       - if you don't need any lock at a particular level omit that
 200         level (note that in this case C{DeclareLocks} won't be called
 201         at all for that level)
 202       - if you need locks at a level, but you can't calculate it in
 203         this function, initialise that level with an empty list and do
 204         further processing in L{LogicalUnit.DeclareLocks} (see that
 205         function's docstring)
 206       - don't put anything for the BGL level
 207       - if you want all locks at a level use L{locking.ALL_SET} as a value
 208
 209     If you need to share locks (rather than acquire them exclusively) at one
 210     level you can modify self.share_locks, setting a true value (usually 1) for
 211     that level. By default locks are not shared.
 212
 213     This function can also define a list of tasklets, which then will be
 214     executed in order instead of the usual LU-level CheckPrereq and Exec
 215     functions, if those are not defined by the LU.
 216
 217     Examples::
 218
 219       # Acquire all nodes and one instance
 220       self.needed_locks = {
 221         locking.LEVEL_NODE: locking.ALL_SET,
 222         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 223       }
 224       # Acquire just two nodes
 225       self.needed_locks = {
 226         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 227       }
 228       # Acquire no locks
 229       self.needed_locks = {} # No, you can't leave it to the default value None
 230
 231     """
 232     # The implementation of this method is mandatory only if the new LU is
 233     # concurrent, so that old LUs don't need to be changed all at the same
 234     # time.
 235     if self.REQ_BGL:
 236       self.needed_locks = {} # Exclusive LUs don't need locks.
 237     else:
 238       raise NotImplementedError
 239
 240   def DeclareLocks(self, level):
 241     """Declare LU locking needs for a level
 242
 243     While most LUs can just declare their locking needs at ExpandNames time,
 244     sometimes there's the need to calculate some locks after having acquired
 245     the ones before. This function is called just before acquiring locks at a
 246     particular level, but after acquiring the ones at lower levels, and permits
 247     such calculations. It can be used to modify self.needed_locks, and by
 248     default it does nothing.
 249
 250     This function is only called if you have something already set in
 251     self.needed_locks for the level.
 252
 253     @param level: Locking level which is going to be locked
 254     @type level: member of L{ganeti.locking.LEVELS}
 255
 256     """
 257
 258   def CheckPrereq(self):
 259     """Check prerequisites for this LU.
 260
 261     This method should check that the prerequisites for the execution
 262     of this LU are fulfilled. It can do internode communication, but
 263     it should be idempotent - no cluster or system changes are
 264     allowed.
 265
 266     The method should raise errors.OpPrereqError in case something is
 267     not fulfilled. Its return value is ignored.
 268
 269     This method should also update all the parameters of the opcode to
 270     their canonical form if it hasn't been done by ExpandNames before.
 271
 272     """
 273     if self.tasklets is not None:
 274       for (idx, tl) in enumerate(self.tasklets):
 275         logging.debug("Checking prerequisites for tasklet %s/%s",
 276                       idx + 1, len(self.tasklets))
 277         tl.CheckPrereq()
 278     else:
 279       pass
 280
 281   def Exec(self, feedback_fn):
 282     """Execute the LU.
 283
 284     This method should implement the actual work. It should raise
 285     errors.OpExecError for failures that are somewhat dealt with in
 286     code, or expected.
 287
 288     """
 289     if self.tasklets is not None:
 290       for (idx, tl) in enumerate(self.tasklets):
 291         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 292         tl.Exec(feedback_fn)
 293     else:
 294       raise NotImplementedError
 295
 296   def BuildHooksEnv(self):
 297     """Build hooks environment for this LU.
 298
 299     @rtype: dict
 300     @return: Dictionary containing the environment that will be used for
 301       running the hooks for this LU. The keys of the dict must not be prefixed
 302       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 303       will extend the environment with additional variables. If no environment
 304       should be defined, an empty dictionary should be returned (not C{None}).
 305     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 306       will not be called.
 307
 308     """
 309     raise NotImplementedError
 310
 311   def BuildHooksNodes(self):
 312     """Build list of nodes to run LU's hooks.
 313
 314     @rtype: tuple; (list, list)
 315     @return: Tuple containing a list of node names on which the hook
 316       should run before the execution and a list of node names on which the
 317       hook should run after the execution. No nodes should be returned as an
 318       empty list (and not None).
 319     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 320       will not be called.
 321
 322     """
 323     raise NotImplementedError
 324
 325   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 326     """Notify the LU about the results of its hooks.
 327
 328     This method is called every time a hooks phase is executed, and notifies
 329     the Logical Unit about the hooks' result. The LU can then use it to alter
 330     its result based on the hooks.  By default the method does nothing and the
 331     previous result is passed back unchanged but any LU can define it if it
 332     wants to use the local cluster hook-scripts somehow.
 333
 334     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 335         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 336     @param hook_results: the results of the multi-node hooks rpc call
 337     @param feedback_fn: function used send feedback back to the caller
 338     @param lu_result: the previous Exec result this LU had, or None
 339         in the PRE phase
 340     @return: the new Exec result, based on the previous result
 341         and hook results
 342
 343     """
 344     # API must be kept, thus we ignore the unused argument and could
 345     # be a function warnings
 346     # pylint: disable=W0613,R0201
 347     return lu_result
 348
 349   def _ExpandAndLockInstance(self):
 350     """Helper function to expand and lock an instance.
 351
 352     Many LUs that work on an instance take its name in self.op.instance_name
 353     and need to expand it and then declare the expanded name for locking. This
 354     function does it, and then updates self.op.instance_name to the expanded
 355     name. It also initializes needed_locks as a dict, if this hasn't been done
 356     before.
 357
 358     """
 359     if self.needed_locks is None:
 360       self.needed_locks = {}
 361     else:
 362       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 363         "_ExpandAndLockInstance called with instance-level locks set"
 364     self.op.instance_name = _ExpandInstanceName(self.cfg,
 365                                                 self.op.instance_name)
 366     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 367
 368   def _LockInstancesNodes(self, primary_only=False,
 369                           level=locking.LEVEL_NODE):
 370     """Helper function to declare instances' nodes for locking.
 371
 372     This function should be called after locking one or more instances to lock
 373     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 374     with all primary or secondary nodes for instances already locked and
 375     present in self.needed_locks[locking.LEVEL_INSTANCE].
 376
 377     It should be called from DeclareLocks, and for safety only works if
 378     self.recalculate_locks[locking.LEVEL_NODE] is set.
 379
 380     In the future it may grow parameters to just lock some instance's nodes, or
 381     to just lock primaries or secondary nodes, if needed.
 382
 383     If should be called in DeclareLocks in a way similar to::
 384
 385       if level == locking.LEVEL_NODE:
 386         self._LockInstancesNodes()
 387
 388     @type primary_only: boolean
 389     @param primary_only: only lock primary nodes of locked instances
 390     @param level: Which lock level to use for locking nodes
 391
 392     """
 393     assert level in self.recalculate_locks, \
 394       "_LockInstancesNodes helper function called with no nodes to recalculate"
 395
 396     # TODO: check if we're really been called with the instance locks held
 397
 398     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 399     # future we might want to have different behaviors depending on the value
 400     # of self.recalculate_locks[locking.LEVEL_NODE]
 401     wanted_nodes = []
 402     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 403     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 404       wanted_nodes.append(instance.primary_node)
 405       if not primary_only:
 406         wanted_nodes.extend(instance.secondary_nodes)
 407
 408     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 409       self.needed_locks[level] = wanted_nodes
 410     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 411       self.needed_locks[level].extend(wanted_nodes)
 412     else:
 413       raise errors.ProgrammerError("Unknown recalculation mode")
 414
 415     del self.recalculate_locks[level]
 416
 417
 418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 419   """Simple LU which runs no hooks.
 420
 421   This LU is intended as a parent for other LogicalUnits which will
 422   run no hooks, in order to reduce duplicate code.
 423
 424   """
 425   HPATH = None
 426   HTYPE = None
 427
 428   def BuildHooksEnv(self):
 429     """Empty BuildHooksEnv for NoHooksLu.
 430
 431     This just raises an error.
 432
 433     """
 434     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 435
 436   def BuildHooksNodes(self):
 437     """Empty BuildHooksNodes for NoHooksLU.
 438
 439     """
 440     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 441
 442
 443 class Tasklet:
 444   """Tasklet base class.
 445
 446   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 447   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 448   tasklets know nothing about locks.
 449
 450   Subclasses must follow these rules:
 451     - Implement CheckPrereq
 452     - Implement Exec
 453
 454   """
 455   def __init__(self, lu):
 456     self.lu = lu
 457
 458     # Shortcuts
 459     self.cfg = lu.cfg
 460     self.rpc = lu.rpc
 461
 462   def CheckPrereq(self):
 463     """Check prerequisites for this tasklets.
 464
 465     This method should check whether the prerequisites for the execution of
 466     this tasklet are fulfilled. It can do internode communication, but it
 467     should be idempotent - no cluster or system changes are allowed.
 468
 469     The method should raise errors.OpPrereqError in case something is not
 470     fulfilled. Its return value is ignored.
 471
 472     This method should also update all parameters to their canonical form if it
 473     hasn't been done before.
 474
 475     """
 476     pass
 477
 478   def Exec(self, feedback_fn):
 479     """Execute the tasklet.
 480
 481     This method should implement the actual work. It should raise
 482     errors.OpExecError for failures that are somewhat dealt with in code, or
 483     expected.
 484
 485     """
 486     raise NotImplementedError
 487
 488
 489 class _QueryBase:
 490   """Base for query utility classes.
 491
 492   """
 493   #: Attribute holding field definitions
 494   FIELDS = None
 495
 496   def __init__(self, qfilter, fields, use_locking):
 497     """Initializes this class.
 498
 499     """
 500     self.use_locking = use_locking
 501
 502     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 503                              namefield="name")
 504     self.requested_data = self.query.RequestedData()
 505     self.names = self.query.RequestedNames()
 506
 507     # Sort only if no names were requested
 508     self.sort_by_name = not self.names
 509
 510     self.do_locking = None
 511     self.wanted = None
 512
 513   def _GetNames(self, lu, all_names, lock_level):
 514     """Helper function to determine names asked for in the query.
 515
 516     """
 517     if self.do_locking:
 518       names = lu.owned_locks(lock_level)
 519     else:
 520       names = all_names
 521
 522     if self.wanted == locking.ALL_SET:
 523       assert not self.names
 524       # caller didn't specify names, so ordering is not important
 525       return utils.NiceSort(names)
 526
 527     # caller specified names and we must keep the same order
 528     assert self.names
 529     assert not self.do_locking or lu.glm.is_owned(lock_level)
 530
 531     missing = set(self.wanted).difference(names)
 532     if missing:
 533       raise errors.OpExecError("Some items were removed before retrieving"
 534                                " their data: %s" % missing)
 535
 536     # Return expanded names
 537     return self.wanted
 538
 539   def ExpandNames(self, lu):
 540     """Expand names for this query.
 541
 542     See L{LogicalUnit.ExpandNames}.
 543
 544     """
 545     raise NotImplementedError()
 546
 547   def DeclareLocks(self, lu, level):
 548     """Declare locks for this query.
 549
 550     See L{LogicalUnit.DeclareLocks}.
 551
 552     """
 553     raise NotImplementedError()
 554
 555   def _GetQueryData(self, lu):
 556     """Collects all data for this query.
 557
 558     @return: Query data object
 559
 560     """
 561     raise NotImplementedError()
 562
 563   def NewStyleQuery(self, lu):
 564     """Collect data and execute query.
 565
 566     """
 567     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 568                                   sort_by_name=self.sort_by_name)
 569
 570   def OldStyleQuery(self, lu):
 571     """Collect data and execute query.
 572
 573     """
 574     return self.query.OldStyleQuery(self._GetQueryData(lu),
 575                                     sort_by_name=self.sort_by_name)
 576
 577
 578 def _ShareAll():
 579   """Returns a dict declaring all lock levels shared.
 580
 581   """
 582   return dict.fromkeys(locking.LEVELS, 1)
 583
 584
 585 def _MakeLegacyNodeInfo(data):
 586   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 587
 588   Converts the data into a single dictionary. This is fine for most use cases,
 589   but some require information from more than one volume group or hypervisor.
 590
 591   """
 592   (bootid, (vg_info, ), (hv_info, )) = data
 593
 594   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 595     "bootid": bootid,
 596     })
 597
 598
 599 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 600   """Checks if the owned node groups are still correct for an instance.
 601
 602   @type cfg: L{config.ConfigWriter}
 603   @param cfg: The cluster configuration
 604   @type instance_name: string
 605   @param instance_name: Instance name
 606   @type owned_groups: set or frozenset
 607   @param owned_groups: List of currently owned node groups
 608
 609   """
 610   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 611
 612   if not owned_groups.issuperset(inst_groups):
 613     raise errors.OpPrereqError("Instance %s's node groups changed since"
 614                                " locks were acquired, current groups are"
 615                                " are '%s', owning groups '%s'; retry the"
 616                                " operation" %
 617                                (instance_name,
 618                                 utils.CommaJoin(inst_groups),
 619                                 utils.CommaJoin(owned_groups)),
 620                                errors.ECODE_STATE)
 621
 622   return inst_groups
 623
 624
 625 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 626   """Checks if the instances in a node group are still correct.
 627
 628   @type cfg: L{config.ConfigWriter}
 629   @param cfg: The cluster configuration
 630   @type group_uuid: string
 631   @param group_uuid: Node group UUID
 632   @type owned_instances: set or frozenset
 633   @param owned_instances: List of currently owned instances
 634
 635   """
 636   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 637   if owned_instances != wanted_instances:
 638     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 639                                " locks were acquired, wanted '%s', have '%s';"
 640                                " retry the operation" %
 641                                (group_uuid,
 642                                 utils.CommaJoin(wanted_instances),
 643                                 utils.CommaJoin(owned_instances)),
 644                                errors.ECODE_STATE)
 645
 646   return wanted_instances
 647
 648
 649 def _SupportsOob(cfg, node):
 650   """Tells if node supports OOB.
 651
 652   @type cfg: L{config.ConfigWriter}
 653   @param cfg: The cluster configuration
 654   @type node: L{objects.Node}
 655   @param node: The node
 656   @return: The OOB script if supported or an empty string otherwise
 657
 658   """
 659   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 660
 661
 662 def _GetWantedNodes(lu, nodes):
 663   """Returns list of checked and expanded node names.
 664
 665   @type lu: L{LogicalUnit}
 666   @param lu: the logical unit on whose behalf we execute
 667   @type nodes: list
 668   @param nodes: list of node names or None for all nodes
 669   @rtype: list
 670   @return: the list of nodes, sorted
 671   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 672
 673   """
 674   if nodes:
 675     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 676
 677   return utils.NiceSort(lu.cfg.GetNodeList())
 678
 679
 680 def _GetWantedInstances(lu, instances):
 681   """Returns list of checked and expanded instance names.
 682
 683   @type lu: L{LogicalUnit}
 684   @param lu: the logical unit on whose behalf we execute
 685   @type instances: list
 686   @param instances: list of instance names or None for all instances
 687   @rtype: list
 688   @return: the list of instances, sorted
 689   @raise errors.OpPrereqError: if the instances parameter is wrong type
 690   @raise errors.OpPrereqError: if any of the passed instances is not found
 691
 692   """
 693   if instances:
 694     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 695   else:
 696     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 697   return wanted
 698
 699
 700 def _GetUpdatedParams(old_params, update_dict,
 701                       use_default=True, use_none=False):
 702   """Return the new version of a parameter dictionary.
 703
 704   @type old_params: dict
 705   @param old_params: old parameters
 706   @type update_dict: dict
 707   @param update_dict: dict containing new parameter values, or
 708       constants.VALUE_DEFAULT to reset the parameter to its default
 709       value
 710   @param use_default: boolean
 711   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 712       values as 'to be deleted' values
 713   @param use_none: boolean
 714   @type use_none: whether to recognise C{None} values as 'to be
 715       deleted' values
 716   @rtype: dict
 717   @return: the new parameter dictionary
 718
 719   """
 720   params_copy = copy.deepcopy(old_params)
 721   for key, val in update_dict.iteritems():
 722     if ((use_default and val == constants.VALUE_DEFAULT) or
 723         (use_none and val is None)):
 724       try:
 725         del params_copy[key]
 726       except KeyError:
 727         pass
 728     else:
 729       params_copy[key] = val
 730   return params_copy
 731
 732
 733 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 734   """Return the new version of a instance policy.
 735
 736   @param group_policy: whether this policy applies to a group and thus
 737     we should support removal of policy entries
 738
 739   """
 740   use_none = use_default = group_policy
 741   ipolicy = copy.deepcopy(old_ipolicy)
 742   for key, value in new_ipolicy.items():
 743     if key not in constants.IPOLICY_ALL_KEYS:
 744       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 745                                  errors.ECODE_INVAL)
 746     if key in constants.IPOLICY_ISPECS:
 747       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 748       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 749                                        use_none=use_none,
 750                                        use_default=use_default)
 751     else:
 752       if not value or value == [constants.VALUE_DEFAULT]:
 753         if group_policy:
 754           del ipolicy[key]
 755         else:
 756           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 757                                      " on the cluster'" % key,
 758                                      errors.ECODE_INVAL)
 759       else:
 760         if key in constants.IPOLICY_PARAMETERS:
 761           # FIXME: we assume all such values are float
 762           try:
 763             ipolicy[key] = float(value)
 764           except (TypeError, ValueError), err:
 765             raise errors.OpPrereqError("Invalid value for attribute"
 766                                        " '%s': '%s', error: %s" %
 767                                        (key, value, err), errors.ECODE_INVAL)
 768         else:
 769           # FIXME: we assume all others are lists; this should be redone
 770           # in a nicer way
 771           ipolicy[key] = list(value)
 772   try:
 773     objects.InstancePolicy.CheckParameterSyntax(ipolicy)
 774   except errors.ConfigurationError, err:
 775     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 776                                errors.ECODE_INVAL)
 777   return ipolicy
 778
 779
 780 def _UpdateAndVerifySubDict(base, updates, type_check):
 781   """Updates and verifies a dict with sub dicts of the same type.
 782
 783   @param base: The dict with the old data
 784   @param updates: The dict with the new data
 785   @param type_check: Dict suitable to ForceDictType to verify correct types
 786   @returns: A new dict with updated and verified values
 787
 788   """
 789   def fn(old, value):
 790     new = _GetUpdatedParams(old, value)
 791     utils.ForceDictType(new, type_check)
 792     return new
 793
 794   ret = copy.deepcopy(base)
 795   ret.update(dict((key, fn(base.get(key, {}), value))
 796                   for key, value in updates.items()))
 797   return ret
 798
 799
 800 def _MergeAndVerifyHvState(op_input, obj_input):
 801   """Combines the hv state from an opcode with the one of the object
 802
 803   @param op_input: The input dict from the opcode
 804   @param obj_input: The input dict from the objects
 805   @return: The verified and updated dict
 806
 807   """
 808   if op_input:
 809     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 810     if invalid_hvs:
 811       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 812                                  " %s" % utils.CommaJoin(invalid_hvs),
 813                                  errors.ECODE_INVAL)
 814     if obj_input is None:
 815       obj_input = {}
 816     type_check = constants.HVSTS_PARAMETER_TYPES
 817     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 818
 819   return None
 820
 821
 822 def _MergeAndVerifyDiskState(op_input, obj_input):
 823   """Combines the disk state from an opcode with the one of the object
 824
 825   @param op_input: The input dict from the opcode
 826   @param obj_input: The input dict from the objects
 827   @return: The verified and updated dict
 828   """
 829   if op_input:
 830     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 831     if invalid_dst:
 832       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 833                                  utils.CommaJoin(invalid_dst),
 834                                  errors.ECODE_INVAL)
 835     type_check = constants.DSS_PARAMETER_TYPES
 836     if obj_input is None:
 837       obj_input = {}
 838     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 839                                               type_check))
 840                 for key, value in op_input.items())
 841
 842   return None
 843
 844
 845 def _ReleaseLocks(lu, level, names=None, keep=None):
 846   """Releases locks owned by an LU.
 847
 848   @type lu: L{LogicalUnit}
 849   @param level: Lock level
 850   @type names: list or None
 851   @param names: Names of locks to release
 852   @type keep: list or None
 853   @param keep: Names of locks to retain
 854
 855   """
 856   assert not (keep is not None and names is not None), \
 857          "Only one of the 'names' and the 'keep' parameters can be given"
 858
 859   if names is not None:
 860     should_release = names.__contains__
 861   elif keep:
 862     should_release = lambda name: name not in keep
 863   else:
 864     should_release = None
 865
 866   owned = lu.owned_locks(level)
 867   if not owned:
 868     # Not owning any lock at this level, do nothing
 869     pass
 870
 871   elif should_release:
 872     retain = []
 873     release = []
 874
 875     # Determine which locks to release
 876     for name in owned:
 877       if should_release(name):
 878         release.append(name)
 879       else:
 880         retain.append(name)
 881
 882     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 883
 884     # Release just some locks
 885     lu.glm.release(level, names=release)
 886
 887     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 888   else:
 889     # Release everything
 890     lu.glm.release(level)
 891
 892     assert not lu.glm.is_owned(level), "No locks should be owned"
 893
 894
 895 def _MapInstanceDisksToNodes(instances):
 896   """Creates a map from (node, volume) to instance name.
 897
 898   @type instances: list of L{objects.Instance}
 899   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 900
 901   """
 902   return dict(((node, vol), inst.name)
 903               for inst in instances
 904               for (node, vols) in inst.MapLVsByNode().items()
 905               for vol in vols)
 906
 907
 908 def _RunPostHook(lu, node_name):
 909   """Runs the post-hook for an opcode on a single node.
 910
 911   """
 912   hm = lu.proc.BuildHooksManager(lu)
 913   try:
 914     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 915   except:
 916     # pylint: disable=W0702
 917     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 918
 919
 920 def _CheckOutputFields(static, dynamic, selected):
 921   """Checks whether all selected fields are valid.
 922
 923   @type static: L{utils.FieldSet}
 924   @param static: static fields set
 925   @type dynamic: L{utils.FieldSet}
 926   @param dynamic: dynamic fields set
 927
 928   """
 929   f = utils.FieldSet()
 930   f.Extend(static)
 931   f.Extend(dynamic)
 932
 933   delta = f.NonMatching(selected)
 934   if delta:
 935     raise errors.OpPrereqError("Unknown output fields selected: %s"
 936                                % ",".join(delta), errors.ECODE_INVAL)
 937
 938
 939 def _CheckGlobalHvParams(params):
 940   """Validates that given hypervisor params are not global ones.
 941
 942   This will ensure that instances don't get customised versions of
 943   global params.
 944
 945   """
 946   used_globals = constants.HVC_GLOBALS.intersection(params)
 947   if used_globals:
 948     msg = ("The following hypervisor parameters are global and cannot"
 949            " be customized at instance level, please modify them at"
 950            " cluster level: %s" % utils.CommaJoin(used_globals))
 951     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 952
 953
 954 def _CheckNodeOnline(lu, node, msg=None):
 955   """Ensure that a given node is online.
 956
 957   @param lu: the LU on behalf of which we make the check
 958   @param node: the node to check
 959   @param msg: if passed, should be a message to replace the default one
 960   @raise errors.OpPrereqError: if the node is offline
 961
 962   """
 963   if msg is None:
 964     msg = "Can't use offline node"
 965   if lu.cfg.GetNodeInfo(node).offline:
 966     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 967
 968
 969 def _CheckNodeNotDrained(lu, node):
 970   """Ensure that a given node is not drained.
 971
 972   @param lu: the LU on behalf of which we make the check
 973   @param node: the node to check
 974   @raise errors.OpPrereqError: if the node is drained
 975
 976   """
 977   if lu.cfg.GetNodeInfo(node).drained:
 978     raise errors.OpPrereqError("Can't use drained node %s" % node,
 979                                errors.ECODE_STATE)
 980
 981
 982 def _CheckNodeVmCapable(lu, node):
 983   """Ensure that a given node is vm capable.
 984
 985   @param lu: the LU on behalf of which we make the check
 986   @param node: the node to check
 987   @raise errors.OpPrereqError: if the node is not vm capable
 988
 989   """
 990   if not lu.cfg.GetNodeInfo(node).vm_capable:
 991     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 992                                errors.ECODE_STATE)
 993
 994
 995 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 996   """Ensure that a node supports a given OS.
 997
 998   @param lu: the LU on behalf of which we make the check
 999   @param node: the node to check
1000   @param os_name: the OS to query about
1001   @param force_variant: whether to ignore variant errors
1002   @raise errors.OpPrereqError: if the node is not supporting the OS
1003
1004   """
1005   result = lu.rpc.call_os_get(node, os_name)
1006   result.Raise("OS '%s' not in supported OS list for node %s" %
1007                (os_name, node),
1008                prereq=True, ecode=errors.ECODE_INVAL)
1009   if not force_variant:
1010     _CheckOSVariant(result.payload, os_name)
1011
1012
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  ip_result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  ip_result.Raise("Failure checking secondary ip on node %s" % node,
                  prereq=prereq, ecode=errors.ECODE_ENVIRON)

  if ip_result.payload:
    # The node confirmed it owns the address
    return

  msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
         " please fix and re-run this command" % secondary_ip)
  if prereq:
    raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
  raise errors.OpExecError(msg)
1038
1039
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  @return: the single line read (in strict mode) from
      L{constants.CLUSTER_DOMAIN_SECRET_FILE}

  """
  secret_file = constants.CLUSTER_DOMAIN_SECRET_FILE
  return utils.ReadOneLineFile(secret_file, strict=True)
1046
1047
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  If L{constants.ADMINST_UP} is not among the required states, the primary
  node is additionally queried to make sure the instance is not running.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param req_states: the admin states the instance is allowed to be in
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)

  admin_state = instance.admin_state
  if admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP in req_states:
    # A running instance is acceptable, no need to ask the node
    return

  pnode = instance.primary_node
  all_results = lu.rpc.call_instance_list([pnode], [instance.hypervisor])
  running = all_results[pnode]
  running.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in running.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, msg), errors.ECODE_STATE)
1073
1074
def _ComputeMinMaxSpec(name, ipolicy, value):
  """Computes if value is in the desired range.

  Bounds missing from the policy default to the value itself, i.e. they
  are not enforced.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in (None, constants.VALUE_AUTO):
    # Unset or automatic values cannot be range-checked
    return None

  upper = ipolicy[constants.ISPECS_MAX].get(name, value)
  lower = ipolicy[constants.ISPECS_MIN].get(name, value)

  out_of_range = value > upper or lower > value
  if not out_of_range:
    return None

  return ("%s value %s is not in range [%s, %s]" %
          (name, value, lower, upper))
1093
1094
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    ]
  # Every disk is checked on its own against the disk size limits
  for size in disk_sizes:
    test_settings.append((constants.ISPEC_DISK_SIZE, size))

  violations = []
  for (name, value) in test_settings:
    problem = _compute_fn(name, ipolicy, value)
    if problem:
      violations.append(problem)
  return violations
1128
1129
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  The memory size and cpu count are taken from the instance's own backend
  parameters (C{None} if not set there).

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  beparams = instance.beparams
  disks = instance.disks

  return _compute_fn(ipolicy,
                     beparams.get(constants.BE_MAXMEM, None),
                     beparams.get(constants.BE_VCPUS, None),
                     len(disks),
                     len(instance.nics),
                     [disk.size for disk in disks])
1150
1151
def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
    _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  Missing memory size and cpu count default to C{None}, missing counts to
  zero and missing disk sizes to an empty list.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  spec = instance_spec.get

  return _compute_fn(ipolicy,
                     spec(constants.ISPEC_MEM_SIZE, None),
                     spec(constants.ISPEC_CPU_COUNT, None),
                     spec(constants.ISPEC_DISK_COUNT, 0),
                     spec(constants.ISPEC_NIC_COUNT, 0),
                     spec(constants.ISPEC_DISK_SIZE, []))
1172
1173
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  No check is made (and no violation reported) when the instance stays in
  its current group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  stays_in_group = (current_group == target_group)
  if stays_in_group:
    return []

  return _compute_fn(ipolicy, instance)
1191
1192
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param lu: the LU on behalf of which we make the check
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @raise errors.OpPrereqError: if there are violations and C{ignore} is
      not set
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  violations = _compute_fn(ipolicy, instance, primary_node.group, node.group)
  if not violations:
    return

  msg = ("Instance does not meet target node group's (%s) instance"
         " policy: %s") % (node.group, utils.CommaJoin(violations))
  if not ignore:
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

  # Violations were explicitly allowed, so only warn about them
  lu.LogWarning(msg)
1215
1216
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  Instances which already violate the old policy are not reported, only
  the ones which become violating because of the policy change.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A frozenset with the names of the instances which violate the
      new ipolicy but did not violate the old one

  """
  new_violators = _ComputeViolatingInstances(new_ipolicy, instances)
  old_violators = _ComputeViolatingInstances(old_ipolicy, instances)

  # The difference must be "new minus old"; the reverse would return the
  # instances which get *fixed* by the policy change
  return new_violators - old_violators
1228
1229
1230 def _ExpandItemName(fn, name, kind):
1231   """Expand an item name.
1232
1233   @param fn: the function to use for expansion
1234   @param name: requested item name
1235   @param kind: text description ('Node' or 'Instance')
1236   @return: the resolved (full) name
1237   @raise errors.OpPrereqError: if the item is not found
1238
1239   """
1240   full_name = fn(name)
1241   if full_name is None:
1242     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1243                                errors.ECODE_NOENT)
1244   return full_name
1245
1246
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes.

  @param cfg: the configuration object providing C{ExpandNodeName}
  @param name: the requested node name
  @return: the resolved (full) node name

  """
  expand_fn = cfg.ExpandNodeName
  return _ExpandItemName(expand_fn, name, "Node")
1250
1251
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance.

  @param cfg: the configuration object providing C{ExpandInstanceName}
  @param name: the requested instance name
  @return: the resolved (full) instance name

  """
  expand_fn = cfg.ExpandInstanceName
  return _ExpandItemName(expand_fn, name, "Instance")
1255
1256
1257 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1258                           minmem, maxmem, vcpus, nics, disk_template, disks,
1259                           bep, hvp, hypervisor_name, tags):
1260   """Builds instance related env variables for hooks
1261
1262   This builds the hook environment from individual variables.
1263
1264   @type name: string
1265   @param name: the name of the instance
1266   @type primary_node: string
1267   @param primary_node: the name of the instance's primary node
1268   @type secondary_nodes: list
1269   @param secondary_nodes: list of secondary nodes as strings
1270   @type os_type: string
1271   @param os_type: the name of the instance's OS
1272   @type status: string
1273   @param status: the desired status of the instance
1274   @type minmem: string
1275   @param minmem: the minimum memory size of the instance
1276   @type maxmem: string
1277   @param maxmem: the maximum memory size of the instance
1278   @type vcpus: string
1279   @param vcpus: the count of VCPUs the instance has
1280   @type nics: list
1281   @param nics: list of tuples (ip, mac, mode, link) representing
1282       the NICs the instance has
1283   @type disk_template: string
1284   @param disk_template: the disk template of the instance
1285   @type disks: list
1286   @param disks: the list of (size, mode) pairs
1287   @type bep: dict
1288   @param bep: the backend parameters for the instance
1289   @type hvp: dict
1290   @param hvp: the hypervisor parameters for the instance
1291   @type hypervisor_name: string
1292   @param hypervisor_name: the hypervisor for the instance
1293   @type tags: list
1294   @param tags: list of instance tags as strings
1295   @rtype: dict
1296   @return: the hook environment for this instance
1297
1298   """
1299   env = {
1300     "OP_TARGET": name,
1301     "INSTANCE_NAME": name,
1302     "INSTANCE_PRIMARY": primary_node,
1303     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1304     "INSTANCE_OS_TYPE": os_type,
1305     "INSTANCE_STATUS": status,
1306     "INSTANCE_MINMEM": minmem,
1307     "INSTANCE_MAXMEM": maxmem,
1308     # TODO(2.7) remove deprecated "memory" value
1309     "INSTANCE_MEMORY": maxmem,
1310     "INSTANCE_VCPUS": vcpus,
1311     "INSTANCE_DISK_TEMPLATE": disk_template,
1312     "INSTANCE_HYPERVISOR": hypervisor_name,
1313   }
1314   if nics:
1315     nic_count = len(nics)
1316     for idx, (ip, mac, mode, link) in enumerate(nics):
1317       if ip is None:
1318         ip = ""
1319       env["INSTANCE_NIC%d_IP" % idx] = ip
1320       env["INSTANCE_NIC%d_MAC" % idx] = mac
1321       env["INSTANCE_NIC%d_MODE" % idx] = mode
1322       env["INSTANCE_NIC%d_LINK" % idx] = link
1323       if mode == constants.NIC_MODE_BRIDGED:
1324         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1325   else:
1326     nic_count = 0
1327
1328   env["INSTANCE_NIC_COUNT"] = nic_count
1329
1330   if disks:
1331     disk_count = len(disks)
1332     for idx, (size, mode) in enumerate(disks):
1333       env["INSTANCE_DISK%d_SIZE" % idx] = size
1334       env["INSTANCE_DISK%d_MODE" % idx] = mode
1335   else:
1336     disk_count = 0
1337
1338   env["INSTANCE_DISK_COUNT"] = disk_count
1339
1340   if not tags:
1341     tags = []
1342
1343   env["INSTANCE_TAGS"] = " ".join(tags)
1344
1345   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1346     for key, value in source.items():
1347       env["INSTANCE_%s_%s" % (kind, key)] = value
1348
1349   return env
1350
1351
1352 def _NICListToTuple(lu, nics):
1353   """Build a list of nic information tuples.
1354
1355   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1356   value in LUInstanceQueryData.
1357
1358   @type lu:  L{LogicalUnit}
1359   @param lu: the logical unit on whose behalf we execute
1360   @type nics: list of L{objects.NIC}
1361   @param nics: list of nics to convert to hooks tuples
1362
1363   """
1364   hooks_nics = []
1365   cluster = lu.cfg.GetClusterInfo()
1366   for nic in nics:
1367     ip = nic.ip
1368     mac = nic.mac
1369     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1370     mode = filled_params[constants.NIC_MODE]
1371     link = filled_params[constants.NIC_LINK]
1372     hooks_nics.append((ip, mac, mode, link))
1373   return hooks_nics
1374
1375
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values; the keys are argument names of L{_BuildInstanceHookEnv}
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  # Use the filled parameters, so that cluster defaults are exported too
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)

  args = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_state,
    maxmem=bep[constants.BE_MAXMEM],
    minmem=bep[constants.BE_MINMEM],
    vcpus=bep[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=bep,
    hvp=hvp,
    hypervisor_name=instance.hypervisor,
    tags=instance.tags,
    )
  if override:
    args.update(override)

  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1414
1415
1416 def _AdjustCandidatePool(lu, exceptions):
1417   """Adjust the candidate pool after node operations.
1418
1419   """
1420   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1421   if mod_list:
1422     lu.LogInfo("Promoted nodes to master candidate role: %s",
1423                utils.CommaJoin(node.name for node in mod_list))
1424     for name in mod_list:
1425       lu.context.ReaddNode(name)
1426   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1427   if mc_now > mc_max:
1428     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1429                (mc_now, mc_max))
1430
1431
1432 def _DecideSelfPromotion(lu, exceptions=None):
1433   """Decide whether I should promote myself as a master candidate.
1434
1435   """
1436   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1437   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1438   # the new node will increase mc_max with one, so:
1439   mc_should = min(mc_should + 1, cp_size)
1440   return mc_now < mc_should
1441
1442
1443 def _CalculateGroupIPolicy(cluster, group):
1444   """Calculate instance policy for group.
1445
1446   """
1447   return cluster.SimpleFillIPolicy(group.ipolicy)
1448
1449
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  violating = set()
  for inst in instances:
    if _ComputeIPolicyInstanceViolation(ipolicy, inst):
      violating.add(inst.name)
  return frozenset(violating)
1461
1462
1463 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1464   """Check that the brigdes needed by a list of nics exist.
1465
1466   """
1467   cluster = lu.cfg.GetClusterInfo()
1468   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1469   brlist = [params[constants.NIC_LINK] for params in paramslist
1470             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1471   if brlist:
1472     result = lu.rpc.call_bridges_exist(target_node, brlist)
1473     result.Raise("Error checking bridges on destination node '%s'" %
1474                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1475
1476
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary node

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node

  _CheckNicsBridgesExist(lu, instance.nics, target_node)
1484
1485
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if a variant is given for an OS without
      variants, or if the variant is missing or unsupported for an OS
      with variants

  """
  variant = objects.OS.GetVariant(name)
  supported = os_obj.supported_variants

  if not supported:
    # The OS has no variants at all, so none may be requested
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return

  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in supported:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1508
1509
1510 def _GetNodeInstancesInner(cfg, fn):
1511   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1512
1513
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: the configuration object
  @param node_name: the name of the node to look for

  """
  def _UsesNode(inst):
    """Tells whether the node is among the instance's nodes.

    """
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1520
1521
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: the configuration object
  @param node_name: the name of the node to look for

  """
  def _IsPrimary(inst):
    """Tells whether the node is the instance's primary node.

    """
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1528
1529
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: the configuration object
  @param node_name: the name of the node to look for

  """
  def _IsSecondary(inst):
    """Tells whether the node is among the instance's secondary nodes.

    """
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1536
1537
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: the configuration object
  @param storage_type: the storage type to build the arguments for
  @rtype: list
  @return: a one-element list holding the list of file storage directories
      for L{constants.ST_FILE}, an empty list for any other type

  """
  is_file_storage = (storage_type == constants.ST_FILE)
  if not is_file_storage:
    return []

  # Special case for file storage:
  # storage.FileStorage wants a list of storage directories
  storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
  return [storage_dirs]
1548
1549
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Finds the disks of an instance reported as faulty by a node.

  @param cfg: the configuration object, used to set the disk IDs for the
      given node
  @param rpc_runner: the RPC runner used to query the mirror status
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed to the RPC result's C{Raise}, selecting the kind
      of error raised on RPC failure
  @rtype: list
  @return: the indices, within the returned status list, of the entries
      whose local disk status is L{constants.LDS_FAULTY}

  """
  disks = instance.disks
  for dev in disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # Entries without a status are skipped
  return [idx for (idx, bdev_status) in enumerate(result.payload)
          if bdev_status and
             bdev_status.ldisk_status == constants.LDS_FAULTY]
1565
1566
1567 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1568   """Check the sanity of iallocator and node arguments and use the
1569   cluster-wide iallocator if appropriate.
1570
1571   Check that at most one of (iallocator, node) is specified. If none is
1572   specified, then the LU's opcode's iallocator slot is filled with the
1573   cluster-wide default iallocator.
1574
1575   @type iallocator_slot: string
1576   @param iallocator_slot: the name of the opcode iallocator slot
1577   @type node_slot: string
1578   @param node_slot: the name of the opcode target node slot
1579
1580   """
1581   node = getattr(lu.op, node_slot, None)
1582   iallocator = getattr(lu.op, iallocator_slot, None)
1583
1584   if node is not None and iallocator is not None:
1585     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1586                                errors.ECODE_INVAL)
1587   elif node is None and iallocator is None:
1588     default_iallocator = lu.cfg.GetDefaultIAllocator()
1589     if default_iallocator:
1590       setattr(lu.op, iallocator_slot, default_iallocator)
1591     else:
1592       raise errors.OpPrereqError("No iallocator or node given and no"
1593                                  " cluster-wide default iallocator found;"
1594                                  " please specify either an iallocator or a"
1595                                  " node, or set a cluster-wide default"
1596                                  " iallocator")
1597
1598
1599 def _GetDefaultIAllocator(cfg, iallocator):
1600   """Decides on which iallocator to use.
1601
1602   @type cfg: L{config.ConfigWriter}
1603   @param cfg: Cluster configuration object
1604   @type iallocator: string or None
1605   @param iallocator: Iallocator specified in opcode
1606   @rtype: string
1607   @return: Iallocator name
1608
1609   """
1610   if not iallocator:
1611     # Use default iallocator
1612     iallocator = cfg.GetDefaultIAllocator()
1613
1614   if not iallocator:
1615     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1616                                " opcode nor as a cluster-wide default",
1617                                errors.ECODE_INVAL)
1618
1619   return iallocator
1620
1621
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The LU itself does no work; it only provides the hook environment and
  the nodes on which the hooks run.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment with the cluster name as the operation target

    """
    cluster_name = self.cfg.GetClusterName()
    return {
      "OP_TARGET": cluster_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: an empty list and a list holding only the master node

    """
    master = self.cfg.GetMasterNode()
    return ([], [master])

  def Exec(self, feedback_fn):
    """Nothing to do.

    @return: always C{True}

    """
    return True
1648
1649
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment with the cluster name as the operation target

    """
    cluster_name = self.cfg.GetClusterName()
    return {
      "OP_TARGET": cluster_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two empty node lists

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()
    nodelist = self.cfg.GetNodeList()

    # The master must be the only node left
    only_master_left = (len(nodelist) == 1 and nodelist[0] == master)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master node and asks it to deactivate the
    master IP address; a failure to do the latter only causes a warning.

    @return: the name of the master node

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    master_name = master_params.name

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_name)

    use_external_script = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_name,
                                                     master_params,
                                                     use_external_script)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_name
1709
1710
1711 def _VerifyCertificate(filename):
1712   """Verifies a certificate for L{LUClusterVerifyConfig}.
1713
1714   @type filename: string
1715   @param filename: Path to PEM file
1716
1717   """
1718   try:
1719     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1720                                            utils.ReadFile(filename))
1721   except Exception, err: # pylint: disable=W0703
1722     return (LUClusterVerifyConfig.ETYPE_ERROR,
1723             "Failed to load X509 certificate %s: %s" % (filename, err))
1724
1725   (errcode, msg) = \
1726     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1727                                 constants.SSL_CERT_EXPIRATION_ERROR)
1728
1729   if msg:
1730     fnamemsg = "While verifying %s: %s" % (filename, msg)
1731   else:
1732     fnamemsg = None
1733
1734   if errcode is None:
1735     return (None, fnamemsg)
1736   elif errcode == utils.CERT_WARNING:
1737     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1738   elif errcode == utils.CERT_ERROR:
1739     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1740
1741   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1742
1743
def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  Three sources are collected, in this order: the cluster defaults of every
  enabled hypervisor, the per-OS overrides (only where some are set) and the
  filled parameters of every instance having its own overrides.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  result = [("cluster", hv, cluster.GetHVDefaults(hv))
            for hv in cluster.enabled_hypervisors]

  for (os_name, per_hv) in cluster.os_hvp.items():
    origin = "os %s" % os_name
    result.extend((origin, hv, cluster.GetHVDefaults(hv, os_name=os_name))
                  for (hv, overrides) in per_hv.items()
                  if overrides)

  # TODO: collapse identical parameter values in a single one
  result.extend(("instance %s" % inst.name, inst.hypervisor,
                 cluster.FillHV(inst))
                for inst in instances
                if inst.hvparams)

  return result
1774
1775
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    @param ecode: tuple of three values; the first is the item type and the
        second the error text identifier
    @param item: name of the affected item, may be empty or None
    @param msg: the message; used as a format string if C{args} are given
    @keyword code: the severity to report, defaults to L{ETYPE_ERROR}

    """
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    (itype, etxt, _) = ecode

    # Interpolate the message only if there is something to interpolate
    if args:
      msg = msg % args

    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      # Machine-parseable variant
      report = "%s:%s:%s:%s:%s" % (severity, etxt, itype, item, msg)
    else:
      # Human-readable variant; the item is only shown if there is one
      suffix = ""
      if item:
        suffix = " " + item
      report = "%s: %s%s: %s" % (severity, itype, suffix, msg)

    self._feedback_fn("  - %s" % report) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    The message is also logged if the opcode requests simulated errors.
    Error codes listed in the opcode's ignore_errors are reported as
    warnings; only reports with error severity are recorded in self.bad.

    """
    triggered = (bool(cond)
                 or self.op.debug_simulate_errors) # pylint: disable=E1101

    # Ignored error codes are demoted to warnings
    (_, error_name, _) = ecode
    if error_name in self.op.ignore_errors:     # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if triggered:
      self._Error(ecode, *args, **kwargs)

    # Warnings never mark the operation as failed
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    if severity == self.ETYPE_ERROR:
      self.bad = self.bad or triggered
1833
1834
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare that no locks are needed by this LU.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Build the verification jobs and hand them over for submission.

    If a group name was given, a single job verifying that group is created.
    Otherwise one job verifies the global configuration and one job per node
    group is added, each of them depending on the configuration job.

    @param feedback_fn: feedback function (not used here)
    @return: a L{ResultWithJobs} instance holding the jobs

    """
    jobs = []

    only_group = self.op.group_name
    if only_group:
      groups = [only_group]
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

    for group in groups:
      if only_group:
        depends = None
      else:
        # Always depend on global verification; the job ID is relative, hence
        # it grows with every job added after the configuration check
        depends = [(-len(jobs), [])]

      jobs.append([
        opcodes.OpClusterVerifyGroup(group_name=group,
                                     ignore_errors=self.op.ignore_errors,
                                     depends=depends)
        ])

    # Fix up all parameters
    for job in jobs:
      for op in job:
        op.debug_simulate_errors = self.op.debug_simulate_errors
        op.verbose = self.op.verbose
        op.error_codes = self.op.error_codes
        try:
          op.skip_checks = self.op.skip_checks
        except AttributeError:
          # Only tolerated for opcodes other than the group verification
          assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1877
1878
1879 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1880   """Verifies the cluster config.
1881
1882   """
1883   REQ_BGL = True
1884
1885   def _VerifyHVP(self, hvp_data):
1886     """Verifies locally the syntax of the hypervisor parameters.
1887
1888     """
1889     for item, hv_name, hv_params in hvp_data:
1890       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1891              (item, hv_name))
1892       try:
1893         hv_class = hypervisor.GetHypervisor(hv_name)
1894         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1895         hv_class.CheckParameterSyntax(hv_params)
1896       except errors.GenericError, err:
1897         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1898
1899   def ExpandNames(self):
1900     # Information can be safely retrieved as the BGL is acquired in exclusive
1901     # mode
1902     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1903     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1904     self.all_node_info = self.cfg.GetAllNodesInfo()
1905     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1906     self.needed_locks = {}
1907
1908   def Exec(self, feedback_fn):
1909     """Verify integrity of cluster, performing various test on nodes.
1910
1911     """
1912     self.bad = False
1913     self._feedback_fn = feedback_fn
1914
1915     feedback_fn("* Verifying cluster config")
1916
1917     for msg in self.cfg.VerifyConfig():
1918       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1919
1920     feedback_fn("* Verifying cluster certificate files")
1921
1922     for cert_filename in constants.ALL_CERT_FILES:
1923       (errcode, msg) = _VerifyCertificate(cert_filename)
1924       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1925
1926     feedback_fn("* Verifying hypervisor parameters")
1927
1928     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1929                                                 self.all_inst_info.values()))
1930
1931     feedback_fn("* Verifying all nodes belong to an existing group")
1932
1933     # We do this verification here because, should this bogus circumstance
1934     # occur, it would never be caught by VerifyGroup, which only acts on
1935     # nodes/instances reachable from existing node groups.
1936
1937     dangling_nodes = set(node.name for node in self.all_node_info.values()
1938                          if node.group not in self.all_group_info)
1939
1940     dangling_instances = {}
1941     no_node_instances = []
1942
1943     for inst in self.all_inst_info.values():
1944       if inst.primary_node in dangling_nodes:
1945         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1946       elif inst.primary_node not in self.all_node_info:
1947         no_node_instances.append(inst.name)
1948
1949     pretty_dangling = [
1950         "%s (%s)" %
1951         (node.name,
1952          utils.CommaJoin(dangling_instances.get(node.name,
1953                                                 ["no instances"])))
1954         for node in dangling_nodes]
1955
1956     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1957                   None,
1958                   "the following nodes (and their instances) belong to a non"
1959                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1960
1961     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1962                   None,
1963                   "the following instances have a non-existing primary-node:"
1964                   " %s", utils.CommaJoin(no_node_instances))
1965
1966     return not self.bad
1967
1968
1969 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1970   """Verifies the status of a node group.
1971
1972   """
1973   HPATH = "cluster-verify"
1974   HTYPE = constants.HTYPE_CLUSTER
1975   REQ_BGL = False
1976
1977   _HOOKS_INDENT_RE = re.compile("^", re.M)
1978
1979   class NodeImage(object):
1980     """A class representing the logical and physical status of a node.
1981
1982     @type name: string
1983     @ivar name: the node name to which this object refers
1984     @ivar volumes: a structure as returned from
1985         L{ganeti.backend.GetVolumeList} (runtime)
1986     @ivar instances: a list of running instances (runtime)
1987     @ivar pinst: list of configured primary instances (config)
1988     @ivar sinst: list of configured secondary instances (config)
1989     @ivar sbp: dictionary of {primary-node: list of instances} for all
1990         instances for which this node is secondary (config)
1991     @ivar mfree: free memory, as reported by hypervisor (runtime)
1992     @ivar dfree: free disk, as reported by the node (runtime)
1993     @ivar offline: the offline status (config)
1994     @type rpc_fail: boolean
1995     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1996         not whether the individual keys were correct) (runtime)
1997     @type lvm_fail: boolean
1998     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1999     @type hyp_fail: boolean
2000     @ivar hyp_fail: whether the RPC call didn't return the instance list
2001     @type ghost: boolean
2002     @ivar ghost: whether this is a known node or not (config)
2003     @type os_fail: boolean
2004     @ivar os_fail: whether the RPC call didn't return valid OS data
2005     @type oslist: list
2006     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2007     @type vm_capable: boolean
2008     @ivar vm_capable: whether the node can host instances
2009
2010     """
2011     def __init__(self, offline=False, name=None, vm_capable=True):
2012       self.name = name
2013       self.volumes = {}
2014       self.instances = []
2015       self.pinst = []
2016       self.sinst = []
2017       self.sbp = {}
2018       self.mfree = 0
2019       self.dfree = 0
2020       self.offline = offline
2021       self.vm_capable = vm_capable
2022       self.rpc_fail = False
2023       self.lvm_fail = False
2024       self.hyp_fail = False
2025       self.ghost = False
2026       self.os_fail = False
2027       self.oslist = {}
2028
2029   def ExpandNames(self):
2030     # This raises errors.OpPrereqError on its own:
2031     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2032
2033     # Get instances in node group; this is unsafe and needs verification later
2034     inst_names = \
2035       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2036
2037     self.needed_locks = {
2038       locking.LEVEL_INSTANCE: inst_names,
2039       locking.LEVEL_NODEGROUP: [self.group_uuid],
2040       locking.LEVEL_NODE: [],
2041       }
2042
2043     self.share_locks = _ShareAll()
2044
2045   def DeclareLocks(self, level):
2046     if level == locking.LEVEL_NODE:
2047       # Get members of node group; this is unsafe and needs verification later
2048       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2049
2050       all_inst_info = self.cfg.GetAllInstancesInfo()
2051
2052       # In Exec(), we warn about mirrored instances that have primary and
2053       # secondary living in separate node groups. To fully verify that
2054       # volumes for these instances are healthy, we will need to do an
2055       # extra call to their secondaries. We ensure here those nodes will
2056       # be locked.
2057       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2058         # Important: access only the instances whose lock is owned
2059         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2060           nodes.update(all_inst_info[inst].secondary_nodes)
2061
2062       self.needed_locks[locking.LEVEL_NODE] = nodes
2063
2064   def CheckPrereq(self):
2065     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2066     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2067
2068     group_nodes = set(self.group_info.members)
2069     group_instances = \
2070       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2071
2072     unlocked_nodes = \
2073         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2074
2075     unlocked_instances = \
2076         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2077
2078     if unlocked_nodes:
2079       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2080                                  utils.CommaJoin(unlocked_nodes),
2081                                  errors.ECODE_STATE)
2082
2083     if unlocked_instances:
2084       raise errors.OpPrereqError("Missing lock for instances: %s" %
2085                                  utils.CommaJoin(unlocked_instances),
2086                                  errors.ECODE_STATE)
2087
2088     self.all_node_info = self.cfg.GetAllNodesInfo()
2089     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2090
2091     self.my_node_names = utils.NiceSort(group_nodes)
2092     self.my_inst_names = utils.NiceSort(group_instances)
2093
2094     self.my_node_info = dict((name, self.all_node_info[name])
2095                              for name in self.my_node_names)
2096
2097     self.my_inst_info = dict((name, self.all_inst_info[name])
2098                              for name in self.my_inst_names)
2099
2100     # We detect here the nodes that will need the extra RPC calls for verifying
2101     # split LV volumes; they should be locked.
2102     extra_lv_nodes = set()
2103
2104     for inst in self.my_inst_info.values():
2105       if inst.disk_template in constants.DTS_INT_MIRROR:
2106         for nname in inst.all_nodes:
2107           if self.all_node_info[nname].group != self.group_uuid:
2108             extra_lv_nodes.add(nname)
2109
2110     unlocked_lv_nodes = \
2111         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2112
2113     if unlocked_lv_nodes:
2114       raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2115                                  utils.CommaJoin(unlocked_lv_nodes),
2116                                  errors.ECODE_STATE)
2117     self.extra_lv_nodes = list(extra_lv_nodes)
2118
2119   def _VerifyNode(self, ninfo, nresult):
2120     """Perform some basic validation on data returned from a node.
2121
2122       - check the result data structure is well formed and has all the
2123         mandatory fields
2124       - check ganeti version
2125
2126     @type ninfo: L{objects.Node}
2127     @param ninfo: the node to check
2128     @param nresult: the results from the node
2129     @rtype: boolean
2130     @return: whether overall this call was successful (and we can expect
2131          reasonable values in the respose)
2132
2133     """
2134     node = ninfo.name
2135     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2136
2137     # main result, nresult should be a non-empty dict
2138     test = not nresult or not isinstance(nresult, dict)
2139     _ErrorIf(test, constants.CV_ENODERPC, node,
2140                   "unable to verify node: no data returned")
2141     if test:
2142       return False
2143
2144     # compares ganeti version
2145     local_version = constants.PROTOCOL_VERSION
2146     remote_version = nresult.get("version", None)
2147     test = not (remote_version and
2148                 isinstance(remote_version, (list, tuple)) and
2149                 len(remote_version) == 2)
2150     _ErrorIf(test, constants.CV_ENODERPC, node,
2151              "connection to node returned invalid data")
2152     if test:
2153       return False
2154
2155     test = local_version != remote_version[0]
2156     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2157              "incompatible protocol versions: master %s,"
2158              " node %s", local_version, remote_version[0])
2159     if test:
2160       return False
2161
2162     # node seems compatible, we can actually try to look into its results
2163
2164     # full package version
2165     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2166                   constants.CV_ENODEVERSION, node,
2167                   "software version mismatch: master %s, node %s",
2168                   constants.RELEASE_VERSION, remote_version[1],
2169                   code=self.ETYPE_WARNING)
2170
2171     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2172     if ninfo.vm_capable and isinstance(hyp_result, dict):
2173       for hv_name, hv_result in hyp_result.iteritems():
2174         test = hv_result is not None
2175         _ErrorIf(test, constants.CV_ENODEHV, node,
2176                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2177
2178     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2179     if ninfo.vm_capable and isinstance(hvp_result, list):
2180       for item, hv_name, hv_result in hvp_result:
2181         _ErrorIf(True, constants.CV_ENODEHV, node,
2182                  "hypervisor %s parameter verify failure (source %s): %s",
2183                  hv_name, item, hv_result)
2184
2185     test = nresult.get(constants.NV_NODESETUP,
2186                        ["Missing NODESETUP results"])
2187     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2188              "; ".join(test))
2189
2190     return True
2191
2192   def _VerifyNodeTime(self, ninfo, nresult,
2193                       nvinfo_starttime, nvinfo_endtime):
2194     """Check the node time.
2195
2196     @type ninfo: L{objects.Node}
2197     @param ninfo: the node to check
2198     @param nresult: the remote results for the node
2199     @param nvinfo_starttime: the start time of the RPC call
2200     @param nvinfo_endtime: the end time of the RPC call
2201
2202     """
2203     node = ninfo.name
2204     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2205
2206     ntime = nresult.get(constants.NV_TIME, None)
2207     try:
2208       ntime_merged = utils.MergeTime(ntime)
2209     except (ValueError, TypeError):
2210       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2211       return
2212
2213     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2214       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2215     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2216       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2217     else:
2218       ntime_diff = None
2219
2220     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2221              "Node time diverges by at least %s from master node time",
2222              ntime_diff)
2223
2224   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2225     """Check the node LVM results.
2226
2227     @type ninfo: L{objects.Node}
2228     @param ninfo: the node to check
2229     @param nresult: the remote results for the node
2230     @param vg_name: the configured VG name
2231
2232     """
2233     if vg_name is None:
2234       return
2235
2236     node = ninfo.name
2237     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2238
2239     # checks vg existence and size > 20G
2240     vglist = nresult.get(constants.NV_VGLIST, None)
2241     test = not vglist
2242     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2243     if not test:
2244       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2245                                             constants.MIN_VG_SIZE)
2246       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2247
2248     # check pv names
2249     pvlist = nresult.get(constants.NV_PVLIST, None)
2250     test = pvlist is None
2251     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2252     if not test:
2253       # check that ':' is not present in PV names, since it's a
2254       # special character for lvcreate (denotes the range of PEs to
2255       # use on the PV)
2256       for _, pvname, owner_vg in pvlist:
2257         test = ":" in pvname
2258         _ErrorIf(test, constants.CV_ENODELVM, node,
2259                  "Invalid character ':' in PV '%s' of VG '%s'",
2260                  pvname, owner_vg)
2261
2262   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2263     """Check the node bridges.
2264
2265     @type ninfo: L{objects.Node}
2266     @param ninfo: the node to check
2267     @param nresult: the remote results for the node
2268     @param bridges: the expected list of bridges
2269
2270     """
2271     if not bridges:
2272       return
2273
2274     node = ninfo.name
2275     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2276
2277     missing = nresult.get(constants.NV_BRIDGES, None)
2278     test = not isinstance(missing, list)
2279     _ErrorIf(test, constants.CV_ENODENET, node,
2280              "did not return valid bridge information")
2281     if not test:
2282       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2283                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2284
2285   def _VerifyNodeUserScripts(self, ninfo, nresult):
2286     """Check the results of user scripts presence and executability on the node
2287
2288     @type ninfo: L{objects.Node}
2289     @param ninfo: the node to check
2290     @param nresult: the remote results for the node
2291
2292     """
2293     node = ninfo.name
2294
2295     test = not constants.NV_USERSCRIPTS in nresult
2296     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2297                   "did not return user scripts information")
2298
2299     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2300     if not test:
2301       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2302                     "user scripts not present or not executable: %s" %
2303                     utils.CommaJoin(sorted(broken_scripts)))
2304
2305   def _VerifyNodeNetwork(self, ninfo, nresult):
2306     """Check the node network connectivity results.
2307
2308     @type ninfo: L{objects.Node}
2309     @param ninfo: the node to check
2310     @param nresult: the remote results for the node
2311
2312     """
2313     node = ninfo.name
2314     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2315
2316     test = constants.NV_NODELIST not in nresult
2317     _ErrorIf(test, constants.CV_ENODESSH, node,
2318              "node hasn't returned node ssh connectivity data")
2319     if not test:
2320       if nresult[constants.NV_NODELIST]:
2321         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2322           _ErrorIf(True, constants.CV_ENODESSH, node,
2323                    "ssh communication with node '%s': %s", a_node, a_msg)
2324
2325     test = constants.NV_NODENETTEST not in nresult
2326     _ErrorIf(test, constants.CV_ENODENET, node,
2327              "node hasn't returned node tcp connectivity data")
2328     if not test:
2329       if nresult[constants.NV_NODENETTEST]:
2330         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2331         for anode in nlist:
2332           _ErrorIf(True, constants.CV_ENODENET, node,
2333                    "tcp communication with node '%s': %s",
2334                    anode, nresult[constants.NV_NODENETTEST][anode])
2335
2336     test = constants.NV_MASTERIP not in nresult
2337     _ErrorIf(test, constants.CV_ENODENET, node,
2338              "node hasn't returned node master IP reachability data")
2339     if not test:
2340       if not nresult[constants.NV_MASTERIP]:
2341         if node == self.master_node:
2342           msg = "the master node cannot reach the master IP (not configured?)"
2343         else:
2344           msg = "cannot reach the master IP"
2345         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2346
2347   def _VerifyInstance(self, instance, instanceconfig, node_image,
2348                       diskstatus):
2349     """Verify an instance.
2350
2351     This function checks to see if the required block devices are
2352     available on the instance's node.
2353
2354     """
2355     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2356     node_current = instanceconfig.primary_node
2357
2358     node_vol_should = {}
2359     instanceconfig.MapLVsByNode(node_vol_should)
2360
2361     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2362     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2363     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2364
2365     for node in node_vol_should:
2366       n_img = node_image[node]
2367       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2368         # ignore missing volumes on offline or broken nodes
2369         continue
2370       for volume in node_vol_should[node]:
2371         test = volume not in n_img.volumes
2372         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2373                  "volume %s missing on node %s", volume, node)
2374
2375     if instanceconfig.admin_state == constants.ADMINST_UP:
2376       pri_img = node_image[node_current]
2377       test = instance not in pri_img.instances and not pri_img.offline
2378       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2379                "instance not running on its primary node %s",
2380                node_current)
2381
2382     diskdata = [(nname, success, status, idx)
2383                 for (nname, disks) in diskstatus.items()
2384                 for idx, (success, status) in enumerate(disks)]
2385
2386     for nname, success, bdev_status, idx in diskdata:
2387       # the 'ghost node' construction in Exec() ensures that we have a
2388       # node here
2389       snode = node_image[nname]
2390       bad_snode = snode.ghost or snode.offline
2391       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2392                not success and not bad_snode,
2393                constants.CV_EINSTANCEFAULTYDISK, instance,
2394                "couldn't retrieve status for disk/%s on %s: %s",
2395                idx, nname, bdev_status)
2396       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2397                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2398                constants.CV_EINSTANCEFAULTYDISK, instance,
2399                "disk/%s on %s is faulty", idx, nname)
2400
2401   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2402     """Verify if there are any unknown volumes in the cluster.
2403
2404     The .os, .swap and backup volumes are ignored. All other volumes are
2405     reported as unknown.
2406
2407     @type reserved: L{ganeti.utils.FieldSet}
2408     @param reserved: a FieldSet of reserved volume names
2409
2410     """
2411     for node, n_img in node_image.items():
2412       if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2413           self.all_node_info[node].group != self.group_uuid):
2414         # skip non-healthy nodes
2415         continue
2416       for volume in n_img.volumes:
2417         test = ((node not in node_vol_should or
2418                 volume not in node_vol_should[node]) and
2419                 not reserved.Matches(volume))
2420         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2421                       "volume %s is unknown", volume)
2422
2423   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2424     """Verify N+1 Memory Resilience.
2425
2426     Check that if one single node dies we can still start all the
2427     instances it was primary for.
2428
2429     """
2430     cluster_info = self.cfg.GetClusterInfo()
2431     for node, n_img in node_image.items():
2432       # This code checks that every node which is now listed as
2433       # secondary has enough memory to host all instances it is
2434       # supposed to should a single other node in the cluster fail.
2435       # FIXME: not ready for failover to an arbitrary node
2436       # FIXME: does not support file-backed instances
2437       # WARNING: we currently take into account down instances as well
2438       # as up ones, considering that even if they're down someone
2439       # might want to start them even in the event of a node failure.
2440       if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2441         # we're skipping nodes marked offline and nodes in other groups from
2442         # the N+1 warning, since most likely we don't have good memory
2443         # infromation from them; we already list instances living on such
2444         # nodes, and that's enough warning
2445         continue
2446       #TODO(dynmem): also consider ballooning out other instances
2447       for prinode, instances in n_img.sbp.items():
2448         needed_mem = 0
2449         for instance in instances:
2450           bep = cluster_info.FillBE(instance_cfg[instance])
2451           if bep[constants.BE_AUTO_BALANCE]:
2452             needed_mem += bep[constants.BE_MINMEM]
2453         test = n_img.mfree < needed_mem
2454         self._ErrorIf(test, constants.CV_ENODEN1, node,
2455                       "not enough memory to accomodate instance failovers"
2456                       " should node %s fail (%dMiB needed, %dMiB available)",
2457                       prinode, needed_mem, n_img.mfree)
2458
2459   @classmethod
2460   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2461                    (files_all, files_opt, files_mc, files_vm)):
2462     """Verifies file checksums collected from all nodes.
2463
2464     @param errorif: Callback for reporting errors
2465     @param nodeinfo: List of L{objects.Node} objects
2466     @param master_node: Name of master node
2467     @param all_nvinfo: RPC results
2468
2469     """
2470     # Define functions determining which nodes to consider for a file
2471     files2nodefn = [
2472       (files_all, None),
2473       (files_mc, lambda node: (node.master_candidate or
2474                                node.name == master_node)),
2475       (files_vm, lambda node: node.vm_capable),
2476       ]
2477
2478     # Build mapping from filename to list of nodes which should have the file
2479     nodefiles = {}
2480     for (files, fn) in files2nodefn:
2481       if fn is None:
2482         filenodes = nodeinfo
2483       else:
2484         filenodes = filter(fn, nodeinfo)
2485       nodefiles.update((filename,
2486                         frozenset(map(operator.attrgetter("name"), filenodes)))
2487                        for filename in files)
2488
2489     assert set(nodefiles) == (files_all | files_mc | files_vm)
2490
2491     fileinfo = dict((filename, {}) for filename in nodefiles)
2492     ignore_nodes = set()
2493
2494     for node in nodeinfo:
2495       if node.offline:
2496         ignore_nodes.add(node.name)
2497         continue
2498
2499       nresult = all_nvinfo[node.name]
2500
2501       if nresult.fail_msg or not nresult.payload:
2502         node_files = None
2503       else:
2504         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2505
2506       test = not (node_files and isinstance(node_files, dict))
2507       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2508               "Node did not return file checksum data")
2509       if test:
2510         ignore_nodes.add(node.name)
2511         continue
2512
2513       # Build per-checksum mapping from filename to nodes having it
2514       for (filename, checksum) in node_files.items():
2515         assert filename in nodefiles
2516         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2517
2518     for (filename, checksums) in fileinfo.items():
2519       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2520
2521       # Nodes having the file
2522       with_file = frozenset(node_name
2523                             for nodes in fileinfo[filename].values()
2524                             for node_name in nodes) - ignore_nodes
2525
2526       expected_nodes = nodefiles[filename] - ignore_nodes
2527
2528       # Nodes missing file
2529       missing_file = expected_nodes - with_file
2530
2531       if filename in files_opt:
2532         # All or no nodes
2533         errorif(missing_file and missing_file != expected_nodes,
2534                 constants.CV_ECLUSTERFILECHECK, None,
2535                 "File %s is optional, but it must exist on all or no"
2536                 " nodes (not found on %s)",
2537                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2538       else:
2539         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2540                 "File %s is missing from node(s) %s", filename,
2541                 utils.CommaJoin(utils.NiceSort(missing_file)))
2542
2543         # Warn if a node has a file it shouldn't
2544         unexpected = with_file - expected_nodes
2545         errorif(unexpected,
2546                 constants.CV_ECLUSTERFILECHECK, None,
2547                 "File %s should not exist on node(s) %s",
2548                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2549
2550       # See if there are multiple versions of the file
2551       test = len(checksums) > 1
2552       if test:
2553         variants = ["variant %s on %s" %
2554                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2555                     for (idx, (checksum, nodes)) in
2556                       enumerate(sorted(checksums.items()))]
2557       else:
2558         variants = []
2559
2560       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2561               "File %s found with %s different checksums (%s)",
2562               filename, len(checksums), "; ".join(variants))
2563
2564   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2565                       drbd_map):
2566     """Verifies and the node DRBD status.
2567
2568     @type ninfo: L{objects.Node}
2569     @param ninfo: the node to check
2570     @param nresult: the remote results for the node
2571     @param instanceinfo: the dict of instances
2572     @param drbd_helper: the configured DRBD usermode helper
2573     @param drbd_map: the DRBD map as returned by
2574         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2575
2576     """
2577     node = ninfo.name
2578     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2579
2580     if drbd_helper:
2581       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2582       test = (helper_result == None)
2583       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2584                "no drbd usermode helper returned")
2585       if helper_result:
2586         status, payload = helper_result
2587         test = not status
2588         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2589                  "drbd usermode helper check unsuccessful: %s", payload)
2590         test = status and (payload != drbd_helper)
2591         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2592                  "wrong drbd usermode helper: %s", payload)
2593
2594     # compute the DRBD minors
2595     node_drbd = {}
2596     for minor, instance in drbd_map[node].items():
2597       test = instance not in instanceinfo
2598       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2599                "ghost instance '%s' in temporary DRBD map", instance)
2600         # ghost instance should not be running, but otherwise we
2601         # don't give double warnings (both ghost instance and
2602         # unallocated minor in use)
2603       if test:
2604         node_drbd[minor] = (instance, False)
2605       else:
2606         instance = instanceinfo[instance]
2607         node_drbd[minor] = (instance.name,
2608                             instance.admin_state == constants.ADMINST_UP)
2609
2610     # and now check them
2611     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2612     test = not isinstance(used_minors, (tuple, list))
2613     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2614              "cannot parse drbd status file: %s", str(used_minors))
2615     if test:
2616       # we cannot check drbd status
2617       return
2618
2619     for minor, (iname, must_exist) in node_drbd.items():
2620       test = minor not in used_minors and must_exist
2621       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2622                "drbd minor %d of instance %s is not active", minor, iname)
2623     for minor in used_minors:
2624       test = minor not in node_drbd
2625       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2626                "unallocated drbd minor %d is in use", minor)
2627
2628   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2629     """Builds the node OS structures.
2630
2631     @type ninfo: L{objects.Node}
2632     @param ninfo: the node to check
2633     @param nresult: the remote results for the node
2634     @param nimg: the node image object
2635
2636     """
2637     node = ninfo.name
2638     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2639
2640     remote_os = nresult.get(constants.NV_OSLIST, None)
2641     test = (not isinstance(remote_os, list) or
2642             not compat.all(isinstance(v, list) and len(v) == 7
2643                            for v in remote_os))
2644
2645     _ErrorIf(test, constants.CV_ENODEOS, node,
2646              "node hasn't returned valid OS data")
2647
2648     nimg.os_fail = test
2649
2650     if test:
2651       return
2652
2653     os_dict = {}
2654
2655     for (name, os_path, status, diagnose,
2656          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2657
2658       if name not in os_dict:
2659         os_dict[name] = []
2660
2661       # parameters is a list of lists instead of list of tuples due to
2662       # JSON lacking a real tuple type, fix it:
2663       parameters = [tuple(v) for v in parameters]
2664       os_dict[name].append((os_path, status, diagnose,
2665                             set(variants), set(parameters), set(api_ver)))
2666
2667     nimg.oslist = os_dict
2668
2669   def _VerifyNodeOS(self, ninfo, nimg, base):
2670     """Verifies the node OS list.
2671
2672     @type ninfo: L{objects.Node}
2673     @param ninfo: the node to check
2674     @param nimg: the node image object
2675     @param base: the 'template' node we match against (e.g. from the master)
2676
2677     """
2678     node = ninfo.name
2679     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2680
2681     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2682
2683     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2684     for os_name, os_data in nimg.oslist.items():
2685       assert os_data, "Empty OS status for OS %s?!" % os_name
2686       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2687       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2688                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2689       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2690                "OS '%s' has multiple entries (first one shadows the rest): %s",
2691                os_name, utils.CommaJoin([v[0] for v in os_data]))
2692       # comparisons with the 'base' image
2693       test = os_name not in base.oslist
2694       _ErrorIf(test, constants.CV_ENODEOS, node,
2695                "Extra OS %s not present on reference node (%s)",
2696                os_name, base.name)
2697       if test:
2698         continue
2699       assert base.oslist[os_name], "Base node has empty OS status?"
2700       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2701       if not b_status:
2702         # base OS is invalid, skipping
2703         continue
2704       for kind, a, b in [("API version", f_api, b_api),
2705                          ("variants list", f_var, b_var),
2706                          ("parameters", beautify_params(f_param),
2707                           beautify_params(b_param))]:
2708         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2709                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2710                  kind, os_name, base.name,
2711                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2712
2713     # check any missing OSes
2714     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2715     _ErrorIf(missing, constants.CV_ENODEOS, node,
2716              "OSes present on reference node %s but missing on this node: %s",
2717              base.name, utils.CommaJoin(missing))
2718
2719   def _VerifyOob(self, ninfo, nresult):
2720     """Verifies out of band functionality of a node.
2721
2722     @type ninfo: L{objects.Node}
2723     @param ninfo: the node to check
2724     @param nresult: the remote results for the node
2725
2726     """
2727     node = ninfo.name
2728     # We just have to verify the paths on master and/or master candidates
2729     # as the oob helper is invoked on the master
2730     if ((ninfo.master_candidate or ninfo.master_capable) and
2731         constants.NV_OOB_PATHS in nresult):
2732       for path_result in nresult[constants.NV_OOB_PATHS]:
2733         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2734
2735   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2736     """Verifies and updates the node volume data.
2737
2738     This function will update a L{NodeImage}'s internal structures
2739     with data from the remote call.
2740
2741     @type ninfo: L{objects.Node}
2742     @param ninfo: the node to check
2743     @param nresult: the remote results for the node
2744     @param nimg: the node image object
2745     @param vg_name: the configured VG name
2746
2747     """
2748     node = ninfo.name
2749     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2750
2751     nimg.lvm_fail = True
2752     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2753     if vg_name is None:
2754       pass
2755     elif isinstance(lvdata, basestring):
2756       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2757                utils.SafeEncode(lvdata))
2758     elif not isinstance(lvdata, dict):
2759       _ErrorIf(True, constants.CV_ENODELVM, node,
2760                "rpc call to node failed (lvlist)")
2761     else:
2762       nimg.volumes = lvdata
2763       nimg.lvm_fail = False
2764
2765   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2766     """Verifies and updates the node instance list.
2767
2768     If the listing was successful, then updates this node's instance
2769     list. Otherwise, it marks the RPC call as failed for the instance
2770     list key.
2771
2772     @type ninfo: L{objects.Node}
2773     @param ninfo: the node to check
2774     @param nresult: the remote results for the node
2775     @param nimg: the node image object
2776
2777     """
2778     idata = nresult.get(constants.NV_INSTANCELIST, None)
2779     test = not isinstance(idata, list)
2780     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2781                   "rpc call to node failed (instancelist): %s",
2782                   utils.SafeEncode(str(idata)))
2783     if test:
2784       nimg.hyp_fail = True
2785     else:
2786       nimg.instances = idata
2787
2788   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2789     """Verifies and computes a node information map
2790
2791     @type ninfo: L{objects.Node}
2792     @param ninfo: the node to check
2793     @param nresult: the remote results for the node
2794     @param nimg: the node image object
2795     @param vg_name: the configured VG name
2796
2797     """
2798     node = ninfo.name
2799     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2800
2801     # try to read free memory (from the hypervisor)
2802     hv_info = nresult.get(constants.NV_HVINFO, None)
2803     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2804     _ErrorIf(test, constants.CV_ENODEHV, node,
2805              "rpc call to node failed (hvinfo)")
2806     if not test:
2807       try:
2808         nimg.mfree = int(hv_info["memory_free"])
2809       except (ValueError, TypeError):
2810         _ErrorIf(True, constants.CV_ENODERPC, node,
2811                  "node returned invalid nodeinfo, check hypervisor")
2812
2813     # FIXME: devise a free space model for file based instances as well
2814     if vg_name is not None:
2815       test = (constants.NV_VGLIST not in nresult or
2816               vg_name not in nresult[constants.NV_VGLIST])
2817       _ErrorIf(test, constants.CV_ENODELVM, node,
2818                "node didn't return data for the volume group '%s'"
2819                " - it is either missing or broken", vg_name)
2820       if not test:
2821         try:
2822           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2823         except (ValueError, TypeError):
2824           _ErrorIf(True, constants.CV_ENODERPC, node,
2825                    "node returned invalid LVM info, check LVM status")
2826
2827   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2828     """Gets per-disk status information for all instances.
2829
2830     @type nodelist: list of strings
2831     @param nodelist: Node names
2832     @type node_image: dict of (name, L{objects.Node})
2833     @param node_image: Node objects
2834     @type instanceinfo: dict of (name, L{objects.Instance})
2835     @param instanceinfo: Instance objects
2836     @rtype: {instance: {node: [(succes, payload)]}}
2837     @return: a dictionary of per-instance dictionaries with nodes as
2838         keys and disk information as values; the disk information is a
2839         list of tuples (success, payload)
2840
2841     """
2842     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2843
2844     node_disks = {}
2845     node_disks_devonly = {}
2846     diskless_instances = set()
2847     diskless = constants.DT_DISKLESS
2848
2849     for nname in nodelist:
2850       node_instances = list(itertools.chain(node_image[nname].pinst,
2851                                             node_image[nname].sinst))
2852       diskless_instances.update(inst for inst in node_instances
2853                                 if instanceinfo[inst].disk_template == diskless)
2854       disks = [(inst, disk)
2855                for inst in node_instances
2856                for disk in instanceinfo[inst].disks]
2857
2858       if not disks:
2859         # No need to collect data
2860         continue
2861
2862       node_disks[nname] = disks
2863
2864       # Creating copies as SetDiskID below will modify the objects and that can
2865       # lead to incorrect data returned from nodes
2866       devonly = [dev.Copy() for (_, dev) in disks]
2867
2868       for dev in devonly:
2869         self.cfg.SetDiskID(dev, nname)
2870
2871       node_disks_devonly[nname] = devonly
2872
2873     assert len(node_disks) == len(node_disks_devonly)
2874
2875     # Collect data from all nodes with disks
2876     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2877                                                           node_disks_devonly)
2878
2879     assert len(result) == len(node_disks)
2880
2881     instdisk = {}
2882
2883     for (nname, nres) in result.items():
2884       disks = node_disks[nname]
2885
2886       if nres.offline:
2887         # No data from this node
2888         data = len(disks) * [(False, "node offline")]
2889       else:
2890         msg = nres.fail_msg
2891         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2892                  "while getting disk information: %s", msg)
2893         if msg:
2894           # No data from this node
2895           data = len(disks) * [(False, msg)]
2896         else:
2897           data = []
2898           for idx, i in enumerate(nres.payload):
2899             if isinstance(i, (tuple, list)) and len(i) == 2:
2900               data.append(i)
2901             else:
2902               logging.warning("Invalid result from node %s, entry %d: %s",
2903                               nname, idx, i)
2904               data.append((False, "Invalid result from the remote node"))
2905
2906       for ((inst, _), status) in zip(disks, data):
2907         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2908
2909     # Add empty entries for diskless instances.
2910     for inst in diskless_instances:
2911       assert inst not in instdisk
2912       instdisk[inst] = {}
2913
2914     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2915                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2916                       compat.all(isinstance(s, (tuple, list)) and
2917                                  len(s) == 2 for s in statuses)
2918                       for inst, nnames in instdisk.items()
2919                       for nname, statuses in nnames.items())
2920     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2921
2922     return instdisk
2923
2924   @staticmethod
2925   def _SshNodeSelector(group_uuid, all_nodes):
2926     """Create endless iterators for all potential SSH check hosts.
2927
2928     """
2929     nodes = [node for node in all_nodes
2930              if (node.group != group_uuid and
2931                  not node.offline)]
2932     keyfunc = operator.attrgetter("group")
2933
2934     return map(itertools.cycle,
2935                [sorted(map(operator.attrgetter("name"), names))
2936                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2937                                                   keyfunc)])
2938
2939   @classmethod
2940   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2941     """Choose which nodes should talk to which other nodes.
2942
2943     We will make nodes contact all nodes in their group, and one node from
2944     every other group.
2945
2946     @warning: This algorithm has a known issue if one node group is much
2947       smaller than others (e.g. just one node). In such a case all other
2948       nodes will talk to the single node.
2949
2950     """
2951     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2952     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2953
2954     return (online_nodes,
2955             dict((name, sorted([i.next() for i in sel]))
2956                  for name in online_nodes))
2957
2958   def BuildHooksEnv(self):
2959     """Build hooks env.
2960
2961     Cluster-Verify hooks just ran in the post phase and their failure makes
2962     the output be logged in the verify output and the verification to fail.
2963
2964     """
2965     env = {
2966       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2967       }
2968
2969     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2970                for node in self.my_node_info.values())
2971
2972     return env
2973
2974   def BuildHooksNodes(self):
2975     """Build hooks nodes.
2976
2977     """
2978     return ([], self.my_node_names)
2979
2980   def Exec(self, feedback_fn):
2981     """Verify integrity of the node group, performing various test on nodes.
2982
2983     """
2984     # This method has too many local variables. pylint: disable=R0914
2985     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2986
2987     if not self.my_node_names:
2988       # empty node group
2989       feedback_fn("* Empty node group, skipping verification")
2990       return True
2991
2992     self.bad = False
2993     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2994     verbose = self.op.verbose
2995     self._feedback_fn = feedback_fn
2996
2997     vg_name = self.cfg.GetVGName()
2998     drbd_helper = self.cfg.GetDRBDHelper()
2999     cluster = self.cfg.GetClusterInfo()
3000     groupinfo = self.cfg.GetAllNodeGroupsInfo()
3001     hypervisors = cluster.enabled_hypervisors
3002     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3003
3004     i_non_redundant = [] # Non redundant instances
3005     i_non_a_balanced = [] # Non auto-balanced instances
3006     i_offline = 0 # Count of offline instances
3007     n_offline = 0 # Count of offline nodes
3008     n_drained = 0 # Count of nodes being drained
3009     node_vol_should = {}
3010
3011     # FIXME: verify OS list
3012
3013     # File verification
3014     filemap = _ComputeAncillaryFiles(cluster, False)
3015
3016     # do local checksums
3017     master_node = self.master_node = self.cfg.GetMasterNode()
3018     master_ip = self.cfg.GetMasterIP()
3019
3020     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3021
3022     user_scripts = []
3023     if self.cfg.GetUseExternalMipScript():
3024       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3025
3026     node_verify_param = {
3027       constants.NV_FILELIST:
3028         utils.UniqueSequence(filename
3029                              for files in filemap
3030                              for filename in files),
3031       constants.NV_NODELIST:
3032         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3033                                   self.all_node_info.values()),
3034       constants.NV_HYPERVISOR: hypervisors,
3035       constants.NV_HVPARAMS:
3036         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3037       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3038                                  for node in node_data_list
3039                                  if not node.offline],
3040       constants.NV_INSTANCELIST: hypervisors,
3041       constants.NV_VERSION: None,
3042       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3043       constants.NV_NODESETUP: None,
3044       constants.NV_TIME: None,
3045       constants.NV_MASTERIP: (master_node, master_ip),
3046       constants.NV_OSLIST: None,
3047       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3048       constants.NV_USERSCRIPTS: user_scripts,
3049       }
3050
3051     if vg_name is not None:
3052       node_verify_param[constants.NV_VGLIST] = None
3053       node_verify_param[constants.NV_LVLIST] = vg_name
3054       node_verify_param[constants.NV_PVLIST] = [vg_name]
3055       node_verify_param[constants.NV_DRBDLIST] = None
3056
3057     if drbd_helper:
3058       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3059
3060     # bridge checks
3061     # FIXME: this needs to be changed per node-group, not cluster-wide
3062     bridges = set()
3063     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3064     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3065       bridges.add(default_nicpp[constants.NIC_LINK])
3066     for instance in self.my_inst_info.values():
3067       for nic in instance.nics:
3068         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3069         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3070           bridges.add(full_nic[constants.NIC_LINK])
3071
3072     if bridges:
3073       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3074
3075     # Build our expected cluster state
3076     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3077                                                  name=node.name,
3078                                                  vm_capable=node.vm_capable))
3079                       for node in node_data_list)
3080
3081     # Gather OOB paths
3082     oob_paths = []
3083     for node in self.all_node_info.values():
3084       path = _SupportsOob(self.cfg, node)
3085       if path and path not in oob_paths:
3086         oob_paths.append(path)
3087
3088     if oob_paths:
3089       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3090
3091     for instance in self.my_inst_names:
3092       inst_config = self.my_inst_info[instance]
3093
3094       for nname in inst_config.all_nodes:
3095         if nname not in node_image:
3096           gnode = self.NodeImage(name=nname)
3097           gnode.ghost = (nname not in self.all_node_info)
3098           node_image[nname] = gnode
3099
3100       inst_config.MapLVsByNode(node_vol_should)
3101
3102       pnode = inst_config.primary_node
3103       node_image[pnode].pinst.append(instance)
3104
3105       for snode in inst_config.secondary_nodes:
3106         nimg = node_image[snode]
3107         nimg.sinst.append(instance)
3108         if pnode not in nimg.sbp:
3109           nimg.sbp[pnode] = []
3110         nimg.sbp[pnode].append(instance)
3111
3112     # At this point, we have the in-memory data structures complete,
3113     # except for the runtime information, which we'll gather next
3114
3115     # Due to the way our RPC system works, exact response times cannot be
3116     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3117     # time before and after executing the request, we can at least have a time
3118     # window.
3119     nvinfo_starttime = time.time()
3120     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3121                                            node_verify_param,
3122                                            self.cfg.GetClusterName())
3123     nvinfo_endtime = time.time()
3124
3125     if self.extra_lv_nodes and vg_name is not None:
3126       extra_lv_nvinfo = \
3127           self.rpc.call_node_verify(self.extra_lv_nodes,
3128                                     {constants.NV_LVLIST: vg_name},
3129                                     self.cfg.GetClusterName())
3130     else:
3131       extra_lv_nvinfo = {}
3132
3133     all_drbd_map = self.cfg.ComputeDRBDMap()
3134
3135     feedback_fn("* Gathering disk information (%s nodes)" %
3136                 len(self.my_node_names))
3137     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3138                                      self.my_inst_info)
3139
3140     feedback_fn("* Verifying configuration file consistency")
3141
3142     # If not all nodes are being checked, we need to make sure the master node
3143     # and a non-checked vm_capable node are in the list.
3144     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3145     if absent_nodes:
3146       vf_nvinfo = all_nvinfo.copy()
3147       vf_node_info = list(self.my_node_info.values())
3148       additional_nodes = []
3149       if master_node not in self.my_node_info:
3150         additional_nodes.append(master_node)
3151         vf_node_info.append(self.all_node_info[master_node])
3152       # Add the first vm_capable node we find which is not included
3153       for node in absent_nodes:
3154         nodeinfo = self.all_node_info[node]
3155         if nodeinfo.vm_capable and not nodeinfo.offline:
3156           additional_nodes.append(node)
3157           vf_node_info.append(self.all_node_info[node])
3158           break
3159       key = constants.NV_FILELIST
3160       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3161                                                  {key: node_verify_param[key]},
3162                                                  self.cfg.GetClusterName()))
3163     else:
3164       vf_nvinfo = all_nvinfo
3165       vf_node_info = self.my_node_info.values()
3166
3167     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3168
3169     feedback_fn("* Verifying node status")
3170
3171     refos_img = None
3172
3173     for node_i in node_data_list:
3174       node = node_i.name
3175       nimg = node_image[node]
3176
3177       if node_i.offline:
3178         if verbose:
3179           feedback_fn("* Skipping offline node %s" % (node,))
3180         n_offline += 1
3181         continue
3182
3183       if node == master_node:
3184         ntype = "master"
3185       elif node_i.master_candidate:
3186         ntype = "master candidate"
3187       elif node_i.drained:
3188         ntype = "drained"
3189         n_drained += 1
3190       else:
3191         ntype = "regular"
3192       if verbose:
3193         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3194
3195       msg = all_nvinfo[node].fail_msg
3196       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3197                msg)
3198       if msg:
3199         nimg.rpc_fail = True
3200         continue
3201
3202       nresult = all_nvinfo[node].payload
3203
3204       nimg.call_ok = self._VerifyNode(node_i, nresult)
3205       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3206       self._VerifyNodeNetwork(node_i, nresult)
3207       self._VerifyNodeUserScripts(node_i, nresult)
3208       self._VerifyOob(node_i, nresult)
3209
3210       if nimg.vm_capable:
3211         self._VerifyNodeLVM(node_i, nresult, vg_name)
3212         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3213                              all_drbd_map)
3214
3215         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3216         self._UpdateNodeInstances(node_i, nresult, nimg)
3217         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3218         self._UpdateNodeOS(node_i, nresult, nimg)
3219
3220         if not nimg.os_fail:
3221           if refos_img is None:
3222             refos_img = nimg
3223           self._VerifyNodeOS(node_i, nimg, refos_img)
3224         self._VerifyNodeBridges(node_i, nresult, bridges)
3225
3226         # Check whether all running instancies are primary for the node. (This
3227         # can no longer be done from _VerifyInstance below, since some of the
3228         # wrong instances could be from other node groups.)
3229         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3230
3231         for inst in non_primary_inst:
3232           # FIXME: investigate best way to handle offline insts
3233           if inst.admin_state == constants.ADMINST_OFFLINE:
3234             if verbose:
3235               feedback_fn("* Skipping offline instance %s" % inst.name)
3236             i_offline += 1
3237             continue
3238           test = inst in self.all_inst_info
3239           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3240                    "instance should not run on node %s", node_i.name)
3241           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3242                    "node is running unknown instance %s", inst)
3243
3244     for node, result in extra_lv_nvinfo.items():
3245       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3246                               node_image[node], vg_name)
3247
3248     feedback_fn("* Verifying instance status")
3249     for instance in self.my_inst_names:
3250       if verbose:
3251         feedback_fn("* Verifying instance %s" % instance)
3252       inst_config = self.my_inst_info[instance]
3253       self._VerifyInstance(instance, inst_config, node_image,
3254                            instdisk[instance])
3255       inst_nodes_offline = []
3256
3257       pnode = inst_config.primary_node
3258       pnode_img = node_image[pnode]
3259       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3260                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3261                " primary node failed", instance)
3262
3263       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3264                pnode_img.offline,
3265                constants.CV_EINSTANCEBADNODE, instance,
3266                "instance is marked as running and lives on offline node %s",
3267                inst_config.primary_node)
3268
3269       # If the instance is non-redundant we cannot survive losing its primary
3270       # node, so we are not N+1 compliant. On the other hand we have no disk
3271       # templates with more than one secondary so that situation is not well
3272       # supported either.
3273       # FIXME: does not support file-backed instances
3274       if not inst_config.secondary_nodes:
3275         i_non_redundant.append(instance)
3276
3277       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3278                constants.CV_EINSTANCELAYOUT,
3279                instance, "instance has multiple secondary nodes: %s",
3280                utils.CommaJoin(inst_config.secondary_nodes),
3281                code=self.ETYPE_WARNING)
3282
3283       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3284         pnode = inst_config.primary_node
3285         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3286         instance_groups = {}
3287
3288         for node in instance_nodes:
3289           instance_groups.setdefault(self.all_node_info[node].group,
3290                                      []).append(node)
3291
3292         pretty_list = [
3293           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3294           # Sort so that we always list the primary node first.
3295           for group, nodes in sorted(instance_groups.items(),
3296                                      key=lambda (_, nodes): pnode in nodes,
3297                                      reverse=True)]
3298
3299         self._ErrorIf(len(instance_groups) > 1,
3300                       constants.CV_EINSTANCESPLITGROUPS,
3301                       instance, "instance has primary and secondary nodes in"
3302                       " different groups: %s", utils.CommaJoin(pretty_list),
3303                       code=self.ETYPE_WARNING)
3304
3305       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3306         i_non_a_balanced.append(instance)
3307
3308       for snode in inst_config.secondary_nodes:
3309         s_img = node_image[snode]
3310         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3311                  snode, "instance %s, connection to secondary node failed",
3312                  instance)
3313
3314         if s_img.offline:
3315           inst_nodes_offline.append(snode)
3316
3317       # warn that the instance lives on offline nodes
3318       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3319                "instance has offline secondary node(s) %s",
3320                utils.CommaJoin(inst_nodes_offline))
3321       # ... or ghost/non-vm_capable nodes
3322       for node in inst_config.all_nodes:
3323         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3324                  instance, "instance lives on ghost node %s", node)
3325         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3326                  instance, "instance lives on non-vm_capable node %s", node)
3327
3328     feedback_fn("* Verifying orphan volumes")
3329     reserved = utils.FieldSet(*cluster.reserved_lvs)
3330
3331     # We will get spurious "unknown volume" warnings if any node of this group
3332     # is secondary for an instance whose primary is in another group. To avoid
3333     # them, we find these instances and add their volumes to node_vol_should.
3334     for inst in self.all_inst_info.values():
3335       for secondary in inst.secondary_nodes:
3336         if (secondary in self.my_node_info
3337             and inst.name not in self.my_inst_info):
3338           inst.MapLVsByNode(node_vol_should)
3339           break
3340
3341     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3342
3343     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3344       feedback_fn("* Verifying N+1 Memory redundancy")
3345       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3346
3347     feedback_fn("* Other Notes")
3348     if i_non_redundant:
3349       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3350                   % len(i_non_redundant))
3351
3352     if i_non_a_balanced:
3353       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3354                   % len(i_non_a_balanced))
3355
3356     if i_offline:
3357       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3358
3359     if n_offline:
3360       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3361
3362     if n_drained:
3363       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3364
3365     return not self.bad
3366
3367   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3368     """Analyze the post-hooks' result
3369
3370     This method analyses the hook result, handles it, and sends some
3371     nicely-formatted feedback back to the user.
3372
3373     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3374         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3375     @param hooks_results: the results of the multi-node hooks rpc call
3376     @param feedback_fn: function used send feedback back to the caller
3377     @param lu_result: previous Exec result
3378     @return: the new Exec result, based on the previous result
3379         and hook results
3380
3381     """
3382     # We only really run POST phase hooks, only for non-empty groups,
3383     # and are only interested in their results
3384     if not self.my_node_names:
3385       # empty node group
3386       pass
3387     elif phase == constants.HOOKS_PHASE_POST:
3388       # Used to change hooks' output to proper indentation
3389       feedback_fn("* Hooks Results")
3390       assert hooks_results, "invalid result from hooks"
3391
3392       for node_name in hooks_results:
3393         res = hooks_results[node_name]
3394         msg = res.fail_msg
3395         test = msg and not res.offline
3396         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3397                       "Communication failure in hooks execution: %s", msg)
3398         if res.offline or msg:
3399           # No need to investigate payload if node is offline or gave
3400           # an error.
3401           continue
3402         for script, hkr, output in res.payload:
3403           test = hkr == constants.HKR_FAIL
3404           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3405                         "Script %s failed, output:", script)
3406           if test:
3407             output = self._HOOKS_INDENT_RE.sub("      ", output)
3408             feedback_fn("%s" % output)
3409             lu_result = False
3410
3411     return lu_result
3412
3413
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  No disk is checked by this LU itself; it only submits one
  L{opcodes.OpGroupVerifyDisks} job per node group it holds a lock on.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request locks on all node groups.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = dict([(locking.LEVEL_NODEGROUP, locking.ALL_SET)])

  def Exec(self, feedback_fn):
    """Submit the per-group verification jobs.

    @param feedback_fn: feedback function (not used here)
    @return: a L{ResultWithJobs} holding one single-opcode job per locked
        node group

    """
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    jobs = []
    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])

    return ResultWithJobs(jobs)
3432
3433
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group and declare the (initially empty) lock lists.

    The instance, node group and node locks are filled in level by level
    in L{DeclareLocks}.

    """
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    # Every level starts out with its own, separate empty list
    self.needed_locks = dict((level, []) for level in
                             (locking.LEVEL_INSTANCE,
                              locking.LEVEL_NODEGROUP,
                              locking.LEVEL_NODE))

  def DeclareLocks(self, level):
    """Compute the locks needed at the given level.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances
      return

    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      # Besides the group itself, lock all groups used by its instances
      # optimistically; this requires going via the node before it's locked,
      # requiring verification later on
      wanted_groups = set([self.group_uuid])
      for instance_name in self.owned_locks(locking.LEVEL_INSTANCE):
        wanted_groups.update(self.cfg.GetInstanceNodeGroups(instance_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups
      return

    if level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      group = self.cfg.GetNodeGroup(self.group_uuid)
      self.needed_locks[locking.LEVEL_NODE].extend(group.members)

  def CheckPrereq(self):
    """Verify that the optimistically acquired locks are still correct.

    Also loads the configuration of all locked instances into
    C{self.instances} (a dict built from L{GetMultiInstanceInfo}'s pairs).

    """
    locked_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    locked_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    locked_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in locked_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, locked_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(locked_instances))

    # Check if node groups for locked instances are still correct
    for (name, instance) in self.instances.items():
      assert locked_nodes.issuperset(instance.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % name

      groups_of_instance = _CheckInstanceNodeGroups(self.cfg, name,
                                                    locked_groups)

      assert self.group_uuid in groups_of_instance, \
        "Instance %s has no node in group %s" % (name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances whose admin state is L{constants.ADMINST_UP} are taken
    into account.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: list of [node, volume]
        for missing volumes)

    """
    up_instances = [instance for instance in self.instances.values()
                    if instance.admin_state == constants.ADMINST_UP]
    nv_dict = _MapInstanceDisksToNodes(up_instances)

    if not nv_dict:
      # No disks to look for, hence no need to contact any node
      return ({}, [], {})

    node_errors = {}
    need_activation = set()

    locked_nodes = set(self.owned_locks(locking.LEVEL_NODE))
    vm_capable = set(self.cfg.GetVmCapableNodeList())
    nodes = utils.NiceSort(locked_nodes & vm_capable)

    lv_results = self.rpc.call_lv_list(nodes, [])

    for (node, lv_result) in lv_results.items():
      if lv_result.offline:
        continue

      failure = lv_result.fail_msg
      if failure:
        logging.warning("Error enumerating LVs on node %s: %s", node, failure)
        node_errors[node] = failure
        continue

      for (lv_name, (_, _, lv_online)) in lv_result.payload.items():
        # Every LV reported by the node is crossed off the expected list
        owner = nv_dict.pop((node, lv_name), None)
        if owner is not None and not lv_online:
          need_activation.add(owner)

    # any leftover items in nv_dict are missing LVs, let's arrange the data
    # better
    missing = {}
    for (node_and_lv, owner) in nv_dict.items():
      missing.setdefault(owner, []).append(list(node_and_lv))

    return (node_errors, list(need_activation), missing)
3551
3552
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks, for either the requested or all instances.

    """
    if not self.op.instances:
      # No explicit selection: work on everything
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    else:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    """Lock the primary nodes of explicitly requested instances.

    @param level: the locking level for which locks are being declared

    """
    if level != locking.LEVEL_NODE_RES or self.wanted_names is None:
      return

    self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    Falls back to the names of all locked instances if no instance list
    was given and stores the corresponding instance objects in
    C{self.wanted_instances}.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    name_and_info = self.cfg.GetMultiInstanceInfo(self.wanted_names)
    self.wanted_instances = [compat.snd(entry) for entry in name_and_info]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: whether the size of any (recursively visited) first child
        was changed

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    data_child = disk.children[0]
    too_small = data_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   data_child.size, disk.size)
      data_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(data_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    The size recorded in the configuration is compared with what the
    instance's primary node reports; mismatches are corrected in the
    configuration.

    @param feedback_fn: feedback function, passed on to the config update
    @return: list of (instance name, disk index, size) tuples, one for
        each correction made

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes

    # Group the disks by primary node, so every node is asked only once
    disks_by_pnode = {}
    for instance in self.wanted_instances:
      disks_by_pnode.setdefault(instance.primary_node, []).extend(
        (instance, idx, disk) for (idx, disk) in enumerate(instance.disks))

    assert not (frozenset(disks_by_pnode.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for (node, disk_entries) in disks_by_pnode.items():
      # The RPC gets copies, so setting the disk ID leaves the originals alone
      disk_copies = [entry[2].Copy() for entry in disk_entries]
      for disk_copy in disk_copies:
        self.cfg.SetDiskID(disk_copy, node)

      result = self.rpc.call_blockdev_getsize(node, disk_copies)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      if len(result.payload) != len(disk_entries):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(disk_entries),
                        result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for ((instance, idx, disk), reported) in zip(disk_entries,
                                                   result.payload):
        if reported is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(reported, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue

        # Scale down by 2**20 (presumably bytes to the configuration's unit;
        # confirm against the blockdev_getsize RPC)
        actual = reported >> 20
        if actual != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, actual)
          disk.size = actual
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, actual))

        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
3673
3674
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dict with the current cluster name as C{OP_TARGET} and the
        requested one as C{NEW_NAME}

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    env["NEW_NAME"] = self.op.name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (list holding only the master node, list of all
        nodes)

    """
    pre_nodes = [self.cfg.GetMasterNode()]
    post_nodes = self.cfg.GetNodeList()
    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved first; the resolved name replaces C{self.op.name}
    and the resolved IP address is kept in C{self.ip}.

    @raise errors.OpPrereqError: if neither name nor IP address change, or
        if a changed IP address already answers on the node daemon port

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    resolved_name = hostname.name
    resolved_ip = hostname.ip
    self.ip = resolved_ip

    current_name = self.cfg.GetClusterName()
    current_ip = self.cfg.GetMasterIP()
    ip_changed = resolved_ip != current_ip

    if resolved_name == current_name and not ip_changed:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # A new address must not be in use by anyone else yet
    if ip_changed and netutils.TcpPing(resolved_ip,
                                       constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 resolved_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = resolved_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master IP is taken down, the configuration and the known hosts
    file are updated, and the master IP is brought up again with the new
    address, even if one of the update steps failed.

    @param feedback_fn: feedback function, passed on to the config update
    @return: the new cluster name

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    master_name = master_params.name
    shutdown = self.rpc.call_node_deactivate_master_ip(master_name,
                                                       master_params, ems)
    shutdown.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      # ... and upload it to all online nodes except the master itself
      targets = self.cfg.GetOnlineNodeList()
      if master_name in targets:
        targets.remove(master_name)
      _UploadHelper(self, targets, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # Whatever happened above, try to get the master IP up again
      master_params.ip = new_ip
      startup = self.rpc.call_node_activate_master_ip(master_name,
                                                      master_params, ems)
      failure = startup.fail_msg
      if failure:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", failure)

    return clustername
3758
3759
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  The netmask is validated by the IP address class that belongs to the
  cluster's primary IP family.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails; the error carries
      the L{errors.ECODE_INVAL} error code

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    # Report an unknown family as a prerequisite failure with an error code,
    # like the other prerequisite errors in this module
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask), errors.ECODE_INVAL)
3779
3780
3781 class LUClusterSetParams(LogicalUnit):
3782   """Change the parameters of the cluster.
3783
3784   """
3785   HPATH = "cluster-modify"
3786   HTYPE = constants.HTYPE_CLUSTER
3787   REQ_BGL = False
3788
3789   def CheckArguments(self):
3790     """Check parameters
3791
3792     """
3793     if self.op.uid_pool:
3794       uidpool.CheckUidPool(self.op.uid_pool)
3795
3796     if self.op.add_uids:
3797       uidpool.CheckUidPool(self.op.add_uids)
3798
3799     if self.op.remove_uids:
3800       uidpool.CheckUidPool(self.op.remove_uids)
3801
3802     if self.op.master_netmask is not None:
3803       _ValidateNetmask(self.cfg, self.op.master_netmask)
3804
3805     if self.op.diskparams:
3806       for dt_params in self.op.diskparams.values():
3807         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3808
3809   def ExpandNames(self):
3810     # FIXME: in the future maybe other cluster params won't require checking on
3811     # all nodes to be modified.
3812     self.needed_locks = {
3813       locking.LEVEL_NODE: locking.ALL_SET,
3814       locking.LEVEL_INSTANCE: locking.ALL_SET,
3815       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3816     }
3817     self.share_locks = {
3818         locking.LEVEL_NODE: 1,
3819         locking.LEVEL_INSTANCE: 1,
3820         locking.LEVEL_NODEGROUP: 1,
3821     }
3822
3823   def BuildHooksEnv(self):
3824     """Build hooks env.
3825
3826     """
3827     return {
3828       "OP_TARGET": self.cfg.GetClusterName(),
3829       "NEW_VG_NAME": self.op.vg_name,
3830       }
3831
3832   def BuildHooksNodes(self):
3833     """Build hooks nodes.
3834
3835     """
3836     mn = self.cfg.GetMasterNode()
3837     return ([mn], [mn])
3838
3839   def CheckPrereq(self):
3840     """Check prerequisites.
3841
3842     This checks whether the given params don't conflict and
3843     if the given volume group is valid.
3844
3845     """
3846     if self.op.vg_name is not None and not self.op.vg_name:
3847       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3848         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3849                                    " instances exist", errors.ECODE_INVAL)
3850
3851     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3852       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3853         raise errors.OpPrereqError("Cannot disable drbd helper while"
3854                                    " drbd-based instances exist",
3855                                    errors.ECODE_INVAL)
3856
3857     node_list = self.owned_locks(locking.LEVEL_NODE)
3858
3859     # if vg_name not None, checks given volume group on all nodes
3860     if self.op.vg_name:
3861       vglist = self.rpc.call_vg_list(node_list)
3862       for node in node_list:
3863         msg = vglist[node].fail_msg
3864         if msg:
3865           # ignoring down node
3866           self.LogWarning("Error while gathering data on node %s"
3867                           " (ignoring node): %s", node, msg)
3868           continue
3869         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3870                                               self.op.vg_name,
3871                                               constants.MIN_VG_SIZE)
3872         if vgstatus:
3873           raise errors.OpPrereqError("Error on node '%s': %s" %
3874                                      (node, vgstatus), errors.ECODE_ENVIRON)
3875
3876     if self.op.drbd_helper:
3877       # checks given drbd helper on all nodes
3878       helpers = self.rpc.call_drbd_helper(node_list)
3879       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3880         if ninfo.offline:
3881           self.LogInfo("Not checking drbd helper on offline node %s", node)
3882           continue
3883         msg = helpers[node].fail_msg
3884         if msg:
3885           raise errors.OpPrereqError("Error checking drbd helper on node"
3886                                      " '%s': %s" % (node, msg),
3887                                      errors.ECODE_ENVIRON)
3888         node_helper = helpers[node].payload
3889         if node_helper != self.op.drbd_helper:
3890           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3891                                      (node, node_helper), errors.ECODE_ENVIRON)
3892
3893     self.cluster = cluster = self.cfg.GetClusterInfo()
3894     # validate params changes
3895     if self.op.beparams:
3896       objects.UpgradeBeParams(self.op.beparams)
3897       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3898       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3899
3900     if self.op.ndparams:
3901       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3902       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3903
3904       # TODO: we need a more general way to handle resetting
3905       # cluster-level parameters to default values
3906       if self.new_ndparams["oob_program"] == "":
3907         self.new_ndparams["oob_program"] = \
3908             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3909
3910     if self.op.hv_state:
3911       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3912                                             self.cluster.hv_state_static)
3913       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3914                                for hv, values in new_hv_state.items())
3915
3916     if self.op.disk_state:
3917       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3918                                                 self.cluster.disk_state_static)
3919       self.new_disk_state = \
3920         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3921                             for name, values in svalues.items()))
3922              for storage, svalues in new_disk_state.items())
3923
3924     if self.op.ipolicy:
3925       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3926                                             group_policy=False)
3927
3928       all_instances = self.cfg.GetAllInstancesInfo().values()
3929       violations = set()
3930       for group in self.cfg.GetAllNodeGroupsInfo().values():
3931         instances = frozenset([inst for inst in all_instances
3932                                if compat.any(node in group.members
3933                                              for node in inst.all_nodes)])
3934         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3935         new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3936                                                                    group),
3937                                             new_ipolicy, instances)
3938         if new:
3939           violations.update(new)
3940
3941       if violations:
3942         self.LogWarning("After the ipolicy change the following instances"
3943                         " violate them: %s",
3944                         utils.CommaJoin(violations))
3945
3946     if self.op.nicparams:
3947       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3948       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3949       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3950       nic_errors = []
3951
3952       # check all instances for consistency
3953       for instance in self.cfg.GetAllInstancesInfo().values():
3954         for nic_idx, nic in enumerate(instance.nics):
3955           params_copy = copy.deepcopy(nic.nicparams)
3956           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3957
3958           # check parameter syntax
3959           try:
3960             objects.NIC.CheckParameterSyntax(params_filled)
3961           except errors.ConfigurationError, err:
3962             nic_errors.append("Instance %s, nic/%d: %s" %
3963                               (instance.name, nic_idx, err))
3964
3965           # if we're moving instances to routed, check that they have an ip
3966           target_mode = params_filled[constants.NIC_MODE]
3967           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3968             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3969                               " address" % (instance.name, nic_idx))
3970       if nic_errors:
3971         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3972                                    "\n".join(nic_errors))
3973
3974     # hypervisor list/parameters
3975     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3976     if self.op.hvparams:
3977       for hv_name, hv_dict in self.op.hvparams.items():
3978         if hv_name not in self.new_hvparams:
3979           self.new_hvparams[hv_name] = hv_dict
3980         else:
3981           self.new_hvparams[hv_name].update(hv_dict)
3982
3983     # disk template parameters
3984     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3985     if self.op.diskparams:
3986       for dt_name, dt_params in self.op.diskparams.items():
3987         if dt_name not in self.op.diskparams:
3988           self.new_diskparams[dt_name] = dt_params
3989         else:
3990           self.new_diskparams[dt_name].update(dt_params)
3991
3992     # os hypervisor parameters
3993     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3994     if self.op.os_hvp:
3995       for os_name, hvs in self.op.os_hvp.items():
3996         if os_name not in self.new_os_hvp:
3997           self.new_os_hvp[os_name] = hvs
3998         else:
3999           for hv_name, hv_dict in hvs.items():
4000             if hv_name not in self.new_os_hvp[os_name]:
4001               self.new_os_hvp[os_name][hv_name] = hv_dict
4002             else:
4003               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4004
4005     # os parameters
4006     self.new_osp = objects.FillDict(cluster.osparams, {})
4007     if self.op.osparams:
4008       for os_name, osp in self.op.osparams.items():
4009         if os_name not in self.new_osp:
4010           self.new_osp[os_name] = {}
4011
4012         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4013                                                   use_none=True)
4014
4015         if not self.new_osp[os_name]:
4016           # we removed all parameters
4017           del self.new_osp[os_name]
4018         else:
4019           # check the parameter validity (remote check)
4020           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4021                          os_name, self.new_osp[os_name])
4022
4023     # changes to the hypervisor list
4024     if self.op.enabled_hypervisors is not None:
4025       self.hv_list = self.op.enabled_hypervisors
4026       for hv in self.hv_list:
4027         # if the hypervisor doesn't already exist in the cluster
4028         # hvparams, we initialize it to empty, and then (in both
4029         # cases) we make sure to fill the defaults, as we might not
4030         # have a complete defaults list if the hypervisor wasn't
4031         # enabled before
4032         if hv not in new_hvp:
4033           new_hvp[hv] = {}
4034         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4035         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4036     else:
4037       self.hv_list = cluster.enabled_hypervisors
4038
4039     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4040       # either the enabled list has changed, or the parameters have, validate
4041       for hv_name, hv_params in self.new_hvparams.items():
4042         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4043             (self.op.enabled_hypervisors and
4044              hv_name in self.op.enabled_hypervisors)):
4045           # either this is a new hypervisor, or its parameters have changed
4046           hv_class = hypervisor.GetHypervisor(hv_name)
4047           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4048           hv_class.CheckParameterSyntax(hv_params)
4049           _CheckHVParams(self, node_list, hv_name, hv_params)
4050
4051     if self.op.os_hvp:
4052       # no need to check any newly-enabled hypervisors, since the
4053       # defaults have already been checked in the above code-block
4054       for os_name, os_hvp in self.new_os_hvp.items():
4055         for hv_name, hv_params in os_hvp.items():
4056           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4057           # we need to fill in the new os_hvp on top of the actual hv_p
4058           cluster_defaults = self.new_hvparams.get(hv_name, {})
4059           new_osp = objects.FillDict(cluster_defaults, hv_params)
4060           hv_class = hypervisor.GetHypervisor(hv_name)
4061           hv_class.CheckParameterSyntax(new_osp)
4062           _CheckHVParams(self, node_list, hv_name, new_osp)
4063
4064     if self.op.default_iallocator:
4065       alloc_script = utils.FindFile(self.op.default_iallocator,
4066                                     constants.IALLOCATOR_SEARCH_PATH,
4067                                     os.path.isfile)
4068       if alloc_script is None:
4069         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4070                                    " specified" % self.op.default_iallocator,
4071                                    errors.ECODE_INVAL)
4072
4073   def Exec(self, feedback_fn):
4074     """Change the parameters of the cluster.
4075
4076     """
4077     if self.op.vg_name is not None:
4078       new_volume = self.op.vg_name
4079       if not new_volume:
4080         new_volume = None
4081       if new_volume != self.cfg.GetVGName():
4082         self.cfg.SetVGName(new_volume)
4083       else:
4084         feedback_fn("Cluster LVM configuration already in desired"
4085                     " state, not changing")
4086     if self.op.drbd_helper is not None:
4087       new_helper = self.op.drbd_helper
4088       if not new_helper:
4089         new_helper = None
4090       if new_helper != self.cfg.GetDRBDHelper():
4091         self.cfg.SetDRBDHelper(new_helper)
4092       else:
4093         feedback_fn("Cluster DRBD helper already in desired state,"
4094                     " not changing")
4095     if self.op.hvparams:
4096       self.cluster.hvparams = self.new_hvparams
4097     if self.op.os_hvp:
4098       self.cluster.os_hvp = self.new_os_hvp
4099     if self.op.enabled_hypervisors is not None:
4100       self.cluster.hvparams = self.new_hvparams
4101       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4102     if self.op.beparams:
4103       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4104     if self.op.nicparams:
4105       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4106     if self.op.ipolicy:
4107       self.cluster.ipolicy = self.new_ipolicy
4108     if self.op.osparams:
4109       self.cluster.osparams = self.new_osp
4110     if self.op.ndparams:
4111       self.cluster.ndparams = self.new_ndparams
4112     if self.op.diskparams:
4113       self.cluster.diskparams = self.new_diskparams
4114     if self.op.hv_state:
4115       self.cluster.hv_state_static = self.new_hv_state
4116     if self.op.disk_state:
4117       self.cluster.disk_state_static = self.new_disk_state
4118
4119     if self.op.candidate_pool_size is not None:
4120       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4121       # we need to update the pool size here, otherwise the save will fail
4122       _AdjustCandidatePool(self, [])
4123
4124     if self.op.maintain_node_health is not None:
4125       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4126         feedback_fn("Note: CONFD was disabled at build time, node health"
4127                     " maintenance is not useful (still enabling it)")
4128       self.cluster.maintain_node_health = self.op.maintain_node_health
4129
4130     if self.op.prealloc_wipe_disks is not None:
4131       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4132
4133     if self.op.add_uids is not None:
4134       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4135
4136     if self.op.remove_uids is not None:
4137       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4138
4139     if self.op.uid_pool is not None:
4140       self.cluster.uid_pool = self.op.uid_pool
4141
4142     if self.op.default_iallocator is not None:
4143       self.cluster.default_iallocator = self.op.default_iallocator
4144
4145     if self.op.reserved_lvs is not None:
4146       self.cluster.reserved_lvs = self.op.reserved_lvs
4147
4148     if self.op.use_external_mip_script is not None:
4149       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4150
4151     def helper_os(aname, mods, desc):
4152       desc += " OS list"
4153       lst = getattr(self.cluster, aname)
4154       for key, val in mods:
4155         if key == constants.DDM_ADD:
4156           if val in lst:
4157             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4158           else:
4159             lst.append(val)
4160         elif key == constants.DDM_REMOVE:
4161           if val in lst:
4162             lst.remove(val)
4163           else:
4164             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4165         else:
4166           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4167
4168     if self.op.hidden_os:
4169       helper_os("hidden_os", self.op.hidden_os, "hidden")
4170
4171     if self.op.blacklisted_os:
4172       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4173
4174     if self.op.master_netdev:
4175       master_params = self.cfg.GetMasterNetworkParameters()
4176       ems = self.cfg.GetUseExternalMipScript()
4177       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4178                   self.cluster.master_netdev)
4179       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4180                                                        master_params, ems)
4181       result.Raise("Could not disable the master ip")
4182       feedback_fn("Changing master_netdev from %s to %s" %
4183                   (master_params.netdev, self.op.master_netdev))
4184       self.cluster.master_netdev = self.op.master_netdev
4185
4186     if self.op.master_netmask:
4187       master_params = self.cfg.GetMasterNetworkParameters()
4188       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4189       result = self.rpc.call_node_change_master_netmask(master_params.name,
4190                                                         master_params.netmask,
4191                                                         self.op.master_netmask,
4192                                                         master_params.ip,
4193                                                         master_params.netdev)
4194       if result.fail_msg:
4195         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4196         feedback_fn(msg)
4197
4198       self.cluster.master_netmask = self.op.master_netmask
4199
4200     self.cfg.Update(self.cluster, feedback_fn)
4201
4202     if self.op.master_netdev:
4203       master_params = self.cfg.GetMasterNetworkParameters()
4204       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4205                   self.op.master_netdev)
4206       ems = self.cfg.GetUseExternalMipScript()
4207       result = self.rpc.call_node_activate_master_ip(master_params.name,
4208                                                      master_params, ems)
4209       if result.fail_msg:
4210         self.LogWarning("Could not re-enable the master ip on"
4211                         " the master, please restart manually: %s",
4212                         result.fail_msg)
4213
4214
def _UploadHelper(lu, nodes, fname):
  """Upload a file to the given nodes, warning about each failed copy.

  Nothing is done if the file doesn't exist locally.

  @param lu: calling logical unit (its RPC runner and processor are used)
  @param nodes: list of nodes to upload the file to
  @param fname: path of the file to upload

  """
  if not os.path.exists(fname):
    return

  upload_results = lu.rpc.call_upload_file(nodes, fname)
  for (node_name, node_result) in upload_results.items():
    error = node_result.fail_msg
    if not error:
      continue
    lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                       (fname, node_name, error))
4227
4228
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @param cluster: cluster object; its C{modify_etc_hosts} flag and list of
      enabled hypervisors are used
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
  @rtype: tuple
  @return: four sets of file names: files for all nodes, optional files,
      files for master candidates only and files for VM-capable nodes only

  """
  # Files which should be on all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)
  else:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([constants.RAPI_USERS_FILE])

  # Files which should only be on VM-capable nodes; every hypervisor
  # reports its required files first and its optional files second
  files_vm = set()
  for hv_name in cluster.enabled_hypervisors:
    files_vm.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  for hv_name in cluster.enabled_hypervisors:
    files_opt.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  total_listed = len(files_all) + len(files_mc) + len(files_vm)
  assert len(all_files_set) == total_listed, \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
4293
4294
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied. The master node itself is never a target.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Collect the nodes to copy to
  cluster = lu.cfg.GetClusterInfo()
  master_name = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode()).name

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # The master node must never be a target
  for targets in (online_nodes, vm_nodes):
    try:
      targets.remove(master_name)
    except ValueError:
      # master was not in this list
      pass

  # Collect the files to copy
  (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)

  # The configuration file is never re-distributed from here
  assert (constants.CLUSTER_CONF_FILE not in files_all and
          constants.CLUSTER_CONF_FILE not in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # Copy each group of files to its group of nodes
  for (targets, filenames) in ((online_nodes, files_all),
                               (vm_nodes, files_vm)):
    for fname in filenames:
      _UploadHelper(lu, targets, fname)
4343
4344
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The cluster object is re-submitted unchanged to the configuration and
  the ancillary files are copied again.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node locks, in shared mode.

    """
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: function used to report progress messages

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
4365
4366
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    The master network parameters and the external-script setting are
    read from the configuration and passed to the activation RPC; an RPC
    failure is raised to the caller.

    """
    net_params = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    activation = self.rpc.call_node_activate_master_ip(net_params.name,
                                                       net_params,
                                                       use_ext_script)
    activation.Raise("Could not activate the master IP")
4380
4381
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    The master network parameters and the external-script setting are
    read from the configuration and passed to the deactivation RPC; an RPC
    failure is raised to the caller.

    """
    net_params = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    deactivation = self.rpc.call_node_deactivate_master_ip(net_params.name,
                                                           net_params,
                                                           use_ext_script)
    deactivation.Raise("Could not deactivate the master IP")
4395
4396
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status of the disks is queried on the instance's primary
  node until no disk reports a sync in progress anymore.

  @param lu: calling logical unit
  @param instance: the instance whose disks should be checked
  @param disks: the disks to check; an empty list means there's nothing
      to wait for, while for C{None} the selection is left to
      L{_ExpandCheckDisks} (presumably all disks of the instance)
  @type oneshot: boolean
  @param oneshot: if set, query the status only once instead of waiting
      for the sync to finish (short retries for degraded disks are still
      done)
  @rtype: boolean
  @return: C{False} if at the last check a disk was degraded without being
      in the process of syncing, C{True} otherwise
  @raise errors.RemoteError: if the mirror status can't be retrieved from
      the node for ten consecutive attempts

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  # Lower bound, in seconds, for the pause between two polls when a
  # syncing disk doesn't provide a time estimate
  min_poll_interval = 5

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    # a successful query resets the count of consecutive failures
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      # only disks which are degraded and not resyncing count as degraded
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
          # Without this the sleep time could stay at zero and the loop
          # would poll the node without any pause
          max_time = max(max_time, min_poll_interval)
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    # never wait more than a minute between two polls
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
4470
4471
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).
  Note that the children of the device are always checked using the
  is_degraded attribute.

  @param lu: calling logical unit
  @param dev: the disk to check, together with its children
  @param node: the node on which the disk is looked up
  @param on_primary: whether the node is the primary one; on other nodes
      the device is only queried if it is assembled on secondaries
  @param ldisk: whether to check the local storage status of C{dev}
  @return: whether the disk and all its children are consistent; the
      check stops at the first problem found

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    find_result = lu.rpc.call_blockdev_find(node, dev)
    error = find_result.fail_msg
    if error:
      lu.LogWarning("Can't find disk on node %s: %s", node, error)
      consistent = False
    elif not find_result.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = find_result.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not find_result.payload.is_degraded

  for child in dev.children or []:
    if not consistent:
      # a problem was already found, no need to query further devices
      break
    consistent = _CheckDiskConsistency(lu, child, node, on_primary)

  return consistent
4504
4505
4506 class LUOobCommand(NoHooksLU):
4507   """Logical unit for OOB handling.
4508
4509   """
4510   REG_BGL = False
4511   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4512
4513   def ExpandNames(self):
4514     """Gather locks we need.
4515
4516     """
4517     if self.op.node_names:
4518       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4519       lock_names = self.op.node_names
4520     else:
4521       lock_names = locking.ALL_SET
4522
4523     self.needed_locks = {
4524       locking.LEVEL_NODE: lock_names,
4525       }
4526
4527   def CheckPrereq(self):
4528     """Check prerequisites.
4529
4530     This checks:
4531      - the node exists in the configuration
4532      - OOB is supported
4533
4534     Any errors are signaled by raising errors.OpPrereqError.
4535
4536     """
4537     self.nodes = []
4538     self.master_node = self.cfg.GetMasterNode()
4539
4540     assert self.op.power_delay >= 0.0
4541
4542     if self.op.node_names:
4543       if (self.op.command in self._SKIP_MASTER and
4544           self.master_node in self.op.node_names):
4545         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4546         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4547
4548         if master_oob_handler:
4549           additional_text = ("run '%s %s %s' if you want to operate on the"
4550                              " master regardless") % (master_oob_handler,
4551                                                       self.op.command,
4552                                                       self.master_node)
4553         else:
4554           additional_text = "it does not support out-of-band operations"
4555
4556         raise errors.OpPrereqError(("Operating on the master node %s is not"
4557                                     " allowed for %s; %s") %
4558                                    (self.master_node, self.op.command,
4559                                     additional_text), errors.ECODE_INVAL)
4560     else:
4561       self.op.node_names = self.cfg.GetNodeList()
4562       if self.op.command in self._SKIP_MASTER:
4563         self.op.node_names.remove(self.master_node)
4564
4565     if self.op.command in self._SKIP_MASTER:
4566       assert self.master_node not in self.op.node_names
4567
4568     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4569       if node is None:
4570         raise errors.OpPrereqError("Node %s not found" % node_name,
4571                                    errors.ECODE_NOENT)
4572       else:
4573         self.nodes.append(node)
4574
4575       if (not self.op.ignore_status and
4576           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4577         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4578                                     " not marked offline") % node_name,
4579                                    errors.ECODE_STATE)
4580
4581   def Exec(self, feedback_fn):
4582     """Execute OOB and return result if we expect any.
4583
4584     """
4585     master_node = self.master_node
4586     ret = []
4587
4588     for idx, node in enumerate(utils.NiceSort(self.nodes,
4589                                               key=lambda node: node.name)):
4590       node_entry = [(constants.RS_NORMAL, node.name)]
4591       ret.append(node_entry)
4592
4593       oob_program = _SupportsOob(self.cfg, node)
4594
4595       if not oob_program:
4596         node_entry.append((constants.RS_UNAVAIL, None))
4597         continue
4598
4599       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4600                    self.op.command, oob_program, node.name)
4601       result = self.rpc.call_run_oob(master_node, oob_program,
4602                                      self.op.command, node.name,
4603                                      self.op.timeout)
4604
4605       if result.fail_msg:
4606         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4607                         node.name, result.fail_msg)
4608         node_entry.append((constants.RS_NODATA, None))
4609       else:
4610         try:
4611           self._CheckPayload(result)
4612         except errors.OpExecError, err:
4613           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4614                           node.name, err)
4615           node_entry.append((constants.RS_NODATA, None))
4616         else:
4617           if self.op.command == constants.OOB_HEALTH:
4618             # For health we should log important events
4619             for item, status in result.payload:
4620               if status in [constants.OOB_STATUS_WARNING,
4621                             constants.OOB_STATUS_CRITICAL]:
4622                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4623                                 item, node.name, status)
4624
4625           if self.op.command == constants.OOB_POWER_ON:
4626             node.powered = True
4627           elif self.op.command == constants.OOB_POWER_OFF:
4628             node.powered = False
4629           elif self.op.command == constants.OOB_POWER_STATUS:
4630             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4631             if powered != node.powered:
4632               logging.warning(("Recorded power state (%s) of node '%s' does not"
4633                                " match actual power state (%s)"), node.powered,
4634                               node.name, powered)
4635
4636           # For configuration changing commands we should update the node
4637           if self.op.command in (constants.OOB_POWER_ON,
4638                                  constants.OOB_POWER_OFF):
4639             self.cfg.Update(node, feedback_fn)
4640
4641           node_entry.append((constants.RS_NORMAL, result.payload))
4642
4643           if (self.op.command == constants.OOB_POWER_ON and
4644               idx < len(self.nodes) - 1):
4645             time.sleep(self.op.power_delay)
4646
4647     return ret
4648
4649   def _CheckPayload(self, result):
4650     """Checks if the payload is valid.
4651
4652     @param result: RPC result
4653     @raises errors.OpExecError: If payload is not valid
4654
4655     """
4656     errs = []
4657     if self.op.command == constants.OOB_HEALTH:
4658       if not isinstance(result.payload, list):
4659         errs.append("command 'health' is expected to return a list but got %s" %
4660                     type(result.payload))
4661       else:
4662         for item, status in result.payload:
4663           if status not in constants.OOB_STATUSES:
4664             errs.append("health item '%s' has invalid status '%s'" %
4665                         (item, status))
4666
4667     if self.op.command == constants.OOB_POWER_STATUS:
4668       if not isinstance(result.payload, dict):
4669         errs.append("power-status is expected to return a dict but got %s" %
4670                     type(result.payload))
4671
4672     if self.op.command in [
4673         constants.OOB_POWER_ON,
4674         constants.OOB_POWER_OFF,
4675         constants.OOB_POWER_CYCLE,
4676         ]:
4677       if result.payload is not None:
4678         errs.append("%s is expected to not return payload but got '%s'" %
4679                     (self.op.command, result.payload))
4680
4681     if errs:
4682       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4683                                utils.CommaJoin(errs))
4684
4685
class _OsQuery(_QueryBase):
  """Query implementation for operating system definitions.

  """
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    """Set up the names to query; no locks are requested.

    """
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    self.do_locking = self.use_locking

    # The following variable interacts with _QueryBase._GetNames
    self.wanted = self.names or locking.ALL_SET

  def DeclareLocks(self, lu, level):
    """Nothing to declare, as no locks are used.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    # Only the nodes which didn't fail at RPC level get an entry for each
    # OS, so that a non-responding node daemon doesn't make all OSes invalid
    responsive = [node_name for (node_name, node_result) in rlist.items()
                  if not node_result.fail_msg]

    by_os = {}
    for (node_name, node_result) in rlist.items():
      if node_result.fail_msg or not node_result.payload:
        continue

      for os_entry in node_result.payload:
        (os_name, path, status, diagnose, variants,
         params, api_versions) = os_entry

        per_node = by_os.get(os_name)
        if per_node is None:
          # first time this OS is seen: start with an empty list for
          # every responsive node
          per_node = by_os[os_name] = dict((nname, []) for nname in responsive)

        # parameters are converted from [name, help] to (name, help)
        per_node[node_name].append((path, status, diagnose, variants,
                                    [tuple(v) for v in params],
                                    api_versions))

    return by_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    The OS definitions are retrieved from all online, VM-capable nodes.
    An OS is valid only if the first definition found on every one of
    these nodes is valid; its variants, parameters and API versions are
    those common to all nodes.

    """
    # Locking is not used
    owns_locks = compat.any(lu.glm.is_owned(level)
                            for level in locking.LEVELS
                            if level != locking.LEVEL_CLUSTER)
    assert not (owns_locks or self.do_locking or self.use_locking)

    all_nodes = lu.cfg.GetAllNodesInfo().values()
    valid_nodes = [node.name for node in all_nodes
                   if node.vm_capable and not node.offline]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      # Values found on all nodes: variants, parameters, API versions
      common = (set(), set(), set())

      for (idx, node_entries) in enumerate(os_data.values()):
        info.valid = bool(info.valid and node_entries and node_entries[0][1])
        if not info.valid:
          break

        # Only the first definition found on each node is considered
        (node_variants, node_params, node_api) = node_entries[0][3:6]
        node_values = (node_variants, node_params, node_api)
        for (known, found) in zip(common, node_values):
          if idx:
            # Filter out inconsistent values
            known.intersection_update(found)
          else:
            # First entry
            known.update(found)

      (variants, parameters, api_versions) = common
      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    ordered_names = self._GetNames(lu, pol.keys(), None)
    return [data[name] for name in ordered_names if name in data]
4801
4802
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: names of the requested output fields
    @param names: OS names the query should be restricted to
    @return: the name filter combined with the status filter, either
        one of them alone, or C{None} if no status filter was built
        and the name filter is empty

    """
    by_name = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for flag in ["hidden", "blacklisted"]:
      if flag not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, flag]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      by_status = [qlang.OP_AND] + conditions
    else:
      by_status = None

    if not by_name:
      return by_status
    if not by_status:
      return by_name
    return [qlang.OP_AND, by_name, by_status]

  def CheckArguments(self):
    """Creates the OS query object from the opcode parameters.

    """
    os_filter = self._BuildFilter(self.op.output_fields, self.op.names)
    self.oq = _OsQuery(os_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Delegates name expansion to the query object.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.oq.OldStyleQuery(self)
4845
4846
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node from the cluster.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: hook environment naming the node which is being removed

    """
    target = self.op.node_name
    return {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The node being removed is left out of the returned lists, as a
    failed node would otherwise be impossible to remove. Its post hook
    is run explicitly from L{Exec}.

    @rtype: tuple
    @return: the same node list for both the pre and the post phase

    """
    remaining = self.cfg.GetNodeList()
    if self.op.node_name in remaining:
      remaining.remove(self.op.node_name)
    return (remaining, remaining)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    On success the node object is stored in C{self.node}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    target = self.cfg.GetNodeInfo(self.op.node_name)
    assert target is not None

    master_name = self.cfg.GetMasterNode()
    if target.name == master_name:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    all_instances = self.cfg.GetAllInstancesInfo()
    for (inst_name, inst) in all_instances.items():
      if target.name not in inst.all_nodes:
        continue
      raise errors.OpPrereqError("Instance %s is still running on the node,"
                                 " please remove first" % inst_name,
                                 errors.ECODE_INVAL)

    self.op.node_name = target.name
    self.node = target

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    A failure of the node to leave the cluster is only logged as a
    warning; a failure to update the hosts file on the master raises
    an error.

    """
    target = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 target.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[target.name])
    self.context.RemoveNode(target.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, target.name)

    leave_result = self.rpc.call_node_leave_cluster(target.name,
                                                    modify_ssh_setup)
    leave_error = leave_result.fail_msg
    if leave_error:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_error)

    # Remove node from our /etc/hosts
    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                  constants.ETC_HOSTS_REMOVE,
                                                  target.name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
4939
4940
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Declares the locks needed on behalf of the given LU.

    Node locks are only requested if locking was asked for and live
    data is part of the requested data.

    @param lu: the logical unit running this query

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    live_requested = query.NQ_LIVE in self.requested_data
    self.do_locking = (self.use_locking and live_requested)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Does nothing; all locks are declared in L{ExpandNames}.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Only the data groups listed in C{self.requested_data} are
    gathered; the others are passed on as C{None} (or an empty dict
    for node groups).

    @param lu: the logical unit running this query
    @return: a L{query.NodeQueryData} object

    """
    all_info = lu.cfg.GetAllNodesInfo()
    requested = self.requested_data

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Live data, retrieved via RPC from the VM-capable nodes only
    live_data = None
    if query.NQ_LIVE in requested:
      vm_nodes = [name for name in nodenames if all_info[name].vm_capable]

      rpc_results = lu.rpc.call_node_info(vm_nodes, [lu.cfg.GetVGName()],
                                          [lu.cfg.GetHypervisorType()])
      live_data = {}
      for (name, nresult) in rpc_results.items():
        # Skip nodes whose call failed or which returned nothing
        if nresult.fail_msg or not nresult.payload:
          continue
        live_data[name] = _MakeLegacyNodeInfo(nresult.payload)

    # Instance names per node, split by primary/secondary role
    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in requested:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      for inst in lu.cfg.GetAllInstancesInfo().values():
        primaries = node_to_primary.get(inst.primary_node)
        if primaries is not None:
          primaries.add(inst.name)
        for secnode in inst.secondary_nodes:
          secondaries = node_to_secondary.get(secnode)
          if secondaries is not None:
            secondaries.add(inst.name)

    # Out-of-band support, computed for all configured nodes
    oob_support = None
    if query.NQ_OOB in requested:
      oob_support = {}
      for (name, node) in all_info.iteritems():
        oob_support[name] = bool(_SupportsOob(lu.cfg, node))

    groups = {}
    if query.NQ_GROUP in requested:
      groups = lu.cfg.GetAllNodeGroupsInfo()

    node_objects = [all_info[name] for name in nodenames]
    master_name = lu.cfg.GetMasterNode()
    cluster = lu.cfg.GetClusterInfo()

    return query.NodeQueryData(node_objects, live_data, master_name,
                               node_to_primary, node_to_secondary, groups,
                               oob_support, cluster)
5015
5016
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the work is delegated to a L{_NodeQuery} object.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Creates the node query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Delegates name expansion to the query object.

    """
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates lock declaration to the query object.

    """
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its old-style result.

    """
    return self.nq.OldStyleQuery(self)
5036
5037
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validates the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests shared locks on the given nodes, or on all of them.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Offline nodes are skipped silently, nodes whose RPC call failed are
    skipped with a warning.

    @rtype: list of lists of strings
    @return: one row per volume, holding the values of the requested
        output fields converted to strings

    """
    node_names = self.owned_locks(locking.LEVEL_NODE)
    per_node = self.rpc.call_node_volumes(node_names)

    instances = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(instances.values())

    def _FieldValue(field, node, vol):
      """Returns the value of one output field for one volume.

      """
      if field == "node":
        return node
      if field == "phys":
        return vol["dev"]
      if field == "vg":
        return vol["vg"]
      if field == "name":
        return vol["name"]
      if field == "size":
        return int(float(vol["size"]))
      if field == "instance":
        # Volumes not found in the disk map are reported as "-"
        return vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
      raise errors.ParameterError(field)

    rows = []
    for node in node_names:
      nresult = per_node[node]
      if nresult.offline:
        continue
      failure = nresult.fail_msg
      if failure:
        self.LogWarning("Can't compute volume data on node %s: %s",
                        node, failure)
        continue

      by_device = sorted(nresult.payload, key=operator.itemgetter("dev"))

      for vol in by_device:
        rows.append([str(_FieldValue(field, node, vol))
                     for field in self.op.output_fields])

    return rows
5106
5107
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the requested output fields.

    """
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests shared locks on the given nodes, or on all of them.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently, nodes whose RPC call failed are
    skipped with a warning. Rows are sorted by node name and, within a
    node, by storage unit name.

    @rtype: list of lists
    @return: one row per storage unit, holding the values of the
        requested output fields

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    requested = self.op.output_fields
    if constants.SF_NAME in requested:
      candidates = requested[:]
    else:
      candidates = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    local_only = [constants.SF_NODE, constants.SF_TYPE]
    fields = [name for name in candidates if name not in local_only]

    field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    def _FieldValue(field, node, row):
      """Returns the value of one output field for one storage unit.

      """
      if field == constants.SF_NODE:
        return node
      if field == constants.SF_TYPE:
        return self.op.storage_type
      if field in field_idx:
        return row[field_idx[field]]
      raise errors.ParameterError(field)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      failure = nresult.fail_msg
      if failure:
        self.LogWarning("Can't get storage data from node %s: %s",
                        node, failure)
        continue

      # Index the rows by unit name so they can be sorted by it
      by_name = dict((row[name_idx], row) for row in nresult.payload)

      for unit_name in utils.NiceSort(by_name.keys()):
        row = by_name[unit_name]
        result.append([_FieldValue(field, node, row)
                       for field in self.op.output_fields])

    return result
5189
5190
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  Gathers configuration data and, where requested, live data, disk
  usage, console information and node/group objects for the selected
  instances and wraps them in a L{query.InstanceQueryData} object.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Declares the locks needed on behalf of the given LU.

    Instance locks (plus initially empty node group and node lock
    lists, filled in by L{DeclareLocks}) are only requested if locking
    was asked for and live data is part of the requested data.

    @param lu: the logical unit running this query

    """
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # Group locks are only needed when node data was requested as well
    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    """Fills in the node group and node locks for the given level.

    Does nothing unless locking is in use (see L{ExpandNames}).

    @param lu: the logical unit running this query
    @param level: the locking level being declared

    """
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    """Verifies the optimistically acquired node group locks.

    Calls L{_CheckInstanceNodeGroups} for every owned instance lock,
    passing the set of owned node group locks.

    @param lu: the logical unit running this query

    """
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Only the data groups listed in C{self.requested_data} are
    gathered; disk usage, console information, nodes and groups are
    passed on as C{None} when not requested.

    @param lu: the logical unit running this query
    @return: a L{query.InstanceQueryData} object

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    # Names of all nodes used by the selected instances; note that
    # "nodes" is rebound further down when IQ_NODES is requested
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                # NOTE(review): this merges the node's whole payload, so
                # entries for wrong-node and orphan instances reported by
                # the same node end up in live_data too -- confirm
                # whether consumers of live_data rely on this
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      # Disk usage is computed from the configured disk sizes only
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      # From here on "nodes" maps node names to node objects
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
5321
5322
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  The kind is given by the opcode's C{what} parameter and selects the
  query implementation all work is delegated to.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation for the requested kind.

    """
    implementation = _GetQueryImplementation(self.op.what)

    self.impl = implementation(self.op.qfilter, self.op.fields,
                               self.op.use_locking)

  def ExpandNames(self):
    """Delegates name expansion to the query implementation.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates lock declaration to the query implementation.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its new-style result.

    """
    return self.impl.NewStyleQuery(self)
5343
5344
class LUQueryFields(NoHooksLU):
  """Query for the fields available for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation for the requested kind.

    """
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Returns the result of querying the implementation's fields.

    """
    field_defs = self.qcls.FIELDS
    return query.QueryFields(field_defs, self.op.fields)
5360
5361
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type has no modifiable
        fields or if a field which can't be modified is to be changed

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    stype = self.op.storage_type

    try:
      allowed = constants.MODIFIABLE_STORAGE_FIELDS[stype]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % stype,
                                 errors.ECODE_INVAL)

    # Anything requested beyond the modifiable fields is an error
    rejected = set(self.op.changes.keys()) - allowed
    if not rejected:
      return

    raise errors.OpPrereqError("The following fields can not be modified for"
                               " storage units of type '%s': %r" %
                               (stype, list(rejected)),
                               errors.ECODE_INVAL)

  def ExpandNames(self):
    """Requests the lock of the node holding the storage unit.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit.

    An RPC failure is raised via the result's C{Raise} method.

    """
    node_name = self.op.node_name
    unit_name = self.op.name

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    outcome = self.rpc.call_storage_modify(node_name,
                                           self.op.storage_type, st_args,
                                           unit_name, self.op.changes)
    outcome.Raise("Failed to modify storage unit '%s' on %s" %
                  (self.op.name, self.op.node_name))
5402
5403
5404 class LUNodeAdd(LogicalUnit):
5405   """Logical unit for adding node to the cluster.
5406
5407   """
5408   HPATH = "node-add"
5409   HTYPE = constants.HTYPE_NODE
5410   _NFLAGS = ["master_capable", "vm_capable"]
5411
5412   def CheckArguments(self):
5413     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5414     # validate/normalize the node name
5415     self.hostname = netutils.GetHostname(name=self.op.node_name,
5416                                          family=self.primary_ip_family)
5417     self.op.node_name = self.hostname.name
5418
5419     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5420       raise errors.OpPrereqError("Cannot readd the master node",
5421                                  errors.ECODE_STATE)
5422
5423     if self.op.readd and self.op.group:
5424       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5425                                  " being readded", errors.ECODE_INVAL)
5426
5427   def BuildHooksEnv(self):
5428     """Build hooks env.
5429
5430     This will run on all nodes before, and on all nodes + the new node after.
5431
5432     """
5433     return {
5434       "OP_TARGET": self.op.node_name,
5435       "NODE_NAME": self.op.node_name,
5436       "NODE_PIP": self.op.primary_ip,
5437       "NODE_SIP": self.op.secondary_ip,
5438       "MASTER_CAPABLE": str(self.op.master_capable),
5439       "VM_CAPABLE": str(self.op.vm_capable),
5440       }
5441
5442   def BuildHooksNodes(self):
5443     """Build hooks nodes.
5444
5445     """
5446     # Exclude added node
5447     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5448     post_nodes = pre_nodes + [self.op.node_name, ]
5449
5450     return (pre_nodes, post_nodes)
5451
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the secondary IP (defaulting to the primary IP) is a valid IPv4
       address
     - the new node is not already in the config (or, for re-adds, that
       it is)
     - its IP addresses don't conflict with those of other nodes
     - a node re-added as not vm_capable doesn't hold instances
     - its parameters (single/dual homed) matches the cluster
     - it is reachable via TCP ping on the node daemon port

    On success the node object is stored in C{self.new_node}; the
    attributes C{self.changed_primary_ip} and C{self.master_candidate}
    are set as well, and C{self.new_hv_state}/C{self.new_disk_state}
    when the respective opcode parameters were given.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    # The primary IP always comes from the resolved hostname
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      # No secondary IP given: the node is treated as single-homed
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        # The re-added node itself: the secondary IP must be unchanged,
        # a changed primary IP is only recorded
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      # Any other node: none of its IPs may match one of the new node's
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      # Unspecified flags are taken over from the existing node object
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      # Unspecified flags default to True for newly added nodes
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    # On re-adds the node itself is passed as an exception to the
    # self-promotion decision
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      # Re-adds reuse the node object already in the configuration
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5586
5587   def Exec(self, feedback_fn):
5588     """Adds the new node to the cluster.
5589
5590     """
5591     new_node = self.new_node
5592     node = new_node.name
5593
5594     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5595       "Not owning BGL"
5596
    # We are adding a new node, so we assume it's powered
5598     new_node.powered = True
5599
5600     # for re-adds, reset the offline/drained/master-candidate flags;
5601     # we need to reset here, otherwise offline would prevent RPC calls
5602     # later in the procedure; this also means that if the re-add
5603     # fails, we are left with a non-offlined, broken node
5604     if self.op.readd:
5605       new_node.drained = new_node.offline = False # pylint: disable=W0201
5606       self.LogInfo("Readding a node, the offline/drained flags were reset")
5607       # if we demote the node, we do cleanup later in the procedure
5608       new_node.master_candidate = self.master_candidate
5609       if self.changed_primary_ip:
5610         new_node.primary_ip = self.op.primary_ip
5611
5612     # copy the master/vm_capable flags
5613     for attr in self._NFLAGS:
5614       setattr(new_node, attr, getattr(self.op, attr))
5615
5616     # notify the user about any possible mc promotion
5617     if new_node.master_candidate:
5618       self.LogInfo("Node will be a master candidate")
5619
5620     if self.op.ndparams:
5621       new_node.ndparams = self.op.ndparams
5622     else:
5623       new_node.ndparams = {}
5624
5625     if self.op.hv_state:
5626       new_node.hv_state_static = self.new_hv_state
5627
5628     if self.op.disk_state:
5629       new_node.disk_state_static = self.new_disk_state
5630
5631     # check connectivity
5632     result = self.rpc.call_version([node])[node]
5633     result.Raise("Can't get version information from node %s" % node)
5634     if constants.PROTOCOL_VERSION == result.payload:
5635       logging.info("Communication to node %s fine, sw version %s match",
5636                    node, result.payload)
5637     else:
5638       raise errors.OpExecError("Version mismatch master version %s,"
5639                                " node version %s" %
5640                                (constants.PROTOCOL_VERSION, result.payload))
5641
5642     # Add node to our /etc/hosts, and add key to known_hosts
5643     if self.cfg.GetClusterInfo().modify_etc_hosts:
5644       master_node = self.cfg.GetMasterNode()
5645       result = self.rpc.call_etc_hosts_modify(master_node,
5646                                               constants.ETC_HOSTS_ADD,
5647                                               self.hostname.name,
5648                                               self.hostname.ip)
5649       result.Raise("Can't update hosts file with new host data")
5650
5651     if new_node.secondary_ip != new_node.primary_ip:
5652       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5653                                False)
5654
5655     node_verify_list = [self.cfg.GetMasterNode()]
5656     node_verify_param = {
5657       constants.NV_NODELIST: ([node], {}),
5658       # TODO: do a node-net-test as well?
5659     }
5660
5661     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5662                                        self.cfg.GetClusterName())
5663     for verifier in node_verify_list:
5664       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5665       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5666       if nl_payload:
5667         for failed in nl_payload:
5668           feedback_fn("ssh/hostname verification failed"
5669                       " (checking from %s): %s" %
5670                       (verifier, nl_payload[failed]))
5671         raise errors.OpExecError("ssh/hostname verification failed")
5672
5673     if self.op.readd:
5674       _RedistributeAncillaryFiles(self)
5675       self.context.ReaddNode(new_node)
5676       # make sure we redistribute the config
5677       self.cfg.Update(new_node, feedback_fn)
5678       # and make sure the new node will not have old files around
5679       if not new_node.master_candidate:
5680         result = self.rpc.call_node_demote_from_mc(new_node.name)
5681         msg = result.fail_msg
5682         if msg:
5683           self.LogWarning("Node failed to demote itself from master"
5684                           " candidate status: %s" % msg)
5685     else:
5686       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5687                                   additional_vm=self.op.vm_capable)
5688       self.context.AddNode(new_node, self.proc.GetECId())
5689
5690
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validates the opcode arguments and decides on the locking scope.

    Besides the validation this computes C{self.might_demote},
    C{self.lock_all} and C{self.lock_instances}, which are used by the
    later phases.

    @raise errors.OpPrereqError: if no modification was requested, if
        more than one of the modifications is set to C{True} or if the
        secondary IP is not a valid IPv4 address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    # All nodes are locked only if a demotion might need to be compensated
    # by promoting another node
    self.lock_all = self.op.auto_promote and self.might_demote
    # Changing the secondary IP requires looking at the node's instances
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    @return: whether the instance uses an internally mirrored disk
        template and has the modified node among its nodes

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    """Declares the node, node resource and instance locks needed.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get node resource and instance locks in shared mode; they are not used
    # for anything but read-only access
    self.share_locks[locking.LEVEL_NODE_RES] = 1
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node and the modified node, both for the pre and
        the post hooks

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the requested modifications can be applied to
    the node in its current state and computes the role the node will
    have afterwards. The results are stored in C{self.node},
    C{self.old_flags}, C{self.old_role} and C{self.new_role}, plus
    C{self.new_ndparams}, C{self.new_hv_state} and
    C{self.new_disk_state} if the respective parameters were passed.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks; the set of affected instances could have
      # changed between ExpandNames and now
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      # TODO: Use standard RPC runner, but make sure it works when the node is
      # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()),
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    @param feedback_fn: function passed on to the configuration update
    @return: a list of (parameter name, new value) pairs describing the
        changes made to the node

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      # Report only the flags which actually differ from the old ones
      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
6026
6027
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and protects the master node.

    @raise errors.OpPrereqError: if the target is the master node and
        the force parameter was not set

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    target_is_master = (self.op.node_name == self.cfg.GetMasterNode())
    if target_is_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort option which must not wait for other
    jobs, hence no locks are requested at all.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC call

    """
    node_name = self.op.node_name
    hv_type = self.cfg.GetHypervisorType()

    rpc_result = self.rpc.call_node_powercycle(node_name, hv_type)
    rpc_result.Raise("Failed to schedule the reboot")

    return rpc_result.payload
6058
6059
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Requests no locks, the query only reads the configuration.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @return: a dictionary containing version constants, the
        architecture as reported by the C{platform} module and the
        settings stored in the cluster configuration object

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, filtered just for enabled hypervisors
    os_hvp = dict((os_name,
                   dict((hv_name, hv_params)
                        for (hv_name, hv_params) in hv_dict.items()
                        if hv_name in enabled_hvs))
                  for (os_name, hv_dict) in cluster.os_hvp.items())

    # Cluster-level hypervisor parameters of the enabled hypervisors
    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
6130
6131
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    """Checks the requested output fields against the known ones.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Compute the values of the requested configuration fields.

    @return: a list with one value per requested output field, in the
        order of the request
    @raise errors.ParameterError: if an unknown field is requested

    """
    # Each getter is only invoked when its field is actually requested
    getters = {
      "cluster_name":
        lambda: self.cfg.GetClusterName(),
      "master_node":
        lambda: self.cfg.GetMasterNode(),
      "drain_flag":
        lambda: os.path.exists(constants.JOB_QUEUE_DRAIN_FILE),
      "watcher_pause":
        lambda: utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE),
      "volume_group_name":
        lambda: self.cfg.GetVGName(),
      }

    values = []
    for field in self.op.output_fields:
      if field not in getters:
        raise errors.ParameterError(field)
      values.append(getters[field]())

    return values
6169
6170
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; its node locks are computed afterwards.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fills in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node passes the online check.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed while assembling the disks
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (disks_ok, disks_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if disks_ok:
      return disks_info

    raise errors.OpExecError("Cannot activate block devices")
6208
6209
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes, in two passes: first
  every device is assembled in secondary mode on all its nodes, then
  the devices on the primary node are assembled again in primary mode.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors in the first (secondary
      mode) pass won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a tuple of (disks_ok, device_info); disks_ok is False if
      the assembly failed in the first pass (unless
      C{ignore_secondaries} is set) or in the second pass on the
      primary node; device_info is a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices, where
      node_visible_name is None if the assembly on the primary node
      failed

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # work on a copy, so the size is not removed from the original disk
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    # stays None if the assembly on the primary node fails
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        # failures on the primary node are never ignored
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
6296
6297
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  If assembling the disks fails, they are shut down again and an
  error is raised.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed as C{ignore_secondaries} to the disk assembly;
      if it is false (but not None), the user is hinted at '--force'
      on failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)

  # No hint if force is None or if the operation was already forced
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")

  raise errors.OpExecError("Disk consistency error")
6311
6312
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance; its node locks are computed afterwards.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fills in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    With the force option the disks are shut down directly, otherwise
    the variant checking the instance state first is used.

    """
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks

    shutdown_fn(self, self.instance)
6347
6348
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  Before handing over to L{_ShutdownInstanceDisks}, the instance state
  is checked against C{INSTANCE_DOWN}.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to shut down, passed on unchanged

  """
  failure_msg = "cannot shutdown disks"
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg=failure_msg)

  _ShutdownInstanceDisks(lu, instance, disks=disks)
6358
6359
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks; None selects all disks of the instance
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on
  @raise errors.ProgrammerError: if one of the selected disks is not
      among the instance's disks

  """
  if disks is None:
    return instance.disks

  if set(disks).issubset(instance.disks):
    return disks

  raise errors.ProgrammerError("Can only act on disks belonging to the"
                               " target instance")
6376
6377
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  A failure on the primary node is ignored only if C{ignore_primary} is
  true. A failure on any other node is ignored only if the RPC result
  for that node is flagged as C{offline}. All failures are logged as
  warnings, whether they are ignored or not.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: the disks to act on; C{None} selects all disks of the
      instance (see L{_ExpandCheckDisks})
  @type ignore_primary: boolean
  @param ignore_primary: whether failures on the primary node should
      not affect the result
  @rtype: boolean
  @return: C{False} if at least one non-ignored shutdown failed,
      C{True} otherwise

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if not msg:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, msg)
      if node == instance.primary_node:
        # primary node: the caller decides whether errors matter
        counts_as_failure = not ignore_primary
      else:
        # other nodes: errors are tolerated only for offline nodes
        counts_as_failure = not result.offline
      if counts_as_failure:
        all_result = False

  return all_result
6402
6403
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node has at least the requested amount of free memory.

  The node is queried via RPC for the given hypervisor. If the query
  fails, the reported value is not an integer or the node has less free
  memory than requested, an OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: integer
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, [hypervisor_name])[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  # the third payload element holds exactly one entry, as a single
  # hypervisor was queried
  (_, _, hv_data) = node_result.payload
  (hv_info, ) = hv_data

  free_mem = hv_info.get("memory_free", None)
  if isinstance(free_mem, int):
    if requested > free_mem:
      raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                                 " needed %s MiB, available %s MiB" %
                                 (node, reason, requested, free_mem),
                                 errors.ECODE_NORES)
    return free_mem

  raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                             " was '%s'" % (node, free_mem),
                             errors.ECODE_ENVIRON)
6444
6445
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Verify the free disk space of nodes for a set of volume groups.

  For every volume group in C{req_sizes} the check is delegated to
  L{_CheckNodesFreeDiskOnVG}, which raises an OpPrereqError exception if
  a node has too little space or cannot be queried.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for (vg_name, needed_mib) in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, needed_mib)
6467
6468
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Verify that nodes have enough free disk space in one volume group.

  All nodes are queried with a single RPC call. If a node cannot be
  queried, reports a non-integer value or has less free space than
  requested, an OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  results = lu.rpc.call_node_info(nodenames, [vg], None)
  for node in nodenames:
    node_result = results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    # the second payload element holds exactly one entry, as a single
    # volume group was queried
    (_, vg_data, _) = node_result.payload
    (vg_info, ) = vg_data

    vg_free = vg_info.get("vg_free", None)
    if isinstance(vg_free, int):
      if requested > vg_free:
        raise errors.OpPrereqError("Not enough disk space on target node %s"
                                   " vg %s: required %d MiB, available %d"
                                   " MiB" % (node, vg, requested, vg_free),
                                   errors.ECODE_NORES)
    else:
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
6505
6506
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Verify that nodes have at least the requested number of physical CPUs.

  All nodes are queried with a single RPC call for the given
  hypervisor. If a node cannot be queried, reports a non-integer value
  or has fewer CPUs than requested, an OpPrereqError exception is
  raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for CPU information
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  results = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for node in nodenames:
    node_result = results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    # the third payload element holds exactly one entry, as a single
    # hypervisor was queried
    (_, _, hv_data) = node_result.payload
    (hv_info, ) = hv_data

    num_cpus = hv_info.get("cpu_total", None)
    if isinstance(num_cpus, int):
      if requested > num_cpus:
        raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                   "required" % (node, num_cpus, requested),
                                   errors.ECODE_NORES)
    else:
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
6540
6541
class LUInstanceStartup(LogicalUnit):
  """Logical unit starting an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Upgrade and type-check the extra backend parameters, if given.

    """
    extra_beparams = self.op.beparams
    if not extra_beparams:
      return

    # fill the beparams dict
    objects.UpgradeBeParams(extra_beparams)
    utils.ForceDictType(extra_beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Lock the instance and request recalculation of the resource locks.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the resource lock of the instance's primary node.

    """
    if level != locking.LEVEL_NODE_RES:
      return
    self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node followed by all nodes of the instance is used for
    both elements of the returned pair.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates any
    overridden hypervisor parameters and, unless an offline primary
    node is being ignored, verifies the primary node (online state,
    bridges, free memory).

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      merged_hvp = cluster.FillHV(instance)
      merged_hvp.update(self.op.hvparams)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(merged_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor,
                     merged_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    pnode_info = self.cfg.GetNodeInfo(instance.primary_node)
    self.primary_offline = pnode_info.offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
      return

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    bep.update(self.op.beparams)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      # running already, no need to check the memory
      return

    _CheckNodeFreeMemory(self, instance.primary_node,
                         "starting instance %s" % instance.name,
                         bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    Unless C{no_remember} is set, the instance is marked as up in the
    configuration first. If the primary node is offline nothing else is
    done; otherwise the disks are activated and the instance is started,
    shutting the disks down again should the start fail.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
      return

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start_args = (instance, self.op.hvparams, self.op.beparams)
    result = self.rpc.call_instance_start(pnode, start_args,
                                          self.op.startup_paused)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
6662
6663
class LUInstanceReboot(LogicalUnit):
  """Logical unit rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {}
    env["IGNORE_SECONDARIES"] = self.op.ignore_secondaries
    env["REBOOT_TYPE"] = self.op.reboot_type
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node followed by all nodes of the instance is used for
    both elements of the returned pair.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its state and
    primary node allow a reboot and that its bridges exist.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    A running instance with a soft or hard reboot type is rebooted in
    place by the node. In all other cases the instance is shut down (if
    running), its disks are cycled and it is started again. Finally the
    instance is marked as up in the configuration.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type
    pnode = instance.primary_node

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    is_running = bool(remote_info.payload)

    in_place_types = (constants.INSTANCE_REBOOT_SOFT,
                      constants.INSTANCE_REBOOT_HARD)

    if is_running and reboot_type in in_place_types:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      result = self.rpc.call_instance_reboot(pnode, instance, reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if not is_running:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      else:
        result = self.rpc.call_instance_shutdown(pnode, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)

      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(pnode, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
6756
6757
class LUInstanceShutdown(LogicalUnit):
  """Logical unit shutting down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.op.timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node followed by all nodes of the instance is used for
    both elements of the returned pair.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online, unless offline nodes are to be ignored.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    pnode_info = self.cfg.GetNodeInfo(instance.primary_node)
    self.primary_offline = pnode_info.offline

    if not (self.primary_offline and self.op.ignore_offline_nodes):
      _CheckNodeOnline(self, instance.primary_node)
    else:
      self.proc.LogWarning("Ignoring offline primary node")

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    Unless C{no_remember} is set, the instance is marked as down in the
    configuration first. If the primary node is offline nothing else is
    done; otherwise the instance is shut down (a failure is only logged
    as a warning) and its disks are deactivated.

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    result = self.rpc.call_instance_shutdown(pnode, instance, timeout)
    failure = result.fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    _ShutdownInstanceDisks(self, instance)
6827
6828
class LUInstanceReinstall(LogicalUnit):
  """Logical unit reinstalling the OS of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node followed by all nodes of the instance is used for
    both elements of the returned pair.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that all its nodes are online, that it has disks and, if given,
    that the new OS and the OS parameters are valid.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for snode in instance.secondary_nodes:
      _CheckNodeOnline(self, snode, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is None:
      instance_os = instance.os
    else:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type

    nodelist = list(instance.all_nodes)

    if not self.op.osparams:
      self.os_inst = None
    else:
      new_osparams = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, new_osparams)
      # the new dict (without defaults)
      self.os_inst = new_osparams

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    A changed OS type is written to the configuration first. Then the
    disks are activated, the OS create scripts are run on the primary
    node and the disks are shut down again, even if the installation
    failed.

    """
    instance = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      instance.os = new_os
      # Write to configuration
      self.cfg.Update(instance, feedback_fn)

    _StartInstanceDisks(self, instance, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(instance.primary_node,
                                             (instance, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (instance.name, instance.primary_node))
    finally:
      _ShutdownInstanceDisks(self, instance)
6918
6919
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  Optionally the disks can be recreated on a new set of nodes and the
  size and mode of individual disks can be changed.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  # Disk parameters which may be changed while recreating a disk
  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ]))

  def CheckArguments(self):
    """Normalize and validate the list of disks to recreate.

    After this method C{self.op.disks} is a list of C{(index, params)}
    pairs.

    @raise errors.OpPrereqError: if a disk is given more than once or
        a parameter outside of C{_MODIFYABLE} should be changed

    """
    if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifyable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Lock the instance and, if given, the replacement nodes.

    The nodes of the instance itself are appended in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    """Compute the node locks and mirror them as node resource locks.

    """
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The master node followed by all nodes of the instance is used for
    both elements of the returned pair.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    It also computes C{self.disks}, a dictionary mapping the index of
    every disk to recreate to its parameter changes.

    @raise errors.OpPrereqError: if the number of replacement nodes
        doesn't match, the instance is diskless, a disk index is
        invalid or nodes should be changed while only recreating some
        of the disks

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      # no explicit selection: recreate all disks without changes
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_INVAL)

    # Compare against a real list: a bare range() is only equal to a
    # list where range() itself returns a list
    all_indices = list(range(len(instance.disks)))
    if self.op.nodes and sorted(self.disks.keys()) != all_indices:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    The needed modifications are collected first and only applied once
    all consistency checks have passed. If the nodes are changed, the
    instance is written to the configuration before the disks are
    created.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals
                                         # have changed
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      else:
        new_id = None

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))

    if self.op.nodes:
      # change primary node and write the updated instance to the
      # configuration
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)
7111
7112
7113 class LUInstanceRename(LogicalUnit):
7114   """Rename an instance.
7115
7116   """
7117   HPATH = "instance-rename"
7118   HTYPE = constants.HTYPE_INSTANCE
7119
7120   def CheckArguments(self):
7121     """Check arguments.
7122
7123     """
7124     if self.op.ip_check and not self.op.name_check:
7125       # TODO: make the ip check more flexible and not depend on the name check
7126       raise errors.OpPrereqError("IP address check requires a name check",
7127                                  errors.ECODE_INVAL)
7128
7129   def BuildHooksEnv(self):
7130     """Build hooks env.
7131
7132     This runs on master, primary and secondary nodes of the instance.
7133
7134     """
7135     env = _BuildInstanceHookEnvByObject(self, self.instance)
7136     env["INSTANCE_NEW_NAME"] = self.op.new_name
7137     return env
7138
7139   def BuildHooksNodes(self):
7140     """Build hooks nodes.
7141
7142     """
7143     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7144     return (nl, nl)
7145
7146   def CheckPrereq(self):
7147     """Check prerequisites.
7148
7149     This checks that the instance is in the cluster and is not running.
7150
7151     """
7152     self.op.instance_name = _ExpandInstanceName(self.cfg,
7153                                                 self.op.instance_name)
7154     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7155     assert instance is not None
7156     _CheckNodeOnline(self, instance.primary_node)
7157     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7158                         msg="cannot rename")
7159     self.instance = instance
7160
7161     new_name = self.op.new_name
7162     if self.op.name_check:
7163       hostname = netutils.GetHostname(name=new_name)
7164       if hostname.name != new_name:
7165         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7166                      hostname.name)
7167       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7168         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7169                                     " same as given hostname '%s'") %
7170                                     (hostname.name, self.op.new_name),
7171                                     errors.ECODE_INVAL)
7172       new_name = self.op.new_name = hostname.name
7173       if (self.op.ip_check and
7174           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7175         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7176                                    (hostname.ip, new_name),
7177                                    errors.ECODE_NOTUNIQUE)
7178
7179     instance_list = self.cfg.GetInstanceList()
7180     if new_name in instance_list and new_name != instance.name:
7181       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7182                                  new_name, errors.ECODE_EXISTS)
7183
7184   def Exec(self, feedback_fn):
7185     """Rename the instance.
7186
7187     """
7188     inst = self.instance
7189     old_name = inst.name
7190
7191     rename_file_storage = False
7192     if (inst.disk_template in constants.DTS_FILEBASED and
7193         self.op.new_name != inst.name):
7194       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7195       rename_file_storage = True
7196
7197     self.cfg.RenameInstance(inst.name, self.op.new_name)
7198     # Change the instance lock. This is definitely safe while we hold the BGL.
7199     # Otherwise the new lock would have to be added in acquired mode.
7200     assert self.REQ_BGL
7201     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7202     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7203
7204     # re-read the instance from the configuration after rename
7205     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7206
7207     if rename_file_storage:
7208       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7209       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7210                                                      old_file_storage_dir,
7211                                                      new_file_storage_dir)
7212       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7213                    " (but the instance has been renamed in Ganeti)" %
7214                    (inst.primary_node, old_file_storage_dir,
7215                     new_file_storage_dir))
7216
7217     _StartInstanceDisks(self, inst, None)
7218     try:
7219       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7220                                                  old_name, self.op.debug_level)
7221       msg = result.fail_msg
7222       if msg:
7223         msg = ("Could not run OS rename script for instance %s on node %s"
7224                " (but the instance has been renamed in Ganeti): %s" %
7225                (inst.name, inst.primary_node, msg))
7226         self.proc.LogWarning(msg)
7227     finally:
7228       _ShutdownInstanceDisks(self, inst)
7229
7230     return inst.name
7231
7232
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  The instance is shut down on its primary node and then removed via
  L{_RemoveInstance}.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node-level lock declarations.

    """
    self._ExpandAndLockInstance()
    # Node locks are filled in by DeclareLocks once the instance is locked
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      return

    if level == locking.LEVEL_NODE_RES:
      # Node resource locks mirror the node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); the pre list is
        only the master node, the post list is all nodes of the instance
        followed by the master node

    """
    pre_nodes = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes)
    post_nodes.extend(pre_nodes)
    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    A failed shutdown aborts the operation unless the opcode asks to ignore
    failures, in which case only a warning is sent via C{feedback_fn}.

    """
    inst = self.instance
    logging.info("Shutting down instance %s on node %s",
                 inst.name, inst.primary_node)

    shutdown_result = self.rpc.call_instance_shutdown(inst.primary_node, inst,
                                                      self.op.shutdown_timeout)
    failure = shutdown_result.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(inst.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, inst, self.op.ignore_failures)
7309
7310
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  Removes the instance's disks, drops the instance from the cluster
  configuration and schedules its lock for removal.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to send warnings back to the caller
  @param instance: the instance to remove
  @param ignore_failures: if true, a failure to remove the disks is only
      reported via C{feedback_fn} instead of aborting
  @raise errors.OpExecError: if the disks cannot be removed and failures
      are not ignored

  """
  logging.info("Removing block devices for instance %s", instance.name)

  disks_removed = _RemoveDisks(lu, instance)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  pending_removal = lu.remove_locks.get(locking.LEVEL_INSTANCE)
  assert not pending_removal, "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7331
7332
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All work is forwarded to an L{_InstanceQuery} object built from the
  opcode's names, output fields and locking flag.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query helper, filtering on the requested names.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query helper expand names for this LU.

    """
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query helper declare the locks for the given level.

    """
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    return self.iq.OldStyleQuery(self)
7352
7353
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The actual work is done by a L{TLMigrateInstance} tasklet created with
  C{failover=True}; this LU declares the locks and builds the hooks
  environment.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Copies the C{iallocator} and C{target_node} opcode fields onto the LU,
    using C{None} if the opcode does not have them.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    """Lock the instance, prepare node-level locks and create the tasklet.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    # The node-level locks are filled in by DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout,
                                       ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or just the primary and the target node are locked; for
    all other templates the instance's own nodes are locked.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        # The locks were set explicitly, nothing is left to recalculate
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks. The node level may hold the locking.ALL_SET
      # sentinel instead of a list of names (see above); the sentinel must
      # be passed on as-is because it cannot be sliced like a list.
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      if node_locks is locking.ALL_SET:
        self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      # With internal mirroring primary and secondary swap their roles
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); the pre list is
        the master node plus the instance's secondary nodes, the post list
        additionally contains the instance's primary node

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7441
7442
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  The actual work is done by a L{TLMigrateInstance} tasklet created with
  C{failover=False}.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance, prepare node-level locks and create the tasklet.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    # The node-level locks are filled in by DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # Declare the node resource level as well (same as LUInstanceFailover);
    # DeclareLocks below fills it in from the node locks
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=self.op.cleanup,
                        failover=False,
                        fallback=self.op.allow_failover,
                        allow_runtime_changes=self.op.allow_runtime_changes,
                        ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or just the primary and the target node are locked; for
    all other templates the instance's own nodes are locked.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        # The locks were set explicitly, nothing is left to recalculate
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks. The node level may hold the locking.ALL_SET
      # sentinel instead of a list of names (see above); the sentinel must
      # be passed on as-is because it cannot be sliced like a list.
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      if node_locks is locking.ALL_SET:
        self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      # NOTE(review): target_node comes from the opcode and may be None
      # here; LUInstanceFailover uses instance.secondary_nodes[0] instead
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); the pre list is
        the master node plus the instance's secondary nodes, the post list
        additionally contains the instance's primary node

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
7525
7526
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down, its disks are re-created on the target node
  and filled by exporting the data from the source node; afterwards the
  instance is switched to the target node and, if it is marked as up,
  started there.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and the target node.

    The instance's primary node is appended to the node locks later, in
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Declare the node and node resource locks.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the instance's primary node and the target
    node.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); both entries are
        the same list: master node, primary node and target node

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that the target node
    differs from the current primary node, that all disks are plain
    logical volumes or files, and that the target node is usable for the
    instance (online, not drained, VM-capable, instance policy, memory
    and bridges).

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
                                     self.cfg.GetNodeGroup(node.group))
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the target node; the instance is only
      # started after the move if it is marked as up
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @raise errors.OpExecError: if the instance cannot be shut down (unless
        consistency is ignored), if the disks cannot be created or copied,
        or if the instance cannot be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over; stop at the first failure
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # The copy error is reported even if the cleanup itself failed
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
7722
7723
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  No migration is done by this LU itself; it returns one
  L{opcodes.OpInstanceMigrate} job per primary instance of the node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """No argument checks are needed.

    """
    pass

  def ExpandNames(self):
    """Expand the node name and request a shared lock on that node.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    hook_env = {}
    hook_env["NODE_NAME"] = self.op.node_name
    hook_env["ALLOW_RUNTIME_CHANGES"] = self.op.allow_runtime_changes
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); both entries are
        the same single-element list holding the master node

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)

  def CheckPrereq(self):
    """No prerequisites are checked.

    """
    pass

  def Exec(self, feedback_fn):
    """Build one migration job per primary instance of the node.

    @return: a L{ResultWithJobs} holding one single-opcode job for each
        instance returned by L{_GetNodePrimaryInstances}

    """
    # Prepare jobs for migration instances
    allow_runtime_changes = self.op.allow_runtime_changes
    jobs = []
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      migrate_op = \
        opcodes.OpInstanceMigrate(instance_name=inst.name,
                                  mode=self.op.mode,
                                  live=self.op.live,
                                  iallocator=self.op.iallocator,
                                  target_node=self.op.target_node,
                                  allow_runtime_changes=allow_runtime_changes,
                                  ignore_ipolicy=self.op.ignore_ipolicy)
      # Every instance gets a job of its own
      jobs.append([migrate_op])

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
    assert owned_nodes == frozenset([self.op.node_name])

    return ResultWithJobs(jobs)
7787
7788
7789 class TLMigrateInstance(Tasklet):
7790   """Tasklet class for instance migration.
7791
7792   @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we clean up from a failed migration
7797   @type iallocator: string
7798   @ivar iallocator: The iallocator used to determine target_node
7799   @type target_node: string
7800   @ivar target_node: If given, the target_node to reallocate the instance to
7801   @type failover: boolean
7802   @ivar failover: Whether operation results in failover or migration
7803   @type fallback: boolean
7804   @ivar fallback: Whether fallback to failover is allowed if migration not
7805                   possible
7806   @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
7809   @type shutdown_timeout: int
7810   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7811   @type ignore_ipolicy: bool
7812   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7813
7814   """
7815
7816   # Constants
7817   _MIGRATION_POLL_INTERVAL = 1      # seconds
7818   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7819
7820   def __init__(self, lu, instance_name, cleanup=False,
7821                failover=False, fallback=False,
7822                ignore_consistency=False,
7823                allow_runtime_changes=True,
7824                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7825                ignore_ipolicy=False):
7826     """Initializes this class.
7827
7828     """
7829     Tasklet.__init__(self, lu)
7830
7831     # Parameters
7832     self.instance_name = instance_name
7833     self.cleanup = cleanup
7834     self.live = False # will be overridden later
7835     self.failover = failover
7836     self.fallback = fallback
7837     self.ignore_consistency = ignore_consistency
7838     self.shutdown_timeout = shutdown_timeout
7839     self.ignore_ipolicy = ignore_ipolicy
7840     self.allow_runtime_changes = allow_runtime_changes
7841
7842   def CheckPrereq(self):
7843     """Check prerequisites.
7844
7845     This checks that the instance is in the cluster.
7846
7847     """
7848     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7849     instance = self.cfg.GetInstanceInfo(instance_name)
7850     assert instance is not None
7851     self.instance = instance
7852     cluster = self.cfg.GetClusterInfo()
7853
7854     if (not self.cleanup and
7855         not instance.admin_state == constants.ADMINST_UP and
7856         not self.failover and self.fallback):
7857       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7858                       " switching to failover")
7859       self.failover = True
7860
7861     if instance.disk_template not in constants.DTS_MIRRORED:
7862       if self.failover:
7863         text = "failovers"
7864       else:
7865         text = "migrations"
7866       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7867                                  " %s" % (instance.disk_template, text),
7868                                  errors.ECODE_STATE)
7869
7870     if instance.disk_template in constants.DTS_EXT_MIRROR:
7871       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7872
7873       if self.lu.op.iallocator:
7874         self._RunAllocator()
7875       else:
7876         # We set set self.target_node as it is required by
7877         # BuildHooksEnv
7878         self.target_node = self.lu.op.target_node
7879
7880       # Check that the target node is correct in terms of instance policy
7881       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7882       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7883       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7884       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7885                               ignore=self.ignore_ipolicy)
7886
7887       # self.target_node is already populated, either directly or by the
7888       # iallocator run
7889       target_node = self.target_node
7890       if self.target_node == instance.primary_node:
7891         raise errors.OpPrereqError("Cannot migrate instance %s"
7892                                    " to its primary (%s)" %
7893                                    (instance.name, instance.primary_node))
7894
7895       if len(self.lu.tasklets) == 1:
7896         # It is safe to release locks only when we're the only tasklet
7897         # in the LU
7898         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7899                       keep=[instance.primary_node, self.target_node])
7900
7901     else:
7902       secondary_nodes = instance.secondary_nodes
7903       if not secondary_nodes:
7904         raise errors.ConfigurationError("No secondary node but using"
7905                                         " %s disk template" %
7906                                         instance.disk_template)
7907       target_node = secondary_nodes[0]
7908       if self.lu.op.iallocator or (self.lu.op.target_node and
7909                                    self.lu.op.target_node != target_node):
7910         if self.failover:
7911           text = "failed over"
7912         else:
7913           text = "migrated"
7914         raise errors.OpPrereqError("Instances with disk template %s cannot"
7915                                    " be %s to arbitrary nodes"
7916                                    " (neither an iallocator nor a target"
7917                                    " node can be passed)" %
7918                                    (instance.disk_template, text),
7919                                    errors.ECODE_INVAL)
7920       nodeinfo = self.cfg.GetNodeInfo(target_node)
7921       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7922       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7923       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7924                               ignore=self.ignore_ipolicy)
7925
7926     i_be = cluster.FillBE(instance)
7927
7928     # check memory requirements on the secondary node
7929     if (not self.cleanup and
7930          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7931       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7932                                                "migrating instance %s" %
7933                                                instance.name,
7934                                                i_be[constants.BE_MINMEM],
7935                                                instance.hypervisor)
7936     else:
7937       self.lu.LogInfo("Not checking memory on the secondary node as"
7938                       " instance will not be started")
7939
7940     # check if failover must be forced instead of migration
7941     if (not self.cleanup and not self.failover and
7942         i_be[constants.BE_ALWAYS_FAILOVER]):
7943       if self.fallback:
7944         self.lu.LogInfo("Instance configured to always failover; fallback"
7945                         " to failover")
7946         self.failover = True
7947       else:
7948         raise errors.OpPrereqError("This instance has been configured to"
7949                                    " always failover, please allow failover",
7950                                    errors.ECODE_STATE)
7951
7952     # check bridge existance
7953     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7954
7955     if not self.cleanup:
7956       _CheckNodeNotDrained(self.lu, target_node)
7957       if not self.failover:
7958         result = self.rpc.call_instance_migratable(instance.primary_node,
7959                                                    instance)
7960         if result.fail_msg and self.fallback:
7961           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7962                           " failover")
7963           self.failover = True
7964         else:
7965           result.Raise("Can't migrate, please use failover",
7966                        prereq=True, ecode=errors.ECODE_STATE)
7967
7968     assert not (self.failover and self.cleanup)
7969
7970     if not self.failover:
7971       if self.lu.op.live is not None and self.lu.op.mode is not None:
7972         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7973                                    " parameters are accepted",
7974                                    errors.ECODE_INVAL)
7975       if self.lu.op.live is not None:
7976         if self.lu.op.live:
7977           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7978         else:
7979           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7980         # reset the 'live' parameter to None so that repeated
7981         # invocations of CheckPrereq do not raise an exception
7982         self.lu.op.live = None
7983       elif self.lu.op.mode is None:
7984         # read the default value from the hypervisor
7985         i_hv = cluster.FillHV(self.instance, skip_globals=False)
7986         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7987
7988       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7989     else:
7990       # Failover is never live
7991       self.live = False
7992
7993     if not (self.failover or self.cleanup):
7994       remote_info = self.rpc.call_instance_info(instance.primary_node,
7995                                                 instance.name,
7996                                                 instance.hypervisor)
7997       remote_info.Raise("Error checking instance on node %s" %
7998                         instance.primary_node)
7999       instance_running = bool(remote_info.payload)
8000       if instance_running:
8001         self.current_mem = int(remote_info.payload["memory"])
8002
8003   def _RunAllocator(self):
8004     """Run the allocator based on input opcode.
8005
8006     """
8007     # FIXME: add a self.ignore_ipolicy option
8008     ial = IAllocator(self.cfg, self.rpc,
8009                      mode=constants.IALLOCATOR_MODE_RELOC,
8010                      name=self.instance_name,
8011                      # TODO See why hail breaks with a single node below
8012                      relocate_from=[self.instance.primary_node,
8013                                     self.instance.primary_node],
8014                      )
8015
8016     ial.Run(self.lu.op.iallocator)
8017
8018     if not ial.success:
8019       raise errors.OpPrereqError("Can't compute nodes using"
8020                                  " iallocator '%s': %s" %
8021                                  (self.lu.op.iallocator, ial.info),
8022                                  errors.ECODE_NORES)
8023     if len(ial.result) != ial.required_nodes:
8024       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8025                                  " of nodes (%s), required %s" %
8026                                  (self.lu.op.iallocator, len(ial.result),
8027                                   ial.required_nodes), errors.ECODE_FAULT)
8028     self.target_node = ial.result[0]
8029     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8030                  self.instance_name, self.lu.op.iallocator,
8031                  utils.CommaJoin(ial.result))
8032
8033   def _WaitUntilSync(self):
8034     """Poll with custom rpc for disk sync.
8035
8036     This uses our own step-based rpc call.
8037
8038     """
8039     self.feedback_fn("* wait until resync is done")
8040     all_done = False
8041     while not all_done:
8042       all_done = True
8043       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8044                                             self.nodes_ip,
8045                                             self.instance.disks)
8046       min_percent = 100
8047       for node, nres in result.items():
8048         nres.Raise("Cannot resync disks on node %s" % node)
8049         node_done, node_percent = nres.payload
8050         all_done = all_done and node_done
8051         if node_percent is not None:
8052           min_percent = min(min_percent, node_percent)
8053       if not all_done:
8054         if min_percent < 100:
8055           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8056         time.sleep(2)
8057
8058   def _EnsureSecondary(self, node):
8059     """Demote a node to secondary.
8060
8061     """
8062     self.feedback_fn("* switching node %s to secondary mode" % node)
8063
8064     for dev in self.instance.disks:
8065       self.cfg.SetDiskID(dev, node)
8066
8067     result = self.rpc.call_blockdev_close(node, self.instance.name,
8068                                           self.instance.disks)
8069     result.Raise("Cannot change disk to secondary on node %s" % node)
8070
8071   def _GoStandalone(self):
8072     """Disconnect from the network.
8073
8074     """
8075     self.feedback_fn("* changing into standalone mode")
8076     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8077                                                self.instance.disks)
8078     for node, nres in result.items():
8079       nres.Raise("Cannot disconnect disks node %s" % node)
8080
8081   def _GoReconnect(self, multimaster):
8082     """Reconnect to the network.
8083
8084     """
8085     if multimaster:
8086       msg = "dual-master"
8087     else:
8088       msg = "single-master"
8089     self.feedback_fn("* changing disks into %s mode" % msg)
8090     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8091                                            self.instance.disks,
8092                                            self.instance.name, multimaster)
8093     for node, nres in result.items():
8094       nres.Raise("Cannot change disks config on node %s" % node)
8095
8096   def _ExecCleanup(self):
8097     """Try to cleanup after a failed migration.
8098
8099     The cleanup is done by:
8100       - check that the instance is running only on one node
8101         (and update the config if needed)
8102       - change disks on its secondary node to secondary
8103       - wait until disks are fully synchronized
8104       - disconnect from the network
8105       - change disks into single-master mode
8106       - wait again until disks are fully synchronized
8107
8108     """
8109     instance = self.instance
8110     target_node = self.target_node
8111     source_node = self.source_node
8112
8113     # check running on only one node
8114     self.feedback_fn("* checking where the instance actually runs"
8115                      " (if this hangs, the hypervisor might be in"
8116                      " a bad state)")
8117     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8118     for node, result in ins_l.items():
8119       result.Raise("Can't contact node %s" % node)
8120
8121     runningon_source = instance.name in ins_l[source_node].payload
8122     runningon_target = instance.name in ins_l[target_node].payload
8123
8124     if runningon_source and runningon_target:
8125       raise errors.OpExecError("Instance seems to be running on two nodes,"
8126                                " or the hypervisor is confused; you will have"
8127                                " to ensure manually that it runs only on one"
8128                                " and restart this operation")
8129
8130     if not (runningon_source or runningon_target):
8131       raise errors.OpExecError("Instance does not seem to be running at all;"
8132                                " in this case it's safer to repair by"
8133                                " running 'gnt-instance stop' to ensure disk"
8134                                " shutdown, and then restarting it")
8135
8136     if runningon_target:
8137       # the migration has actually succeeded, we need to update the config
8138       self.feedback_fn("* instance running on secondary node (%s),"
8139                        " updating config" % target_node)
8140       instance.primary_node = target_node
8141       self.cfg.Update(instance, self.feedback_fn)
8142       demoted_node = source_node
8143     else:
8144       self.feedback_fn("* instance confirmed to be running on its"
8145                        " primary node (%s)" % source_node)
8146       demoted_node = target_node
8147
8148     if instance.disk_template in constants.DTS_INT_MIRROR:
8149       self._EnsureSecondary(demoted_node)
8150       try:
8151         self._WaitUntilSync()
8152       except errors.OpExecError:
8153         # we ignore here errors, since if the device is standalone, it
8154         # won't be able to sync
8155         pass
8156       self._GoStandalone()
8157       self._GoReconnect(False)
8158       self._WaitUntilSync()
8159
8160     self.feedback_fn("* done")
8161
  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    The disks on the target node are switched back to secondary mode, then
    the disks are disconnected, reconnected in single-master mode and
    waited upon until they are in sync. Nothing is done for disk templates
    in L{constants.DTS_EXT_MIRROR}.

    This is a best-effort operation: an L{errors.OpExecError} raised by any
    of the steps is reported as a warning and not propagated.

    """
    target_node = self.target_node
    # externally mirrored templates skip the demote/reconnect steps below
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      # only warn: the caller is already handling a failed migration
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))
8179
8180   def _AbortMigration(self):
8181     """Call the hypervisor code to abort a started migration.
8182
8183     """
8184     instance = self.instance
8185     target_node = self.target_node
8186     source_node = self.source_node
8187     migration_info = self.migration_info
8188
8189     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8190                                                                  instance,
8191                                                                  migration_info,
8192                                                                  False)
8193     abort_msg = abort_result.fail_msg
8194     if abort_msg:
8195       logging.error("Aborting migration failed on target node %s: %s",
8196                     target_node, abort_msg)
8197       # Don't raise an exception here, as we stil have to try to revert the
8198       # disk status, even if this step failed.
8199
8200     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8201         instance, False, self.live)
8202     abort_msg = abort_result.fail_msg
8203     if abort_msg:
8204       logging.error("Aborting migration failed on source node %s: %s",
8205                     source_node, abort_msg)
8206
8207   def _ExecMigration(self):
8208     """Migrate an instance.
8209
8210     The migrate is done by:
8211       - change the disks into dual-master mode
8212       - wait until disks are fully synchronized again
8213       - migrate the instance
8214       - change disks on the new secondary node (the old primary) to secondary
8215       - wait until disks are fully synchronized
8216       - change disks into single-master mode
8217
8218     """
8219     instance = self.instance
8220     target_node = self.target_node
8221     source_node = self.source_node
8222
8223     # Check for hypervisor version mismatch and warn the user.
8224     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8225                                        None, [self.instance.hypervisor])
8226     for ninfo in nodeinfo.values():
8227       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8228                   ninfo.node)
8229     (_, _, (src_info, )) = nodeinfo[source_node].payload
8230     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8231
8232     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8233         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8234       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8235       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8236       if src_version != dst_version:
8237         self.feedback_fn("* warning: hypervisor version mismatch between"
8238                          " source (%s) and target (%s) node" %
8239                          (src_version, dst_version))
8240
8241     self.feedback_fn("* checking disk consistency between source and target")
8242     for (idx, dev) in enumerate(instance.disks):
8243       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8244         raise errors.OpExecError("Disk %s is degraded or not fully"
8245                                  " synchronized on target node,"
8246                                  " aborting migration" % idx)
8247
8248     if self.current_mem > self.tgt_free_mem:
8249       if not self.allow_runtime_changes:
8250         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8251                                  " free memory to fit instance %s on target"
8252                                  " node %s (have %dMB, need %dMB)" %
8253                                  (instance.name, target_node,
8254                                   self.tgt_free_mem, self.current_mem))
8255       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8256       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8257                                                      instance,
8258                                                      self.tgt_free_mem)
8259       rpcres.Raise("Cannot modify instance runtime memory")
8260
8261     # First get the migration information from the remote node
8262     result = self.rpc.call_migration_info(source_node, instance)
8263     msg = result.fail_msg
8264     if msg:
8265       log_err = ("Failed fetching source migration information from %s: %s" %
8266                  (source_node, msg))
8267       logging.error(log_err)
8268       raise errors.OpExecError(log_err)
8269
8270     self.migration_info = migration_info = result.payload
8271
8272     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8273       # Then switch the disks to master/master mode
8274       self._EnsureSecondary(target_node)
8275       self._GoStandalone()
8276       self._GoReconnect(True)
8277       self._WaitUntilSync()
8278
8279     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8280     result = self.rpc.call_accept_instance(target_node,
8281                                            instance,
8282                                            migration_info,
8283                                            self.nodes_ip[target_node])
8284
8285     msg = result.fail_msg
8286     if msg:
8287       logging.error("Instance pre-migration failed, trying to revert"
8288                     " disk status: %s", msg)
8289       self.feedback_fn("Pre-migration failed, aborting")
8290       self._AbortMigration()
8291       self._RevertDiskStatus()
8292       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8293                                (instance.name, msg))
8294
8295     self.feedback_fn("* migrating instance to %s" % target_node)
8296     result = self.rpc.call_instance_migrate(source_node, instance,
8297                                             self.nodes_ip[target_node],
8298                                             self.live)
8299     msg = result.fail_msg
8300     if msg:
8301       logging.error("Instance migration failed, trying to revert"
8302                     " disk status: %s", msg)
8303       self.feedback_fn("Migration failed, aborting")
8304       self._AbortMigration()
8305       self._RevertDiskStatus()
8306       raise errors.OpExecError("Could not migrate instance %s: %s" %
8307                                (instance.name, msg))
8308
8309     self.feedback_fn("* starting memory transfer")
8310     last_feedback = time.time()
8311     while True:
8312       result = self.rpc.call_instance_get_migration_status(source_node,
8313                                                            instance)
8314       msg = result.fail_msg
8315       ms = result.payload   # MigrationStatus instance
8316       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8317         logging.error("Instance migration failed, trying to revert"
8318                       " disk status: %s", msg)
8319         self.feedback_fn("Migration failed, aborting")
8320         self._AbortMigration()
8321         self._RevertDiskStatus()
8322         raise errors.OpExecError("Could not migrate instance %s: %s" %
8323                                  (instance.name, msg))
8324
8325       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8326         self.feedback_fn("* memory transfer complete")
8327         break
8328
8329       if (utils.TimeoutExpired(last_feedback,
8330                                self._MIGRATION_FEEDBACK_INTERVAL) and
8331           ms.transferred_ram is not None):
8332         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8333         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8334         last_feedback = time.time()
8335
8336       time.sleep(self._MIGRATION_POLL_INTERVAL)
8337
8338     result = self.rpc.call_instance_finalize_migration_src(source_node,
8339                                                            instance,
8340                                                            True,
8341                                                            self.live)
8342     msg = result.fail_msg
8343     if msg:
8344       logging.error("Instance migration succeeded, but finalization failed"
8345                     " on the source node: %s", msg)
8346       raise errors.OpExecError("Could not finalize instance migration: %s" %
8347                                msg)
8348
8349     instance.primary_node = target_node
8350
8351     # distribute new instance config to the other nodes
8352     self.cfg.Update(instance, self.feedback_fn)
8353
8354     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8355                                                            instance,
8356                                                            migration_info,
8357                                                            True)
8358     msg = result.fail_msg
8359     if msg:
8360       logging.error("Instance migration succeeded, but finalization failed"
8361                     " on the target node: %s", msg)
8362       raise errors.OpExecError("Could not finalize instance migration: %s" %
8363                                msg)
8364
8365     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8366       self._EnsureSecondary(source_node)
8367       self._WaitUntilSync()
8368       self._GoStandalone()
8369       self._GoReconnect(False)
8370       self._WaitUntilSync()
8371
8372     # If the instance's disk template is `rbd' and there was a successful
8373     # migration, unmap the device from the source node.
8374     if self.instance.disk_template == constants.DT_RBD:
8375       disks = _ExpandCheckDisks(instance, instance.disks)
8376       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8377       for disk in disks:
8378         result = self.rpc.call_blockdev_shutdown(source_node, disk)
8379         msg = result.fail_msg
8380         if msg:
8381           logging.error("Migration was successful, but couldn't unmap the"
8382                         " block device %s on source node %s: %s",
8383                         disk.iv_name, source_node, msg)
8384           logging.error("You need to unmap the device %s manually on %s",
8385                         disk.iv_name, source_node)
8386
8387     self.feedback_fn("* done")
8388
8389   def _ExecFailover(self):
8390     """Failover an instance.
8391
8392     The failover is done by shutting it down on its present node and
8393     starting it on the secondary.
8394
8395     """
8396     instance = self.instance
8397     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8398
8399     source_node = instance.primary_node
8400     target_node = self.target_node
8401
8402     if instance.admin_state == constants.ADMINST_UP:
8403       self.feedback_fn("* checking disk consistency between source and target")
8404       for (idx, dev) in enumerate(instance.disks):
8405         # for drbd, these are drbd over lvm
8406         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8407           if primary_node.offline:
8408             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8409                              " target node %s" %
8410                              (primary_node.name, idx, target_node))
8411           elif not self.ignore_consistency:
8412             raise errors.OpExecError("Disk %s is degraded on target node,"
8413                                      " aborting failover" % idx)
8414     else:
8415       self.feedback_fn("* not checking disk consistency as instance is not"
8416                        " running")
8417
8418     self.feedback_fn("* shutting down instance on source node")
8419     logging.info("Shutting down instance %s on node %s",
8420                  instance.name, source_node)
8421
8422     result = self.rpc.call_instance_shutdown(source_node, instance,
8423                                              self.shutdown_timeout)
8424     msg = result.fail_msg
8425     if msg:
8426       if self.ignore_consistency or primary_node.offline:
8427         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8428                            " proceeding anyway; please make sure node"
8429                            " %s is down; error details: %s",
8430                            instance.name, source_node, source_node, msg)
8431       else:
8432         raise errors.OpExecError("Could not shutdown instance %s on"
8433                                  " node %s: %s" %
8434                                  (instance.name, source_node, msg))
8435
8436     self.feedback_fn("* deactivating the instance's disks on source node")
8437     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8438       raise errors.OpExecError("Can't shut down the instance's disks")
8439
8440     instance.primary_node = target_node
8441     # distribute new instance config to the other nodes
8442     self.cfg.Update(instance, self.feedback_fn)
8443
8444     # Only start the instance if it's marked as up
8445     if instance.admin_state == constants.ADMINST_UP:
8446       self.feedback_fn("* activating the instance's disks on target node %s" %
8447                        target_node)
8448       logging.info("Starting instance %s on node %s",
8449                    instance.name, target_node)
8450
8451       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8452                                            ignore_secondaries=True)
8453       if not disks_ok:
8454         _ShutdownInstanceDisks(self.lu, instance)
8455         raise errors.OpExecError("Can't activate the instance's disks")
8456
8457       self.feedback_fn("* starting the instance on the target node %s" %
8458                        target_node)
8459       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8460                                             False)
8461       msg = result.fail_msg
8462       if msg:
8463         _ShutdownInstanceDisks(self.lu, instance)
8464         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8465                                  (instance.name, target_node, msg))
8466
8467   def Exec(self, feedback_fn):
8468     """Perform the migration.
8469
8470     """
8471     self.feedback_fn = feedback_fn
8472     self.source_node = self.instance.primary_node
8473
8474     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8475     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8476       self.target_node = self.instance.secondary_nodes[0]
8477       # Otherwise self.target_node has been populated either
8478       # directly, or through an iallocator.
8479
8480     self.all_nodes = [self.source_node, self.target_node]
8481     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8482                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8483
8484     if self.failover:
8485       feedback_fn("Failover instance %s" % self.instance.name)
8486       self._ExecFailover()
8487     else:
8488       feedback_fn("Migrating instance %s" % self.instance.name)
8489
8490       if self.cleanup:
8491         return self._ExecCleanup()
8492       else:
8493         return self._ExecMigration()
8494
8495
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  The children of the device are always visited first. The device itself
  is only created when creation is forced, either by the caller or because
  the device reports that it has to be created on secondaries; in the
  latter case the forcing also applies to all of its children.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      is turned on for any device whose CreateOnSecondary() method
      returns a true value
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  create_here = force_create
  if device.CreateOnSecondary():
    create_here = True

  # devices without children carry a false value here
  for child in (device.children or []):
    _CreateBlockDev(lu, node, instance, child, create_here,
                    info, force_open)

  if create_here:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8536
8537
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  The children of the device are not touched, so they must already have
  been created. If the device has no physical ID yet, the payload of the
  creation RPC is stored as its physical ID.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  owner = instance.name
  lu.cfg.SetDiskID(device, node)
  create_result = lu.rpc.call_blockdev_create(node, device, device.size,
                                              owner, force_open, info)
  failure_text = ("Can't create block device %s on"
                  " node %s for instance %s" % (device, node, owner))
  create_result.Raise(failure_text)
  if device.physical_id is None:
    device.physical_id = create_result.payload
8566
8567
def _GenerateUniqueNames(lu, exts):
  """Generate one unique name per requested suffix.

  For each entry of C{exts} a new unique ID is requested from the
  configuration and the entry is appended to it.

  @param lu: the lu on whose behalf we execute
  @param exts: the suffixes to append, one per name to generate
  @return: list of generated names, in the order of C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(lu.proc.GetECId()), suffix)
          for suffix in exts]
8579
8580
def _ComputeLDParams(disk_template, disk_params):
  """Computes Logical Disk parameters from Disk Template parameters.

  @type disk_template: string
  @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
  @type disk_params: dict
  @param disk_params: disk template parameters;
      dict(template_name -> parameters)
  @rtype: list(dict)
  @return: a list of dicts, one for each node of the disk hierarchy. Each dict
    contains the LD parameters of the node. The tree is flattened in-order.
    Every dict is built through L{objects.FillDict}, so the entries of
    L{constants.DISK_LD_DEFAULTS} themselves are never handed out. The list
    is empty for templates not handled below.
  @raise errors.ProgrammerError: if the disk template is not one of
      L{constants.DISK_TEMPLATES}

  """
  if disk_template not in constants.DISK_TEMPLATES:
    raise errors.ProgrammerError("Unknown disk template %s" % disk_template)

  result = list()
  dt_params = disk_params[disk_template]
  if disk_template == constants.DT_DRBD8:
    drbd_params = {
      constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
      constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
      constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
      constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
      constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
      constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
      constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
      constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
      constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
      constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
      constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
      constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
      }

    drbd_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
                       drbd_params)

    result.append(drbd_params)

    # data LV
    data_params = {
      constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
      }
    data_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       data_params)
    result.append(data_params)

    # metadata LV
    meta_params = {
      constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
      }
    meta_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       meta_params)
    result.append(meta_params)

  elif (disk_template == constants.DT_FILE or
        disk_template == constants.DT_SHARED_FILE):
    # No template-level overrides; still go through FillDict like the other
    # branches so that callers never get (and possibly modify) the shared
    # defaults dictionary itself
    result.append(objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_FILE],
                                   {}))

  elif disk_template == constants.DT_PLAIN:
    params = {
      constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
      }
    params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       params)
    result.append(params)

  elif disk_template == constants.DT_BLOCK:
    # same reasoning as for the file-based templates above
    result.append(
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV], {}))

  elif disk_template == constants.DT_RBD:
    params = {
      constants.LDP_POOL: dt_params[constants.RBD_POOL]
      }
    params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
                       params)
    result.append(params)

  return result
8664
8665
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor, drbd_params, data_params,
                         meta_params):
  """Build a DRBD8 disk object together with its two LV children.

  A port and a shared secret are obtained from the configuration, then the
  data LV (of the requested size), the metadata LV (of size
  C{DRBD_META_SIZE}) and the DRBD8 device on top of them are created.

  @param lu: the lu on whose behalf we execute
  @param primary: first node entry of the DRBD logical ID
  @param secondary: second node entry of the DRBD logical ID
  @param size: size of the data LV and of the DRBD device
  @param vgnames: pair of volume groups (data, metadata)
  @param names: pair of LV names (data, metadata)
  @param iv_name: the iv_name of the DRBD device
  @param p_minor: minor stored in the logical ID after the port
  @param s_minor: minor stored in the logical ID after C{p_minor}
  @param drbd_params: parameters of the DRBD device
  @param data_params: parameters of the data LV
  @param meta_params: parameters of the metadata LV
  @return: the DRBD8 L{objects.Disk}, with the two LVs as children

  """
  assert len(vgnames) == len(names) == 2
  drbd_port = lu.cfg.AllocatePort()
  secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgnames[0], names[0]),
                         params=data_params)
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                         logical_id=(vgnames[1], names[1]),
                         params=meta_params)

  drbd_logical_id = (primary, secondary, drbd_port,
                     p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_logical_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name, params=drbd_params)
8689
8690
# Prefix prepended to ".disk<N>" when _GenerateDiskTemplate generates
# unique disk names. Templates without an entry here get no generated
# names at all (the lookup yields None).
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  }


# Logical device type used by _GenerateDiskTemplate for the disks of each
# single-device template (DRBD8 and diskless are handled separately there)
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  }
8704
8705
def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
    secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
    feedback_fn, disk_params,
    _req_file_storage=opcodes.RequireFileStorage,
    _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template to generate disks for
  @param instance_name: instance name, passed on when allocating DRBD
      minors
  @param primary_node: the primary node of the instance
  @param secondary_nodes: list of secondary nodes; must hold exactly one
      node for DRBD8 and be empty for all other templates
  @param disk_info: list of disk specifications (dicts keyed by the
      C{constants.IDISK_*} values)
  @param file_storage_dir: directory used in the logical id of
      file-based disks
  @param file_driver: driver used in the logical id of file-based disks
  @param base_index: index of the first generated disk
  @param feedback_fn: function called with a progress message for every
      non-DRBD disk
  @param disk_params: disk parameters, handed to L{_ComputeLDParams}
  @param _req_file_storage: check invoked for the file template
  @param _req_shr_file_storage: check invoked for the shared file
      template
  @return: list of L{objects.Disk}, empty for the diskless template
  @raise errors.ProgrammerError: on a wrong number of secondary nodes or
      an unknown disk template

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  ld_params = _ComputeLDParams(template_name, disk_params)

  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_DRBD8:
    drbd_params, data_params, meta_params = ld_params
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # Two minors per disk: one on the primary, one on the secondary
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # Two LV names per disk, stored as consecutive (data, meta) pairs
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1],
                                      drbd_params, data_params, meta_params)
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  else:
    if secondary_nodes:
      raise errors.ProgrammerError("Wrong template configuration")

    if template_name == constants.DT_FILE:
      _req_file_storage()
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()

    # Only templates with a name prefix get generated unique names
    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = None
    else:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])

    # Each callback takes (position in disk_info, absolute disk index,
    # disk specification) and returns the logical id of that disk
    if template_name == constants.DT_PLAIN:
      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])
    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
      logical_id_fn = \
        lambda _, disk_index, disk: (file_driver,
                                     "%s/disk%d" % (file_storage_dir,
                                                    disk_index))
    elif template_name == constants.DT_BLOCK:
      logical_id_fn = \
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                       disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
    else:
      raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

    # Looked up only after the dispatch above, so that an unknown template
    # is reported via ProgrammerError instead of a KeyError from this table
    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index, disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
                                params=ld_params[0]))

  return disks
8799
8800
8801 def _GetInstanceInfoText(instance):
8802   """Compute that text that should be added to the disk's metadata.
8803
8804   """
8805   return "originstname+%s" % instance.name
8806
8807
8808 def _CalcEta(time_taken, written, total_size):
8809   """Calculates the ETA based on size written and total size.
8810
8811   @param time_taken: The time taken so far
8812   @param written: amount written so far
8813   @param total_size: The total size of data to be written
8814   @return: The remaining time in seconds
8815
8816   """
8817   avg_time = time_taken / float(written)
8818   return (total_size - written) * avg_time
8819
8820
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  Disk syncing is paused on the primary node while every disk is
  overwritten chunk by chunk; syncing is resumed afterwards, even if the
  wipe itself failed.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: None; a failed pause or wipe RPC is reported through the
      C{Raise} method of the RPC result, while a failed resume only
      produces warnings

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
  # A failed RPC has no per-disk payload to iterate over, so report the
  # failure explicitly before looking at the individual results
  result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur; it must not drop to zero though, as the loop below would
      # then never advance on very small disks
      wipe_chunk_size = max(1, int(wipe_chunk_size))

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      # Starting at zero makes the first chunk always report progress
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # Report progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    if result.fail_msg:
      # Only warn here: raising from the finally clause would hide the
      # error of a failed wipe
      lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
                    " please have a look at the status and troubleshoot"
                    " the issue: %s", node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resume sync of disk %d failed, please have a"
                        " look at the status and troubleshoot the issue", idx)
          logging.warn("resume-sync of instance %s for disks %d failed",
                       instance.name, idx)
8888
8889
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For file-based disk
  templates the storage directory (taken from the first disk's logical
  id) is created first. Failures are reported by the called helpers;
  nothing is returned.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
      and restricts the creation to that node

  """
  info_text = _GetInstanceInfoText(instance)

  if target_node is not None:
    primary = target_node
    nodes = [primary]
  else:
    primary = instance.primary_node
    nodes = instance.all_nodes

  if instance.disk_template in constants.DTS_FILEBASED:
    first_disk_path = instance.disks[0].logical_id[1]
    file_storage_dir = os.path.dirname(first_disk_path)
    dir_result = lu.rpc.call_file_storage_dir_create(primary,
                                                     file_storage_dir)
    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (file_storage_dir, primary))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for disk_idx, disk in enumerate(instance.disks):
    skip_disk = to_skip and disk_idx in to_skip
    if not skip_disk:
      logging.info("Creating disk %s for instance '%s'", disk_idx,
                   instance.name)
      #HARDCODE
      for node in nodes:
        # The same flag, true only on the primary node, is passed twice
        on_primary = node == primary
        _CreateBlockDev(lu, node, instance, disk, on_primary, info_text,
                        on_primary)
8932
8933
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for (idx, device) in enumerate(instance.disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  # NOTE(review): _CreateDisks creates the directory for all of
  # constants.DTS_FILEBASED, while only DT_FILE is cleaned up here;
  # confirm whether this asymmetry is intended
  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # Name the node the RPC was actually sent to, which is not the
      # primary node when target_node was given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
8986
8987
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  @param disk_template: the disk template to compute the requirements for
  @param disks: list of disk specifications (dicts holding at least the
      C{constants.IDISK_VG} and C{constants.IDISK_SIZE} keys)
  @rtype: dict
  @return: map from volume group name to the space required in it; empty
      for templates that do not use volume groups
  @raise errors.ProgrammerError: if the requirements of the disk
      template are not known

  """
  def _compute(disks, payload):
    """Sum up disk sizes per volume group.

    @param disks: the disk specifications
    @param payload: overhead added for each disk
    @return: map from volume group name to total size

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # Accumulate on the entry of this disk's volume group, so that
      # several disks in the same group add up
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
9018
9019
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk size required by a disk template.

  @param disk_template: the disk template to compute the requirement for
  @param disks: iterable of disk specifications holding the
      C{constants.IDISK_SIZE} key
  @return: the total size for the plain and DRBD8 templates, C{None} for
      the diskless and file templates, zero for the remaining known ones
  @raise errors.ProgrammerError: if the requirement of the disk template
      is not known

  """
  plain_total = sum(d[constants.IDISK_SIZE] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks)

  # Required free disk space as a function of disk and swap space
  requirements = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
    constants.DT_RBD: 0,
  }

  try:
    return requirements[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)
9042
9043
9044 def _FilterVmNodes(lu, nodenames):
9045   """Filters out non-vm_capable nodes from a list.
9046
9047   @type lu: L{LogicalUnit}
9048   @param lu: the logical unit for which we check
9049   @type nodenames: list
9050   @param nodenames: the list of nodes on which we should check
9051   @rtype: list
9052   @return: the list of vm-capable nodes
9053
9054   """
9055   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9056   return [name for name in nodenames if name not in vm_nodes]
9057
9058
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Validate hypervisor parameters on a set of nodes.

  Shared by instance creation and instance modification. The given
  parameters are layered over the cluster-level parameters of the
  hypervisor and validated via RPC on the vm-capable nodes; nodes
  reported as offline are not checked.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)

  cluster_defaults = lu.cfg.GetClusterInfo().hvparams.get(hvname, {})
  full_params = objects.FillDict(cluster_defaults, hvparams)

  validation = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname,
                                                      full_params)
  for nodename in vm_nodes:
    node_result = validation[nodename]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        nodename)
9087
9088
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """Validate OS parameters on a set of nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  per_node = lu.rpc.call_os_validate(vm_nodes, required, osname, checks,
                                     osparams)

  for nodename, node_result in per_node.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    node_result.Raise("OS Parameters validation failed on node %s" % nodename)
    if node_result.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, nodename)
9117
9118
9119 class LUInstanceCreate(LogicalUnit):
9120   """Create an instance.
9121
9122   """
9123   HPATH = "instance-add"
9124   HTYPE = constants.HTYPE_INSTANCE
9125   REQ_BGL = False
9126
  def CheckArguments(self):
    """Check arguments.

    Validates the opcode fields that need no locks and normalizes some
    of them in place: the instance name, the start flag (cleared in
    no-install mode), the secondary node (dropped for non-mirrored
    templates) and force_variant (forced on import).

    Besides the changes to C{self.op}, this sets C{self.adopt_disks},
    C{self.check_ip} and C{self._cds}. When name checking is enabled it
    also sets C{self.hostname1}; for remote imports it additionally sets
    C{self.source_x509_ca_pem}, C{self.source_x509_ca} and
    C{self.source_instance_name}.

    @raise errors.OpPrereqError: if one of the checks done here fails
        (the helpers called may raise errors of their own)

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    # mixing adopted and newly created disks is not supported
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      # adoption is restricted to some templates and cannot be combined
      # with an iallocator or an import
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      # some templates only work with adopted disks
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      # from here on the name returned by the lookup is used
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    # the secondary node is only checked when the primary node is known
    # at this point
    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    # cluster domain secret, used below for the remote import checks
    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        # the second element of the returned pair is not needed here
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      # an error code of None means the certificate passed verification
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      # store the name as returned by the hostname lookup
      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
9281
9282   def ExpandNames(self):
9283     """ExpandNames for CreateInstance.
9284
9285     Figure out the right locks for instance creation.
9286
9287     """
9288     self.needed_locks = {}
9289
9290     instance_name = self.op.instance_name
9291     # this is just a preventive check, but someone might still add this
9292     # instance in the meantime, and creation will fail at lock-add time
9293     if instance_name in self.cfg.GetInstanceList():
9294       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9295                                  instance_name, errors.ECODE_EXISTS)
9296
9297     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9298
9299     if self.op.iallocator:
9300       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9301       # specifying a group on instance creation and then selecting nodes from
9302       # that group
9303       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9304       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9305     else:
9306       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9307       nodelist = [self.op.pnode]
9308       if self.op.snode is not None:
9309         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9310         nodelist.append(self.op.snode)
9311       self.needed_locks[locking.LEVEL_NODE] = nodelist
9312       # Lock resources of instance's primary and secondary nodes (copy to
9313       # prevent accidential modification)
9314       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9315
9316     # in case of import lock the source node too
9317     if self.op.mode == constants.INSTANCE_IMPORT:
9318       src_node = self.op.src_node
9319       src_path = self.op.src_path
9320
9321       if src_path is None:
9322         self.op.src_path = src_path = self.op.instance_name
9323
9324       if src_node is None:
9325         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9326         self.op.src_node = None
9327         if os.path.isabs(src_path):
9328           raise errors.OpPrereqError("Importing an instance from a path"
9329                                      " requires a source node option",
9330                                      errors.ECODE_INVAL)
9331       else:
9332         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9333         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9334           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9335         if not os.path.isabs(src_path):
9336           self.op.src_path = src_path = \
9337             utils.PathJoin(constants.EXPORT_DIR, src_path)
9338
9339   def _RunAllocator(self):
9340     """Run the allocator based on input opcode.
9341
9342     """
9343     nics = [n.ToDict() for n in self.nics]
9344     ial = IAllocator(self.cfg, self.rpc,
9345                      mode=constants.IALLOCATOR_MODE_ALLOC,
9346                      name=self.op.instance_name,
9347                      disk_template=self.op.disk_template,
9348                      tags=self.op.tags,
9349                      os=self.op.os_type,
9350                      vcpus=self.be_full[constants.BE_VCPUS],
9351                      memory=self.be_full[constants.BE_MAXMEM],
9352                      spindle_usage=self.be_full[constants.BE_SPINDLE_USAGE],
9353                      disks=self.disks,
9354                      nics=nics,
9355                      hypervisor=self.op.hypervisor,
9356                      )
9357
9358     ial.Run(self.op.iallocator)
9359
9360     if not ial.success:
9361       raise errors.OpPrereqError("Can't compute nodes using"
9362                                  " iallocator '%s': %s" %
9363                                  (self.op.iallocator, ial.info),
9364                                  errors.ECODE_NORES)
9365     if len(ial.result) != ial.required_nodes:
9366       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9367                                  " of nodes (%s), required %s" %
9368                                  (self.op.iallocator, len(ial.result),
9369                                   ial.required_nodes), errors.ECODE_FAULT)
9370     self.op.pnode = ial.result[0]
9371     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9372                  self.op.instance_name, self.op.iallocator,
9373                  utils.CommaJoin(ial.result))
9374     if ial.required_nodes == 2:
9375       self.op.snode = ial.result[1]
9376
9377   def BuildHooksEnv(self):
9378     """Build hooks env.
9379
9380     This runs on master, primary and secondary nodes of the instance.
9381
9382     """
9383     env = {
9384       "ADD_MODE": self.op.mode,
9385       }
9386     if self.op.mode == constants.INSTANCE_IMPORT:
9387       env["SRC_NODE"] = self.op.src_node
9388       env["SRC_PATH"] = self.op.src_path
9389       env["SRC_IMAGES"] = self.src_images
9390
9391     env.update(_BuildInstanceHookEnv(
9392       name=self.op.instance_name,
9393       primary_node=self.op.pnode,
9394       secondary_nodes=self.secondaries,
9395       status=self.op.start,
9396       os_type=self.op.os_type,
9397       minmem=self.be_full[constants.BE_MINMEM],
9398       maxmem=self.be_full[constants.BE_MAXMEM],
9399       vcpus=self.be_full[constants.BE_VCPUS],
9400       nics=_NICListToTuple(self, self.nics),
9401       disk_template=self.op.disk_template,
9402       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9403              for d in self.disks],
9404       bep=self.be_full,
9405       hvp=self.hv_full,
9406       hypervisor_name=self.op.hypervisor,
9407       tags=self.op.tags,
9408     ))
9409
9410     return env
9411
9412   def BuildHooksNodes(self):
9413     """Build hooks nodes.
9414
9415     """
9416     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9417     return nl, nl
9418
9419   def _ReadExportInfo(self):
9420     """Reads the export information from disk.
9421
9422     It will override the opcode source node and path with the actual
9423     information, if these two were not specified before.
9424
9425     @return: the export information
9426
9427     """
9428     assert self.op.mode == constants.INSTANCE_IMPORT
9429
9430     src_node = self.op.src_node
9431     src_path = self.op.src_path
9432
9433     if src_node is None:
9434       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9435       exp_list = self.rpc.call_export_list(locked_nodes)
9436       found = False
9437       for node in exp_list:
9438         if exp_list[node].fail_msg:
9439           continue
9440         if src_path in exp_list[node].payload:
9441           found = True
9442           self.op.src_node = src_node = node
9443           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9444                                                        src_path)
9445           break
9446       if not found:
9447         raise errors.OpPrereqError("No export found for relative path %s" %
9448                                     src_path, errors.ECODE_INVAL)
9449
9450     _CheckNodeOnline(self, src_node)
9451     result = self.rpc.call_export_info(src_node, src_path)
9452     result.Raise("No export or invalid export found in dir %s" % src_path)
9453
9454     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9455     if not export_info.has_section(constants.INISECT_EXP):
9456       raise errors.ProgrammerError("Corrupted export config",
9457                                    errors.ECODE_ENVIRON)
9458
9459     ei_version = export_info.get(constants.INISECT_EXP, "version")
9460     if (int(ei_version) != constants.EXPORT_VERSION):
9461       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9462                                  (ei_version, constants.EXPORT_VERSION),
9463                                  errors.ECODE_ENVIRON)
9464     return export_info
9465
9466   def _ReadExportParams(self, einfo):
9467     """Use export parameters as defaults.
9468
9469     In case the opcode doesn't specify (as in override) some instance
9470     parameters, then try to use them from the export information, if
9471     that declares them.
9472
9473     """
9474     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9475
9476     if self.op.disk_template is None:
9477       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9478         self.op.disk_template = einfo.get(constants.INISECT_INS,
9479                                           "disk_template")
9480         if self.op.disk_template not in constants.DISK_TEMPLATES:
9481           raise errors.OpPrereqError("Disk template specified in configuration"
9482                                      " file is not one of the allowed values:"
9483                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9484       else:
9485         raise errors.OpPrereqError("No disk template specified and the export"
9486                                    " is missing the disk_template information",
9487                                    errors.ECODE_INVAL)
9488
9489     if not self.op.disks:
9490       disks = []
9491       # TODO: import the disk iv_name too
9492       for idx in range(constants.MAX_DISKS):
9493         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9494           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9495           disks.append({constants.IDISK_SIZE: disk_sz})
9496       self.op.disks = disks
9497       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9498         raise errors.OpPrereqError("No disk info specified and the export"
9499                                    " is missing the disk information",
9500                                    errors.ECODE_INVAL)
9501
9502     if not self.op.nics:
9503       nics = []
9504       for idx in range(constants.MAX_NICS):
9505         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9506           ndict = {}
9507           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9508             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9509             ndict[name] = v
9510           nics.append(ndict)
9511         else:
9512           break
9513       self.op.nics = nics
9514
9515     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9516       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9517
9518     if (self.op.hypervisor is None and
9519         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9520       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9521
9522     if einfo.has_section(constants.INISECT_HYP):
9523       # use the export parameters but do not override the ones
9524       # specified by the user
9525       for name, value in einfo.items(constants.INISECT_HYP):
9526         if name not in self.op.hvparams:
9527           self.op.hvparams[name] = value
9528
9529     if einfo.has_section(constants.INISECT_BEP):
9530       # use the parameters, without overriding
9531       for name, value in einfo.items(constants.INISECT_BEP):
9532         if name not in self.op.beparams:
9533           self.op.beparams[name] = value
9534         # Compatibility for the old "memory" be param
9535         if name == constants.BE_MEMORY:
9536           if constants.BE_MAXMEM not in self.op.beparams:
9537             self.op.beparams[constants.BE_MAXMEM] = value
9538           if constants.BE_MINMEM not in self.op.beparams:
9539             self.op.beparams[constants.BE_MINMEM] = value
9540     else:
9541       # try to read the parameters old style, from the main section
9542       for name in constants.BES_PARAMETERS:
9543         if (name not in self.op.beparams and
9544             einfo.has_option(constants.INISECT_INS, name)):
9545           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9546
9547     if einfo.has_section(constants.INISECT_OSP):
9548       # use the parameters, without overriding
9549       for name, value in einfo.items(constants.INISECT_OSP):
9550         if name not in self.op.osparams:
9551           self.op.osparams[name] = value
9552
9553   def _RevertToDefaults(self, cluster):
9554     """Revert the instance parameters to the default values.
9555
9556     """
9557     # hvparams
9558     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9559     for name in self.op.hvparams.keys():
9560       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9561         del self.op.hvparams[name]
9562     # beparams
9563     be_defs = cluster.SimpleFillBE({})
9564     for name in self.op.beparams.keys():
9565       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9566         del self.op.beparams[name]
9567     # nic params
9568     nic_defs = cluster.SimpleFillNIC({})
9569     for nic in self.op.nics:
9570       for name in constants.NICS_PARAMETERS:
9571         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9572           del nic[name]
9573     # osparams
9574     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9575     for name in self.op.osparams.keys():
9576       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9577         del self.op.osparams[name]
9578
9579   def _CalculateFileStorageDir(self):
9580     """Calculate final instance file storage dir.
9581
9582     """
9583     # file storage dir calculation/check
9584     self.instance_file_storage_dir = None
9585     if self.op.disk_template in constants.DTS_FILEBASED:
9586       # build the full file storage dir path
9587       joinargs = []
9588
9589       if self.op.disk_template == constants.DT_SHARED_FILE:
9590         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9591       else:
9592         get_fsd_fn = self.cfg.GetFileStorageDir
9593
9594       cfg_storagedir = get_fsd_fn()
9595       if not cfg_storagedir:
9596         raise errors.OpPrereqError("Cluster file storage dir not defined")
9597       joinargs.append(cfg_storagedir)
9598
9599       if self.op.file_storage_dir is not None:
9600         joinargs.append(self.op.file_storage_dir)
9601
9602       joinargs.append(self.op.instance_name)
9603
9604       # pylint: disable=W0142
9605       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9606
  def CheckPrereq(self): # pylint: disable=R0914
    """Check prerequisites.

    In order, this method: computes the file storage dir; in import mode
    reads the export and uses it to fill unset opcode parameters;
    validates the hypervisor, the tags and the hypervisor/backend/OS
    parameters; builds the NIC and disk definitions; generates the
    missing MAC addresses; runs the instance allocator if one was given;
    releases the node locks that are not needed anymore; and finally
    runs the node-related checks (node state, instance policy, free disk
    space or adoption data, and the C{_CheckHVParams},
    C{_CheckNodeHasOS}, C{_CheckOSParams}, C{_CheckNicsBridgesExist} and,
    if the instance is to be started, C{_CheckNodeFreeMemory} helpers).

    On success the following attributes are set: C{hv_full}, C{be_full},
    C{os_full}, C{nics}, C{disks}, C{pnode}, C{secondaries},
    C{diskparams}, C{dry_run_result} and, in import mode, C{src_images}.

    @raise errors.OpPrereqError: if one of the checks done directly in
        this method fails

    """
    self._CalculateFileStorageDir()

    # in import mode the export provides defaults for the parameters
    # which the opcode leaves unset
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    # an unset or 'auto' hypervisor is taken from the configuration
    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
    # 'auto' values are replaced by the cluster default of the same
    # parameter; only values of existing keys are changed in this loop
    for param, value in self.op.beparams.iteritems():
      if value == constants.VALUE_AUTO:
        self.op.beparams[param] = default_beparams[param]
    objects.UpgradeBeParams(self.op.beparams)
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      # nic_mode_req keeps what was requested, nic_mode is the effective
      # mode (cluster default if unset or 'auto') used for the checks
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        # 'auto' uses the address of self.hostname1, which is set outside
        # this method
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      # ('auto' and 'generate' are kept as they are here and replaced by
      # a generated address further down)
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      # mode and link are only stored in nicparams if the request
      # contained them
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link

      # the syntax check is done on the filled parameters, but the NIC
      # object only gets the non-filled ones
      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        }
      # the meta VG and the adoption source are optional and only copied
      # if given
      if constants.IDISK_METAVG in disk:
        new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:
      # one entry per disk: the path of the dump inside the export
      # directory, or False if the export has no dump for that disk
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      # when importing under the name stored in the export, NICs still
      # set to 'auto' take the MAC address recorded in the export
      old_name = export_info.get(constants.INISECT_INS, "name")
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    # (filter drops the node names which are not set)
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))
    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      # different node groups only cause a warning, not an error
      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    nodenames = [pnode.name] + self.secondaries

    # Verify instance specs
    # NOTE(review): the disk size is read via the literal "size" key,
    # while the rest of this method uses constants.IDISK_SIZE;
    # presumably the two are equal -- confirm
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      }

    # the policy of the primary node's group is the one checked; the
    # violations are computed in any case, but only raised if the opcode
    # doesn't ask to ignore them
    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = _CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      raise errors.OpPrereqError(("Instance allocation to group %s violates"
                                  " policy: %s") % (pnode.group,
                                                    utils.CommaJoin(res)),
                                  errors.ECODE_INVAL)

    # disk parameters (not customizable at instance or node level)
    # just use the primary node parameters, ignoring the secondary.
    self.diskparams = group_info.diskparams

    # self.adopt_disks is set outside this method
    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()
      else:
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      # the set removes duplicates, so a size different from the number
      # of disks means the same volume was given more than once
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      # NOTE(review): element [2] of the per-LV data is presumably the
      # "online" flag and element [0] (used below) the size -- confirm
      # against the lv_list RPC
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      # only devices below the adoptable root are accepted
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      # NOTE(review): the sizes are looked up by the adoption path as
      # given, while the checks above used the normalized (abspath)
      # form -- confirm that non-normalized paths cannot reach this point
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)

    # the result reported for a dry run is the list of selected nodes
    self.dry_run_result = list(nodenames)
9967
9968   def Exec(self, feedback_fn):
9969     """Create and add the instance to the cluster.
9970
9971     """
9972     instance = self.op.instance_name
9973     pnode_name = self.pnode.name
9974
9975     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9976                 self.owned_locks(locking.LEVEL_NODE)), \
9977       "Node locks differ from node resource locks"
9978
9979     ht_kind = self.op.hypervisor
9980     if ht_kind in constants.HTS_REQ_PORT:
9981       network_port = self.cfg.AllocatePort()
9982     else:
9983       network_port = None
9984
9985     disks = _GenerateDiskTemplate(self,
9986                                   self.op.disk_template,
9987                                   instance, pnode_name,
9988                                   self.secondaries,
9989                                   self.disks,
9990                                   self.instance_file_storage_dir,
9991                                   self.op.file_driver,
9992                                   0,
9993                                   feedback_fn,
9994                                   self.diskparams)
9995
9996     iobj = objects.Instance(name=instance, os=self.op.os_type,
9997                             primary_node=pnode_name,
9998                             nics=self.nics, disks=disks,
9999                             disk_template=self.op.disk_template,
10000                             admin_state=constants.ADMINST_DOWN,
10001                             network_port=network_port,
10002                             beparams=self.op.beparams,
10003                             hvparams=self.op.hvparams,
10004                             hypervisor=self.op.hypervisor,
10005                             osparams=self.op.osparams,
10006                             )
10007
10008     if self.op.tags:
10009       for tag in self.op.tags:
10010         iobj.AddTag(tag)
10011
10012     if self.adopt_disks:
10013       if self.op.disk_template == constants.DT_PLAIN:
10014         # rename LVs to the newly-generated names; we need to construct
10015         # 'fake' LV disks with the old data, plus the new unique_id
10016         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10017         rename_to = []
10018         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10019           rename_to.append(t_dsk.logical_id)
10020           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10021           self.cfg.SetDiskID(t_dsk, pnode_name)
10022         result = self.rpc.call_blockdev_rename(pnode_name,
10023                                                zip(tmp_disks, rename_to))
10024         result.Raise("Failed to rename adoped LVs")
10025     else:
10026       feedback_fn("* creating instance disks...")
10027       try:
10028         _CreateDisks(self, iobj)
10029       except errors.OpExecError:
10030         self.LogWarning("Device creation failed, reverting...")
10031         try:
10032           _RemoveDisks(self, iobj)
10033         finally:
10034           self.cfg.ReleaseDRBDMinors(instance)
10035           raise
10036
10037     feedback_fn("adding instance %s to cluster config" % instance)
10038
10039     self.cfg.AddInstance(iobj, self.proc.GetECId())
10040
10041     # Declare that we don't want to remove the instance lock anymore, as we've
10042     # added the instance to the config
10043     del self.remove_locks[locking.LEVEL_INSTANCE]
10044
10045     if self.op.mode == constants.INSTANCE_IMPORT:
10046       # Release unused nodes
10047       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10048     else:
10049       # Release all nodes
10050       _ReleaseLocks(self, locking.LEVEL_NODE)
10051
10052     disk_abort = False
10053     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10054       feedback_fn("* wiping instance disks...")
10055       try:
10056         _WipeDisks(self, iobj)
10057       except errors.OpExecError, err:
10058         logging.exception("Wiping disks failed")
10059         self.LogWarning("Wiping instance disks failed (%s)", err)
10060         disk_abort = True
10061
10062     if disk_abort:
10063       # Something is already wrong with the disks, don't do anything else
10064       pass
10065     elif self.op.wait_for_sync:
10066       disk_abort = not _WaitForSync(self, iobj)
10067     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10068       # make sure the disks are not degraded (still sync-ing is ok)
10069       feedback_fn("* checking mirrors status")
10070       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10071     else:
10072       disk_abort = False
10073
10074     if disk_abort:
10075       _RemoveDisks(self, iobj)
10076       self.cfg.RemoveInstance(iobj.name)
10077       # Make sure the instance lock gets removed
10078       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10079       raise errors.OpExecError("There are some degraded disks for"
10080                                " this instance")
10081
10082     # Release all node resource locks
10083     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10084
10085     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10086       if self.op.mode == constants.INSTANCE_CREATE:
10087         if not self.op.no_install:
10088           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10089                         not self.op.wait_for_sync)
10090           if pause_sync:
10091             feedback_fn("* pausing disk sync to install instance OS")
10092             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10093                                                               iobj.disks, True)
10094             for idx, success in enumerate(result.payload):
10095               if not success:
10096                 logging.warn("pause-sync of instance %s for disk %d failed",
10097                              instance, idx)
10098
10099           feedback_fn("* running the instance OS create scripts...")
10100           # FIXME: pass debug option from opcode to backend
10101           os_add_result = \
10102             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10103                                           self.op.debug_level)
10104           if pause_sync:
10105             feedback_fn("* resuming disk sync")
10106             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10107                                                               iobj.disks, False)
10108             for idx, success in enumerate(result.payload):
10109               if not success:
10110                 logging.warn("resume-sync of instance %s for disk %d failed",
10111                              instance, idx)
10112
10113           os_add_result.Raise("Could not add os for instance %s"
10114                               " on node %s" % (instance, pnode_name))
10115
10116       elif self.op.mode == constants.INSTANCE_IMPORT:
10117         feedback_fn("* running the instance OS import scripts...")
10118
10119         transfers = []
10120
10121         for idx, image in enumerate(self.src_images):
10122           if not image:
10123             continue
10124
10125           # FIXME: pass debug option from opcode to backend
10126           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10127                                              constants.IEIO_FILE, (image, ),
10128                                              constants.IEIO_SCRIPT,
10129                                              (iobj.disks[idx], idx),
10130                                              None)
10131           transfers.append(dt)
10132
10133         import_result = \
10134           masterd.instance.TransferInstanceData(self, feedback_fn,
10135                                                 self.op.src_node, pnode_name,
10136                                                 self.pnode.secondary_ip,
10137                                                 iobj, transfers)
10138         if not compat.all(import_result):
10139           self.LogWarning("Some disks for instance %s on node %s were not"
10140                           " imported successfully" % (instance, pnode_name))
10141
10142       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10143         feedback_fn("* preparing remote import...")
10144         # The source cluster will stop the instance before attempting to make a
10145         # connection. In some cases stopping an instance can take a long time,
10146         # hence the shutdown timeout is added to the connection timeout.
10147         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10148                            self.op.source_shutdown_timeout)
10149         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10150
10151         assert iobj.primary_node == self.pnode.name
10152         disk_results = \
10153           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10154                                         self.source_x509_ca,
10155                                         self._cds, timeouts)
10156         if not compat.all(disk_results):
10157           # TODO: Should the instance still be started, even if some disks
10158           # failed to import (valid for local imports, too)?
10159           self.LogWarning("Some disks for instance %s on node %s were not"
10160                           " imported successfully" % (instance, pnode_name))
10161
10162         # Run rename script on newly imported instance
10163         assert iobj.name == instance
10164         feedback_fn("Running rename script for %s" % instance)
10165         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10166                                                    self.source_instance_name,
10167                                                    self.op.debug_level)
10168         if result.fail_msg:
10169           self.LogWarning("Failed to run rename script for %s on node"
10170                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10171
10172       else:
10173         # also checked in the prereq part
10174         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10175                                      % self.op.mode)
10176
10177     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10178
10179     if self.op.start:
10180       iobj.admin_state = constants.ADMINST_UP
10181       self.cfg.Update(iobj, feedback_fn)
10182       logging.info("Starting instance %s on node %s", instance, pnode_name)
10183       feedback_fn("* starting instance...")
10184       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10185                                             False)
10186       result.Raise("Could not start instance")
10187
10188     return list(iobj.all_nodes)
10189
10190
def _CheckRADOSFreeSpace():
  """Check the disk space requirements inside the RADOS cluster.

  This is intentionally a no-op: the RADOS cluster is assumed to always
  have enough space, so nothing is computed and nothing is raised.

  @return: None

  """
  return None
10197
10198
class LUInstanceConsole(NoHooksLU):
  """Return the data needed to connect to an instance's console.

  Unlike most logical units this one does not perform the connection
  itself; it only returns the console description, which the caller then
  uses on the master node to reach the console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance, sharing all locks.

    """
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the locked instance in the configuration and checks that its
    primary node is online.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Return the console information of a running instance.

    @param feedback_fn: feedback function (not used by this LU)
    @return: the value returned by L{_GetInstanceConsole}
    @raise errors.OpExecError: if the instance is not listed as running
        on its primary node

    """
    instance = self.instance
    pnode = instance.primary_node

    running = self.rpc.call_instance_list([pnode],
                                          [instance.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if instance.name in running.payload:
      logging.debug("Connecting to console of %s on %s", instance.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)

    # The instance is not running; report a state derived from the
    # administrative state stored in the configuration
    admin_state = instance.admin_state
    if admin_state == constants.ADMINST_UP:
      state = constants.INSTST_ERRORDOWN
    elif admin_state == constants.ADMINST_DOWN:
      state = constants.INSTST_ADMINDOWN
    else:
      state = constants.INSTST_ADMINOFFLINE

    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (instance.name, state))
10248
10249
def _GetInstanceConsole(cluster, instance):
  """Build the console description for an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: cluster object used to fill in the parameter defaults
  @type instance: L{objects.Instance}
  @param instance: the instance whose console is requested
  @rtype: dict
  @return: the console object returned by the hypervisor, converted with
      its C{ToDict} method

  """
  hv_obj = hypervisor.GetHypervisor(instance.hypervisor)

  # The filled hypervisor and backend parameters are handed over next to
  # the instance instead of being written into it, so the defaults never
  # end up being saved in the instance object itself.
  filled_hv = cluster.FillHV(instance)
  filled_be = cluster.FillBE(instance)
  cons = hv_obj.GetInstanceConsole(instance, filled_hv, filled_be)

  assert cons.instance == instance.name
  assert cons.Validate()

  return cons.ToDict()
10269
10270
class LUInstanceReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of an instance.

  The replacement itself is carried out by a L{TLReplaceDisks} tasklet;
  this class checks the opcode arguments, declares the needed locks and
  provides the hooks environment.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the mode/remote node/iallocator combination.

    """
    op = self.op
    TLReplaceDisks.CheckArguments(op.mode, op.remote_node, op.iallocator)

  def ExpandNames(self):
    """Lock the instance, prepare the node-level locks, create the tasklet.

    """
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    op = self.op

    assert op.iallocator is None or op.remote_node is None, \
      "Conflicting options"

    if op.remote_node is None:
      # Node locks are filled in later, in DeclareLocks
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
    else:
      op.remote_node = _ExpandNodeName(self.cfg, op.remote_node)

      # Warning: the lock on the new secondary must stay here unless
      # DRBD8.AddChildren is changed to work in parallel; currently it
      # doesn't, since parallel invocations of FindUnusedMinor will
      # conflict
      self.needed_locks[locking.LEVEL_NODE] = [op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, op.instance_name, op.mode,
                                   op.iallocator, op.remote_node,
                                   op.disks, False, op.early_release,
                                   op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given locking level.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # The groups used by the instance are locked optimistically: they are
      # looked up through nodes which are not locked yet, hence they have
      # to be verified again later on
      instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      self.needed_locks[locking.LEVEL_NODEGROUP] = instance_groups

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is None:
        self._LockInstancesNodes()
      else:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        member_nodes = []
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          member_nodes.extend(self.cfg.GetNodeGroup(group_uuid).members)
        self.needed_locks[locking.LEVEL_NODE] = member_nodes

    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks (the very same list object is stored)
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    @rtype: dict
    @return: the mode and the old/new secondary, updated with the generic
        per-instance hook environment

    """
    instance = self.replacer.instance

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = self.op.remote_node
    env["OLD_SECONDARY"] = instance.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same node list twice: master node, primary node and, if
        given, the new secondary node

    """
    instance = self.replacer.instance
    hook_nodes = [self.cfg.GetMasterNode(), instance.primary_node]

    new_secondary = self.op.remote_node
    if new_secondary is not None:
      hook_nodes.append(new_secondary)

    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Re-verifies the optimistically acquired node group locks (if any) and
    then runs the generic prerequisite check of L{LogicalUnit}.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    group_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if group_locks:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, group_locks)

    return LogicalUnit.CheckPrereq(self)
10388
10389
10390 class TLReplaceDisks(Tasklet):
10391   """Replaces disks for an instance.
10392
10393   Note: Locking is not within the scope of this class.
10394
10395   """
10396   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10397                disks, delay_iallocator, early_release, ignore_ipolicy):
10398     """Initializes this class.
10399
10400     """
10401     Tasklet.__init__(self, lu)
10402
10403     # Parameters
10404     self.instance_name = instance_name
10405     self.mode = mode
10406     self.iallocator_name = iallocator_name
10407     self.remote_node = remote_node
10408     self.disks = disks
10409     self.delay_iallocator = delay_iallocator
10410     self.early_release = early_release
10411     self.ignore_ipolicy = ignore_ipolicy
10412
10413     # Runtime data
10414     self.instance = None
10415     self.new_node = None
10416     self.target_node = None
10417     self.other_node = None
10418     self.remote_node_info = None
10419     self.node_secondary_ip = None
10420
10421   @staticmethod
10422   def CheckArguments(mode, remote_node, iallocator):
10423     """Helper function for users of this class.
10424
10425     """
10426     # check for valid parameter combination
10427     if mode == constants.REPLACE_DISK_CHG:
10428       if remote_node is None and iallocator is None:
10429         raise errors.OpPrereqError("When changing the secondary either an"
10430                                    " iallocator script must be used or the"
10431                                    " new node given", errors.ECODE_INVAL)
10432
10433       if remote_node is not None and iallocator is not None:
10434         raise errors.OpPrereqError("Give either the iallocator or the new"
10435                                    " secondary, not both", errors.ECODE_INVAL)
10436
10437     elif remote_node is not None or iallocator is not None:
10438       # Not replacing the secondary
10439       raise errors.OpPrereqError("The iallocator and new node options can"
10440                                  " only be used when changing the"
10441                                  " secondary node", errors.ECODE_INVAL)
10442
10443   @staticmethod
10444   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10445     """Compute a new secondary node using an IAllocator.
10446
10447     """
10448     ial = IAllocator(lu.cfg, lu.rpc,
10449                      mode=constants.IALLOCATOR_MODE_RELOC,
10450                      name=instance_name,
10451                      relocate_from=list(relocate_from))
10452
10453     ial.Run(iallocator_name)
10454
10455     if not ial.success:
10456       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10457                                  " %s" % (iallocator_name, ial.info),
10458                                  errors.ECODE_NORES)
10459
10460     if len(ial.result) != ial.required_nodes:
10461       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10462                                  " of nodes (%s), required %s" %
10463                                  (iallocator_name,
10464                                   len(ial.result), ial.required_nodes),
10465                                  errors.ECODE_FAULT)
10466
10467     remote_node_name = ial.result[0]
10468
10469     lu.LogInfo("Selected new secondary for instance '%s': %s",
10470                instance_name, remote_node_name)
10471
10472     return remote_node_name
10473
10474   def _FindFaultyDisks(self, node_name):
10475     """Wrapper for L{_FindFaultyInstanceDisks}.
10476
10477     """
10478     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10479                                     node_name, True)
10480
10481   def _CheckDisksActivated(self, instance):
10482     """Checks if the instance disks are activated.
10483
10484     @param instance: The instance to check disks
10485     @return: True if they are activated, False otherwise
10486
10487     """
10488     nodes = instance.all_nodes
10489
10490     for idx, dev in enumerate(instance.disks):
10491       for node in nodes:
10492         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10493         self.cfg.SetDiskID(dev, node)
10494
10495         result = self.rpc.call_blockdev_find(node, dev)
10496
10497         if result.offline:
10498           continue
10499         elif result.fail_msg or not result.payload:
10500           return False
10501
10502     return True
10503
10504   def CheckPrereq(self):
10505     """Check prerequisites.
10506
10507     This checks that the instance is in the cluster.
10508
10509     """
10510     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10511     assert instance is not None, \
10512       "Cannot retrieve locked instance %s" % self.instance_name
10513
10514     if instance.disk_template != constants.DT_DRBD8:
10515       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10516                                  " instances", errors.ECODE_INVAL)
10517
10518     if len(instance.secondary_nodes) != 1:
10519       raise errors.OpPrereqError("The instance has a strange layout,"
10520                                  " expected one secondary but found %d" %
10521                                  len(instance.secondary_nodes),
10522                                  errors.ECODE_FAULT)
10523
10524     if not self.delay_iallocator:
10525       self._CheckPrereq2()
10526
10527   def _CheckPrereq2(self):
10528     """Check prerequisites, second part.
10529
10530     This function should always be part of CheckPrereq. It was separated and is
10531     now called from Exec because during node evacuation iallocator was only
10532     called with an unmodified cluster model, not taking planned changes into
10533     account.
10534
10535     """
10536     instance = self.instance
10537     secondary_node = instance.secondary_nodes[0]
10538
10539     if self.iallocator_name is None:
10540       remote_node = self.remote_node
10541     else:
10542       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10543                                        instance.name, instance.secondary_nodes)
10544
10545     if remote_node is None:
10546       self.remote_node_info = None
10547     else:
10548       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10549              "Remote node '%s' is not locked" % remote_node
10550
10551       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10552       assert self.remote_node_info is not None, \
10553         "Cannot retrieve locked node %s" % remote_node
10554
10555     if remote_node == self.instance.primary_node:
10556       raise errors.OpPrereqError("The specified node is the primary node of"
10557                                  " the instance", errors.ECODE_INVAL)
10558
10559     if remote_node == secondary_node:
10560       raise errors.OpPrereqError("The specified node is already the"
10561                                  " secondary node of the instance",
10562                                  errors.ECODE_INVAL)
10563
10564     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10565                                     constants.REPLACE_DISK_CHG):
10566       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10567                                  errors.ECODE_INVAL)
10568
10569     if self.mode == constants.REPLACE_DISK_AUTO:
10570       if not self._CheckDisksActivated(instance):
10571         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10572                                    " first" % self.instance_name,
10573                                    errors.ECODE_STATE)
10574       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10575       faulty_secondary = self._FindFaultyDisks(secondary_node)
10576
10577       if faulty_primary and faulty_secondary:
10578         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10579                                    " one node and can not be repaired"
10580                                    " automatically" % self.instance_name,
10581                                    errors.ECODE_STATE)
10582
10583       if faulty_primary:
10584         self.disks = faulty_primary
10585         self.target_node = instance.primary_node
10586         self.other_node = secondary_node
10587         check_nodes = [self.target_node, self.other_node]
10588       elif faulty_secondary:
10589         self.disks = faulty_secondary
10590         self.target_node = secondary_node
10591         self.other_node = instance.primary_node
10592         check_nodes = [self.target_node, self.other_node]
10593       else:
10594         self.disks = []
10595         check_nodes = []
10596
10597     else:
10598       # Non-automatic modes
10599       if self.mode == constants.REPLACE_DISK_PRI:
10600         self.target_node = instance.primary_node
10601         self.other_node = secondary_node
10602         check_nodes = [self.target_node, self.other_node]
10603
10604       elif self.mode == constants.REPLACE_DISK_SEC:
10605         self.target_node = secondary_node
10606         self.other_node = instance.primary_node
10607         check_nodes = [self.target_node, self.other_node]
10608
10609       elif self.mode == constants.REPLACE_DISK_CHG:
10610         self.new_node = remote_node
10611         self.other_node = instance.primary_node
10612         self.target_node = secondary_node
10613         check_nodes = [self.new_node, self.other_node]
10614
10615         _CheckNodeNotDrained(self.lu, remote_node)
10616         _CheckNodeVmCapable(self.lu, remote_node)
10617
10618         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10619         assert old_node_info is not None
10620         if old_node_info.offline and not self.early_release:
10621           # doesn't make sense to delay the release
10622           self.early_release = True
10623           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10624                           " early-release mode", secondary_node)
10625
10626       else:
10627         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10628                                      self.mode)
10629
10630       # If not specified all disks should be replaced
10631       if not self.disks:
10632         self.disks = range(len(self.instance.disks))
10633
10634     # TODO: This is ugly, but right now we can't distinguish between internal
10635     # submitted opcode and external one. We should fix that.
10636     if self.remote_node_info:
10637       # We change the node, lets verify it still meets instance policy
10638       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10639       ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10640                                        new_group_info)
10641       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10642                               ignore=self.ignore_ipolicy)
10643
10644     # TODO: compute disk parameters
10645     primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10646     secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10647     if primary_node_info.group != secondary_node_info.group:
10648       self.lu.LogInfo("The instance primary and secondary nodes are in two"
10649                       " different node groups; the disk parameters of the"
10650                       " primary node's group will be applied.")
10651
10652     self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10653
10654     for node in check_nodes:
10655       _CheckNodeOnline(self.lu, node)
10656
10657     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10658                                                           self.other_node,
10659                                                           self.target_node]
10660                               if node_name is not None)
10661
10662     # Release unneeded node and node resource locks
10663     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10664     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10665
10666     # Release any owned node group
10667     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10668       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10669
10670     # Check whether disks are valid
10671     for disk_idx in self.disks:
10672       instance.FindDisk(disk_idx)
10673
10674     # Get secondary node IP addresses
10675     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10676                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10677
10678   def Exec(self, feedback_fn):
10679     """Execute disk replacement.
10680
10681     This dispatches the disk replacement to the appropriate handler.
10682
10683     """
10684     if self.delay_iallocator:
10685       self._CheckPrereq2()
10686
10687     if __debug__:
10688       # Verify owned locks before starting operation
10689       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10690       assert set(owned_nodes) == set(self.node_secondary_ip), \
10691           ("Incorrect node locks, owning %s, expected %s" %
10692            (owned_nodes, self.node_secondary_ip.keys()))
10693       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10694               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10695
10696       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10697       assert list(owned_instances) == [self.instance_name], \
10698           "Instance '%s' not locked" % self.instance_name
10699
10700       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10701           "Should not own any node group lock at this point"
10702
10703     if not self.disks:
10704       feedback_fn("No disks need replacement")
10705       return
10706
10707     feedback_fn("Replacing disk(s) %s for %s" %
10708                 (utils.CommaJoin(self.disks), self.instance.name))
10709
10710     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10711
10712     # Activate the instance disks if we're replacing them on a down instance
10713     if activate_disks:
10714       _StartInstanceDisks(self.lu, self.instance, True)
10715
10716     try:
10717       # Should we replace the secondary node?
10718       if self.new_node is not None:
10719         fn = self._ExecDrbd8Secondary
10720       else:
10721         fn = self._ExecDrbd8DiskOnly
10722
10723       result = fn(feedback_fn)
10724     finally:
10725       # Deactivate the instance disks if we're replacing them on a
10726       # down instance
10727       if activate_disks:
10728         _SafeShutdownInstanceDisks(self.lu, self.instance)
10729
10730     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10731
10732     if __debug__:
10733       # Verify owned locks
10734       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10735       nodes = frozenset(self.node_secondary_ip)
10736       assert ((self.early_release and not owned_nodes) or
10737               (not self.early_release and not (set(owned_nodes) - nodes))), \
10738         ("Not owning the correct locks, early_release=%s, owned=%r,"
10739          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10740
10741     return result
10742
10743   def _CheckVolumeGroup(self, nodes):
10744     self.lu.LogInfo("Checking volume groups")
10745
10746     vgname = self.cfg.GetVGName()
10747
10748     # Make sure volume group exists on all involved nodes
10749     results = self.rpc.call_vg_list(nodes)
10750     if not results:
10751       raise errors.OpExecError("Can't list volume groups on the nodes")
10752
10753     for node in nodes:
10754       res = results[node]
10755       res.Raise("Error checking node %s" % node)
10756       if vgname not in res.payload:
10757         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10758                                  (vgname, node))
10759
10760   def _CheckDisksExistence(self, nodes):
10761     # Check disk existence
10762     for idx, dev in enumerate(self.instance.disks):
10763       if idx not in self.disks:
10764         continue
10765
10766       for node in nodes:
10767         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10768         self.cfg.SetDiskID(dev, node)
10769
10770         result = self.rpc.call_blockdev_find(node, dev)
10771
10772         msg = result.fail_msg
10773         if msg or not result.payload:
10774           if not msg:
10775             msg = "disk not found"
10776           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10777                                    (idx, node, msg))
10778
10779   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10780     for idx, dev in enumerate(self.instance.disks):
10781       if idx not in self.disks:
10782         continue
10783
10784       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10785                       (idx, node_name))
10786
10787       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10788                                    ldisk=ldisk):
10789         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10790                                  " replace disks for instance %s" %
10791                                  (node_name, self.instance.name))
10792
10793   def _CreateNewStorage(self, node_name):
10794     """Create new storage on the primary or secondary node.
10795
10796     This is only used for same-node replaces, not for changing the
10797     secondary node, hence we don't want to modify the existing disk.
10798
10799     """
10800     iv_names = {}
10801
10802     for idx, dev in enumerate(self.instance.disks):
10803       if idx not in self.disks:
10804         continue
10805
10806       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10807
10808       self.cfg.SetDiskID(dev, node_name)
10809
10810       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10811       names = _GenerateUniqueNames(self.lu, lv_names)
10812
10813       _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10814
10815       vg_data = dev.children[0].logical_id[0]
10816       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10817                              logical_id=(vg_data, names[0]), params=data_p)
10818       vg_meta = dev.children[1].logical_id[0]
10819       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10820                              logical_id=(vg_meta, names[1]), params=meta_p)
10821
10822       new_lvs = [lv_data, lv_meta]
10823       old_lvs = [child.Copy() for child in dev.children]
10824       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10825
10826       # we pass force_create=True to force the LVM creation
10827       for new_lv in new_lvs:
10828         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10829                         _GetInstanceInfoText(self.instance), False)
10830
10831     return iv_names
10832
10833   def _CheckDevices(self, node_name, iv_names):
10834     for name, (dev, _, _) in iv_names.iteritems():
10835       self.cfg.SetDiskID(dev, node_name)
10836
10837       result = self.rpc.call_blockdev_find(node_name, dev)
10838
10839       msg = result.fail_msg
10840       if msg or not result.payload:
10841         if not msg:
10842           msg = "disk not found"
10843         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10844                                  (name, msg))
10845
10846       if result.payload.is_degraded:
10847         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10848
10849   def _RemoveOldStorage(self, node_name, iv_names):
10850     for name, (_, old_lvs, _) in iv_names.iteritems():
10851       self.lu.LogInfo("Remove logical volumes for %s" % name)
10852
10853       for lv in old_lvs:
10854         self.cfg.SetDiskID(lv, node_name)
10855
10856         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10857         if msg:
10858           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10859                              hint="remove unused LVs manually")
10860
10861   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10862     """Replace a disk on the primary or secondary for DRBD 8.
10863
10864     The algorithm for replace is quite complicated:
10865
10866       1. for each disk to be replaced:
10867
10868         1. create new LVs on the target node with unique names
10869         1. detach old LVs from the drbd device
10870         1. rename old LVs to name_replaced.<time_t>
10871         1. rename new LVs to old LVs
10872         1. attach the new LVs (with the old names now) to the drbd device
10873
10874       1. wait for sync across all devices
10875
10876       1. for each modified disk:
10877
10878         1. remove old LVs (which have the name name_replaces.<time_t>)
10879
10880     Failures are not very well handled.
10881
10882     """
10883     steps_total = 6
10884
10885     # Step: check device activation
10886     self.lu.LogStep(1, steps_total, "Check device existence")
10887     self._CheckDisksExistence([self.other_node, self.target_node])
10888     self._CheckVolumeGroup([self.target_node, self.other_node])
10889
10890     # Step: check other node consistency
10891     self.lu.LogStep(2, steps_total, "Check peer consistency")
10892     self._CheckDisksConsistency(self.other_node,
10893                                 self.other_node == self.instance.primary_node,
10894                                 False)
10895
10896     # Step: create new storage
10897     self.lu.LogStep(3, steps_total, "Allocate new storage")
10898     iv_names = self._CreateNewStorage(self.target_node)
10899
10900     # Step: for each lv, detach+rename*2+attach
10901     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10902     for dev, old_lvs, new_lvs in iv_names.itervalues():
10903       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10904
10905       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10906                                                      old_lvs)
10907       result.Raise("Can't detach drbd from local storage on node"
10908                    " %s for device %s" % (self.target_node, dev.iv_name))
10909       #dev.children = []
10910       #cfg.Update(instance)
10911
10912       # ok, we created the new LVs, so now we know we have the needed
10913       # storage; as such, we proceed on the target node to rename
10914       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10915       # using the assumption that logical_id == physical_id (which in
10916       # turn is the unique_id on that node)
10917
10918       # FIXME(iustin): use a better name for the replaced LVs
10919       temp_suffix = int(time.time())
10920       ren_fn = lambda d, suff: (d.physical_id[0],
10921                                 d.physical_id[1] + "_replaced-%s" % suff)
10922
10923       # Build the rename list based on what LVs exist on the node
10924       rename_old_to_new = []
10925       for to_ren in old_lvs:
10926         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10927         if not result.fail_msg and result.payload:
10928           # device exists
10929           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10930
10931       self.lu.LogInfo("Renaming the old LVs on the target node")
10932       result = self.rpc.call_blockdev_rename(self.target_node,
10933                                              rename_old_to_new)
10934       result.Raise("Can't rename old LVs on node %s" % self.target_node)
10935
10936       # Now we rename the new LVs to the old LVs
10937       self.lu.LogInfo("Renaming the new LVs on the target node")
10938       rename_new_to_old = [(new, old.physical_id)
10939                            for old, new in zip(old_lvs, new_lvs)]
10940       result = self.rpc.call_blockdev_rename(self.target_node,
10941                                              rename_new_to_old)
10942       result.Raise("Can't rename new LVs on node %s" % self.target_node)
10943
10944       # Intermediate steps of in memory modifications
10945       for old, new in zip(old_lvs, new_lvs):
10946         new.logical_id = old.logical_id
10947         self.cfg.SetDiskID(new, self.target_node)
10948
10949       # We need to modify old_lvs so that removal later removes the
10950       # right LVs, not the newly added ones; note that old_lvs is a
10951       # copy here
10952       for disk in old_lvs:
10953         disk.logical_id = ren_fn(disk, temp_suffix)
10954         self.cfg.SetDiskID(disk, self.target_node)
10955
10956       # Now that the new lvs have the old name, we can add them to the device
10957       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10958       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10959                                                   new_lvs)
10960       msg = result.fail_msg
10961       if msg:
10962         for new_lv in new_lvs:
10963           msg2 = self.rpc.call_blockdev_remove(self.target_node,
10964                                                new_lv).fail_msg
10965           if msg2:
10966             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10967                                hint=("cleanup manually the unused logical"
10968                                      "volumes"))
10969         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10970
10971     cstep = itertools.count(5)
10972
10973     if self.early_release:
10974       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10975       self._RemoveOldStorage(self.target_node, iv_names)
10976       # TODO: Check if releasing locks early still makes sense
10977       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10978     else:
10979       # Release all resource locks except those used by the instance
10980       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10981                     keep=self.node_secondary_ip.keys())
10982
10983     # Release all node locks while waiting for sync
10984     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10985
10986     # TODO: Can the instance lock be downgraded here? Take the optional disk
10987     # shutdown in the caller into consideration.
10988
10989     # Wait for sync
10990     # This can fail as the old devices are degraded and _WaitForSync
10991     # does a combined result over all disks, so we don't check its return value
10992     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10993     _WaitForSync(self.lu, self.instance)
10994
10995     # Check all devices manually
10996     self._CheckDevices(self.instance.primary_node, iv_names)
10997
10998     # Step: remove old storage
10999     if not self.early_release:
11000       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11001       self._RemoveOldStorage(self.target_node, iv_names)
11002
11003   def _ExecDrbd8Secondary(self, feedback_fn):
11004     """Replace the secondary node for DRBD 8.
11005
11006     The algorithm for replace is quite complicated:
11007       - for all disks of the instance:
11008         - create new LVs on the new node with same names
11009         - shutdown the drbd device on the old secondary
11010         - disconnect the drbd network on the primary
11011         - create the drbd device on the new secondary
11012         - network attach the drbd on the primary, using an artifice:
11013           the drbd code for Attach() will connect to the network if it
11014           finds a device which is connected to the good local disks but
11015           not network enabled
11016       - wait for sync across all devices
11017       - remove all disks from the old secondary
11018
11019     Failures are not very well handled.
11020
11021     """
11022     steps_total = 6
11023
11024     pnode = self.instance.primary_node
11025
11026     # Step: check device activation
11027     self.lu.LogStep(1, steps_total, "Check device existence")
11028     self._CheckDisksExistence([self.instance.primary_node])
11029     self._CheckVolumeGroup([self.instance.primary_node])
11030
11031     # Step: check other node consistency
11032     self.lu.LogStep(2, steps_total, "Check peer consistency")
11033     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11034
11035     # Step: create new storage
11036     self.lu.LogStep(3, steps_total, "Allocate new storage")
11037     for idx, dev in enumerate(self.instance.disks):
11038       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11039                       (self.new_node, idx))
11040       # we pass force_create=True to force LVM creation
11041       for new_lv in dev.children:
11042         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11043                         _GetInstanceInfoText(self.instance), False)
11044
11045     # Step 4: dbrd minors and drbd setups changes
11046     # after this, we must manually remove the drbd minors on both the
11047     # error and the success paths
11048     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11049     minors = self.cfg.AllocateDRBDMinor([self.new_node
11050                                          for dev in self.instance.disks],
11051                                         self.instance.name)
11052     logging.debug("Allocated minors %r", minors)
11053
11054     iv_names = {}
11055     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11056       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11057                       (self.new_node, idx))
11058       # create new devices on new_node; note that we create two IDs:
11059       # one without port, so the drbd will be activated without
11060       # networking information on the new node at this stage, and one
11061       # with network, for the latter activation in step 4
11062       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11063       if self.instance.primary_node == o_node1:
11064         p_minor = o_minor1
11065       else:
11066         assert self.instance.primary_node == o_node2, "Three-node instance?"
11067         p_minor = o_minor2
11068
11069       new_alone_id = (self.instance.primary_node, self.new_node, None,
11070                       p_minor, new_minor, o_secret)
11071       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11072                     p_minor, new_minor, o_secret)
11073
11074       iv_names[idx] = (dev, dev.children, new_net_id)
11075       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11076                     new_net_id)
11077       drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11078       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11079                               logical_id=new_alone_id,
11080                               children=dev.children,
11081                               size=dev.size,
11082                               params=drbd_params)
11083       try:
11084         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11085                               _GetInstanceInfoText(self.instance), False)
11086       except errors.GenericError:
11087         self.cfg.ReleaseDRBDMinors(self.instance.name)
11088         raise
11089
11090     # We have new devices, shutdown the drbd on the old secondary
11091     for idx, dev in enumerate(self.instance.disks):
11092       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11093       self.cfg.SetDiskID(dev, self.target_node)
11094       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11095       if msg:
11096         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11097                            "node: %s" % (idx, msg),
11098                            hint=("Please cleanup this device manually as"
11099                                  " soon as possible"))
11100
11101     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11102     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11103                                                self.instance.disks)[pnode]
11104
11105     msg = result.fail_msg
11106     if msg:
11107       # detaches didn't succeed (unlikely)
11108       self.cfg.ReleaseDRBDMinors(self.instance.name)
11109       raise errors.OpExecError("Can't detach the disks from the network on"
11110                                " old node: %s" % (msg,))
11111
11112     # if we managed to detach at least one, we update all the disks of
11113     # the instance to point to the new secondary
11114     self.lu.LogInfo("Updating instance configuration")
11115     for dev, _, new_logical_id in iv_names.itervalues():
11116       dev.logical_id = new_logical_id
11117       self.cfg.SetDiskID(dev, self.instance.primary_node)
11118
11119     self.cfg.Update(self.instance, feedback_fn)
11120
11121     # Release all node locks (the configuration has been updated)
11122     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11123
11124     # and now perform the drbd attach
11125     self.lu.LogInfo("Attaching primary drbds to new secondary"
11126                     " (standalone => connected)")
11127     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11128                                             self.new_node],
11129                                            self.node_secondary_ip,
11130                                            self.instance.disks,
11131                                            self.instance.name,
11132                                            False)
11133     for to_node, to_result in result.items():
11134       msg = to_result.fail_msg
11135       if msg:
11136         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11137                            to_node, msg,
11138                            hint=("please do a gnt-instance info to see the"
11139                                  " status of disks"))
11140
11141     cstep = itertools.count(5)
11142
11143     if self.early_release:
11144       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11145       self._RemoveOldStorage(self.target_node, iv_names)
11146       # TODO: Check if releasing locks early still makes sense
11147       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11148     else:
11149       # Release all resource locks except those used by the instance
11150       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11151                     keep=self.node_secondary_ip.keys())
11152
11153     # TODO: Can the instance lock be downgraded here? Take the optional disk
11154     # shutdown in the caller into consideration.
11155
11156     # Wait for sync
11157     # This can fail as the old devices are degraded and _WaitForSync
11158     # does a combined result over all disks, so we don't check its return value
11159     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11160     _WaitForSync(self.lu, self.instance)
11161
11162     # Check all devices manually
11163     self._CheckDevices(self.instance.primary_node, iv_names)
11164
11165     # Step: remove old storage
11166     if not self.early_release:
11167       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11168       self._RemoveOldStorage(self.target_node, iv_names)
11169
11170
11171 class LURepairNodeStorage(NoHooksLU):
11172   """Repairs the volume group on a node.
11173
11174   """
11175   REQ_BGL = False
11176
11177   def CheckArguments(self):
11178     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11179
11180     storage_type = self.op.storage_type
11181
11182     if (constants.SO_FIX_CONSISTENCY not in
11183         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11184       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11185                                  " repaired" % storage_type,
11186                                  errors.ECODE_INVAL)
11187
11188   def ExpandNames(self):
11189     self.needed_locks = {
11190       locking.LEVEL_NODE: [self.op.node_name],
11191       }
11192
11193   def _CheckFaultyDisks(self, instance, node_name):
11194     """Ensure faulty disks abort the opcode or at least warn."""
11195     try:
11196       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11197                                   node_name, True):
11198         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11199                                    " node '%s'" % (instance.name, node_name),
11200                                    errors.ECODE_STATE)
11201     except errors.OpPrereqError, err:
11202       if self.op.ignore_consistency:
11203         self.proc.LogWarning(str(err.args[0]))
11204       else:
11205         raise
11206
11207   def CheckPrereq(self):
11208     """Check prerequisites.
11209
11210     """
11211     # Check whether any instance on this node has faulty disks
11212     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11213       if inst.admin_state != constants.ADMINST_UP:
11214         continue
11215       check_nodes = set(inst.all_nodes)
11216       check_nodes.discard(self.op.node_name)
11217       for inst_node_name in check_nodes:
11218         self._CheckFaultyDisks(inst, inst_node_name)
11219
11220   def Exec(self, feedback_fn):
11221     feedback_fn("Repairing storage unit '%s' on %s ..." %
11222                 (self.op.name, self.op.node_name))
11223
11224     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11225     result = self.rpc.call_storage_execute(self.op.node_name,
11226                                            self.op.storage_type, st_args,
11227                                            self.op.name,
11228                                            constants.SO_FIX_CONSISTENCY)
11229     result.Raise("Failed to repair storage unit '%s' on %s" %
11230                  (self.op.name, self.op.node_name))
11231
11232
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  The logical unit does not move anything itself: L{Exec} returns the jobs
  which, once submitted, perform the evacuation.

  """
  REQ_BGL = False

  # Maps the opcode's evacuation mode to the mode passed to the iallocator
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    """Checks the iallocator/remote node opcode parameters.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expands node names and declares the (shared) locks needed.

    @raise errors.OpPrereqError: if the remote node is the evacuated node
        or if a remote node is given for a mode other than
        C{constants.NODE_EVAC_SEC}

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    @return: set containing the evacuated node plus either the given remote
        node or, if none was given, the members of the evacuated node's
        group

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    @raise errors.OpPrereqError: if the mode is C{constants.NODE_EVAC_ALL},
        which is currently rejected

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    """Fills in the locks needed at the given locking level.

    @param level: locking level for which locks are being declared

    """
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    """Verifies that the optimistically acquired locks are still right.

    Also stores the affected instances in C{self.instances} and their
    names in C{self.instance_names}.

    @raise errors.OpPrereqError: if the needed nodes are no longer covered
        by the owned node locks, or if the remote node is the primary node
        of an affected instance
    @raise errors.OpExecError: if the node groups or the instances changed
        since the locks were acquired

    """
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Computes the jobs evacuating the instances.

    With an iallocator the jobs are taken from its result; with a remote
    node one disk replacement job per instance is built.

    @param feedback_fn: not used by this method
    @return: L{ResultWithJobs} wrapping the list of jobs
    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
11430
11431
11432 def _SetOpEarlyRelease(early_release, op):
11433   """Sets C{early_release} flag on opcodes if available.
11434
11435   """
11436   try:
11437     op.early_release = early_release
11438   except AttributeError:
11439     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11440
11441   return op
11442
11443
11444 def _NodeEvacDest(use_nodes, group, nodes):
11445   """Returns group or nodes depending on caller's choice.
11446
11447   """
11448   if use_nodes:
11449     return utils.CommaJoin(nodes)
11450   else:
11451     return group
11452
11453
11454 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11455   """Unpacks the result of change-group and node-evacuate iallocator requests.
11456
11457   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11458   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11459
11460   @type lu: L{LogicalUnit}
11461   @param lu: Logical unit instance
11462   @type alloc_result: tuple/list
11463   @param alloc_result: Result from iallocator
11464   @type early_release: bool
11465   @param early_release: Whether to release locks early if possible
11466   @type use_nodes: bool
11467   @param use_nodes: Whether to display node names instead of groups
11468
11469   """
11470   (moved, failed, jobs) = alloc_result
11471
11472   if failed:
11473     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11474                                  for (name, reason) in failed)
11475     lu.LogWarning("Unable to evacuate instances %s", failreason)
11476     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11477
11478   if moved:
11479     lu.LogInfo("Instances to be moved: %s",
11480                utils.CommaJoin("%s (to %s)" %
11481                                (name, _NodeEvacDest(use_nodes, group, nodes))
11482                                for (name, group, nodes) in moved))
11483
11484   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11485               map(opcodes.OpCode.LoadOpCode, ops))
11486           for ops in jobs]
11487
11488
class LUInstanceGrowDisk(LogicalUnit):
  """Logical unit growing a single disk of an instance.

  The grow request is first sent to every node of the instance in dry-run
  mode and only then executed for real; afterwards the new size is recorded
  in the configuration.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Locks the instance and prepares the node-level lock declarations.

    The node and node resource lock lists start out empty and are flagged
    with L{constants.LOCKS_REPLACE}; they are filled in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    for level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
      self.recalculate_locks[level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declares the node and node resource locks.

    @param level: Locking level currently being declared

    """
    if level == locking.LEVEL_NODE_RES:
      # Node resource locks are a copy of the node locks
      node_locks = self.needed_locks[locking.LEVEL_NODE]
      self.needed_locks[locking.LEVEL_NODE_RES] = node_locks[:]
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Builds the hooks environment.

    The requested disk and amount are exported as C{DISK} and C{AMOUNT},
    followed by the generic instance environment.

    @rtype: dict

    """
    hook_env = dict(DISK=self.op.disk, AMOUNT=self.op.amount)
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return hook_env

  def BuildHooksNodes(self):
    """Builds the list of nodes on which hooks are run.

    @rtype: tuple
    @return: Master node followed by all instance nodes; the same list is
      used for both elements of the tuple

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Checks the prerequisites for growing the disk.

    All instance nodes must be online, the disk template must be growable
    and the requested disk must exist. Unless the template is file-based or
    RBD, the free space on the nodes is verified as well.

    @raise errors.OpPrereqError: If the disk template is not growable

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    inst_nodes = list(inst.all_nodes)
    for node_name in inst_nodes:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    template = inst.disk_template
    if template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    # TODO: check the free disk space for file, when that feature will be
    # supported
    no_space_check = (constants.DT_FILE,
                      constants.DT_SHARED_FILE,
                      constants.DT_RBD)
    if template in no_space_check:
      return

    growth = self.disk.ComputeGrowth(self.op.amount)
    _CheckNodesFreeDiskPerVG(self, inst_nodes, growth)

  def Exec(self, feedback_fn):
    """Grows the disk on all nodes and records the new size.

    @param feedback_fn: Callable used for reporting progress
    @raise errors.OpExecError: If the disk can't be activated

    """
    inst = self.instance
    disk = self.disk
    amount = self.op.amount

    assert set([inst.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    (disks_ok, _) = _AssembleInstanceDisks(self, inst, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s" %
                (self.op.disk, inst.name, utils.FormatUnit(amount, "h")))

    # The first pass runs all grow operations in dry-run mode; once we know
    # that (as far as we can test) they will succeed on all nodes, the
    # second pass runs them for real
    for dryrun in (True, False):
      for node in inst.all_nodes:
        self.cfg.SetDiskID(disk, node)
        result = self.rpc.call_blockdev_grow(node, disk, amount, dryrun)
        result.Raise("Grow request failed to node %s" % node)

        if not dryrun:
          # TODO: Rewrite code to work properly
          # DRBD goes into sync mode for a short amount of time after
          # executing the "resize" command. DRBD 8.x below version 8.0.13
          # contains a bug whereby calling "resize" in sync mode fails.
          # Sleeping for a short amount of time is a work-around.
          time.sleep(5)

    disk.RecordGrow(amount)
    self.cfg.Update(inst, feedback_fn)

    # The changes have been recorded, the node locks are no longer needed
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade the instance lock while waiting for the sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      if not _WaitForSync(self, inst, disks=[disk]):
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if inst.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, inst, disks=[disk])
    elif inst.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([inst.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11623
11624
class LUInstanceQueryData(NoHooksLU):
  """Logical unit returning configuration and runtime data of instances.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Computes the wanted instance names and the locks to acquire.

    Locking is enforced whenever non-static data is requested.

    """
    self.needed_locks = {}

    # Non-static information can only be gathered while holding locks
    if not self.op.static and not self.op.use_locking:
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.use_locking and not self.op.instances:
      # The names will be taken from the acquired locks
      self.wanted_names = None
    else:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)

    if not self.op.use_locking:
      return

    self.share_locks = _ShareAll()

    if self.wanted_names is None:
      instance_locks = locking.ALL_SET
    else:
      instance_locks = self.wanted_names
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_locks

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declares the node locks if locking is used.

    @param level: Locking level currently being declared

    """
    if level == locking.LEVEL_NODE and self.op.use_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Resolves the wanted instance names into instance objects.

    If no names were computed in L{ExpandNames}, the names of the owned
    instance locks are used.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    instance_info = self.cfg.GetMultiInstanceInfo(self.wanted_names)
    self.wanted_instances = [compat.snd(entry) for entry in instance_info]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Queries a node for the status of a block device.

    @param node: Node to query
    @param instance_name: Instance name, only used in the error message
    @param dev: Block device whose status is wanted
    @return: C{None} for static queries, an empty node, an offline node or
      a missing status; otherwise a tuple (device path, major, minor, sync
      percentage, estimated time, degraded flag, local disk status)

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance_name)

    bdev_status = find_result.payload
    if bdev_status is None:
      return None

    return (bdev_status.dev_path, bdev_status.major, bdev_status.minor,
            bdev_status.sync_percent, bdev_status.estimated_time,
            bdev_status.is_degraded, bdev_status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Computes the status of a disk and, recursively, of its children.

    @param instance: Instance owning the disk
    @param snode: Secondary node; ignored for DRBD devices, where it is
      derived from the first two entries of the logical ID
    @param dev: Disk to describe
    @rtype: dict

    """
    if dev.dev_type in constants.LDS_DRBD:
      # The secondary is whichever of the two nodes isn't the primary
      # (otherwise the node passed in is used)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children_status = [self._ComputeDiskStatus(instance, snode, child)
                       for child in (dev.children or [])]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gathers the data of all wanted instances.

    @rtype: dict
    @return: Dictionary mapping instance names to dictionaries with the
      instance's data

    """
    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)

    data = {}

    for (inst, (_, pnode)) in zip(self.wanted_instances, pri_nodes):
      if not (self.op.static or pnode.offline):
        info = self.rpc.call_instance_info(inst.primary_node,
                                           inst.name,
                                           inst.hypervisor)
        info.Raise("Error checking node %s" % inst.primary_node)
        payload = info.payload
        if payload and "state" in payload:
          run_state = "up"
        elif inst.admin_state == constants.ADMINST_UP:
          run_state = "down"
        else:
          run_state = inst.admin_state
      else:
        run_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, inst.name))

      disk_data = [self._ComputeDiskStatus(inst, None, disk)
                   for disk in inst.disks]

      data[inst.name] = {
        "name": inst.name,
        "config_state": inst.admin_state,
        "run_state": run_state,
        "pnode": inst.primary_node,
        "snodes": inst.secondary_nodes,
        "os": inst.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, inst.nics),
        "disk_template": inst.disk_template,
        "disks": disk_data,
        "hypervisor": inst.hypervisor,
        "network_port": inst.network_port,
        "hv_instance": inst.hvparams,
        "hv_actual": cluster.FillHV(inst, skip_globals=True),
        "be_instance": inst.beparams,
        "be_actual": cluster.FillBE(inst),
        "os_instance": inst.osparams,
        "os_actual": cluster.SimpleFillOS(inst.os, inst.osparams),
        "serial_no": inst.serial_no,
        "mtime": inst.mtime,
        "ctime": inst.ctime,
        "uuid": inst.uuid,
        }

    return data
11789
11790
def PrepareContainerMods(mods, private_fn):
  """Extends container modifications with a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable invoked once per modification (without
    arguments) to construct its private data field; if C{None}, the private
    data field is C{None}
  @rtype: list
  @return: List of tuples; (operation, index, parameters, private data)

  """
  prepared = []

  for (op, idx, params) in mods:
    if private_fn is None:
      private = None
    else:
      private = private_fn()

    prepared.append((op, idx, params, private))

  return prepared
11808
11809
#: Type description for the changes returned by the callbacks passed to
#: L{ApplyContainerMods}
_TApplyContModsCbChanges = ht.TMaybeListOf(
  ht.TAnd(ht.TIsLength(2),
          ht.TItems([ht.TNonEmptyString, ht.TAny])))
11817
11818
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies the modifications in C{mods} to C{container}.

  The container is modified in place. An index of -1 refers to the end of
  the container; other negative indices are rejected.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List to which applied changes are appended
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list; if C{None}, the parameters themselves are used as the new item
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}
  @raise IndexError: For indices which are negative (other than -1), beyond
    the end of the container or not referring to an existing item
  @raise errors.ProgrammerError: For unknown operations

  """
  for (op, idx, params, private) in mods:
    is_append = (idx == -1)

    if is_append:
      # Position of the last item, used for modifications and removals
      absidx = len(container) - 1
    else:
      if idx < 0:
        raise IndexError("Not accepting negative indices other than -1")
      if idx > len(container):
        raise IndexError("Got %s index %s, but there are only %s" %
                         (kind, idx, len(container)))
      absidx = idx

    changes = None

    if op != constants.DDM_ADD:
      # Everything but additions works on an existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      elif op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)
    else:
      # Work out the position at which the new item will end up
      if is_append:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if is_append:
        container.append(item)
      else:
        assert idx >= 0
        assert idx <= len(container)
        # list.insert places the item before the specified index
        container.insert(idx, item)

    assert _TApplyContModsCbChanges(changes)

    if chgdesc is not None and changes is not None:
      chgdesc.extend(changes)
11906
11907
11908 def _UpdateIvNames(base_index, disks):
11909   """Updates the C{iv_name} attribute of disks.
11910
11911   @type disks: list of L{objects.Disk}
11912
11913   """
11914   for (idx, disk) in enumerate(disks):
11915     disk.iv_name = "disk/%s" % (base_index + idx, )
11916
11917
11918 class _InstNicModPrivate:
11919   """Data structure for network interface modifications.
11920
11921   Used by L{LUInstanceSetParams}.
11922
11923   """
11924   def __init__(self):
11925     self.params = None
11926     self.filled = None
11927
11928
11929 class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.
11931
11932   """
11933   HPATH = "instance-modify"
11934   HTYPE = constants.HTYPE_INSTANCE
11935   REQ_BGL = False
11936
11937   @staticmethod
11938   def _UpgradeDiskNicMods(kind, mods, verify_fn):
11939     assert ht.TList(mods)
11940     assert not mods or len(mods[0]) in (2, 3)
11941
11942     if mods and len(mods[0]) == 2:
11943       result = []
11944
11945       addremove = 0
11946       for op, params in mods:
11947         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
11948           result.append((op, -1, params))
11949           addremove += 1
11950
11951           if addremove > 1:
11952             raise errors.OpPrereqError("Only one %s add or remove operation is"
11953                                        " supported at a time" % kind,
11954                                        errors.ECODE_INVAL)
11955         else:
11956           result.append((constants.DDM_MODIFY, op, params))
11957
11958       assert verify_fn(result)
11959     else:
11960       result = mods
11961
11962     return result
11963
11964   @staticmethod
11965   def _CheckMods(kind, mods, key_types, item_fn):
11966     """Ensures requested disk/NIC modifications are valid.
11967
11968     """
11969     for (op, _, params) in mods:
11970       assert ht.TDict(params)
11971
11972       utils.ForceDictType(params, key_types)
11973
11974       if op == constants.DDM_REMOVE:
11975         if params:
11976           raise errors.OpPrereqError("No settings should be passed when"
11977                                      " removing a %s" % kind,
11978                                      errors.ECODE_INVAL)
11979       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
11980         item_fn(op, params)
11981       else:
11982         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11983
11984   @staticmethod
11985   def _VerifyDiskModification(op, params):
11986     """Verifies a disk modification.
11987
11988     """
11989     if op == constants.DDM_ADD:
11990       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11991       if mode not in constants.DISK_ACCESS_SET:
11992         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11993                                    errors.ECODE_INVAL)
11994
11995       size = params.get(constants.IDISK_SIZE, None)
11996       if size is None:
11997         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
11998                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
11999
12000       try:
12001         size = int(size)
12002       except (TypeError, ValueError), err:
12003         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12004                                    errors.ECODE_INVAL)
12005
12006       params[constants.IDISK_SIZE] = size
12007
12008     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12009       raise errors.OpPrereqError("Disk size change not possible, use"
12010                                  " grow-disk", errors.ECODE_INVAL)
12011
12012   @staticmethod
12013   def _VerifyNicModification(op, params):
12014     """Verifies a network interface modification.
12015
12016     """
12017     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12018       ip = params.get(constants.INIC_IP, None)
12019       if ip is None:
12020         pass
12021       elif ip.lower() == constants.VALUE_NONE:
12022         params[constants.INIC_IP] = None
12023       elif not netutils.IPAddress.IsValid(ip):
12024         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12025                                    errors.ECODE_INVAL)
12026
12027       bridge = params.get("bridge", None)
12028       link = params.get(constants.INIC_LINK, None)
12029       if bridge and link:
12030         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12031                                    " at the same time", errors.ECODE_INVAL)
12032       elif bridge and bridge.lower() == constants.VALUE_NONE:
12033         params["bridge"] = None
12034       elif link and link.lower() == constants.VALUE_NONE:
12035         params[constants.INIC_LINK] = None
12036
12037       if op == constants.DDM_ADD:
12038         macaddr = params.get(constants.INIC_MAC, None)
12039         if macaddr is None:
12040           params[constants.INIC_MAC] = constants.VALUE_AUTO
12041
12042       if constants.INIC_MAC in params:
12043         macaddr = params[constants.INIC_MAC]
12044         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12045           macaddr = utils.NormalizeAndValidateMac(macaddr)
12046
12047         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12048           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12049                                      " modifying an existing NIC",
12050                                      errors.ECODE_INVAL)
12051
12052   def CheckArguments(self):
12053     if not (self.op.nics or self.op.disks or self.op.disk_template or
12054             self.op.hvparams or self.op.beparams or self.op.os_name or
12055             self.op.offline is not None or self.op.runtime_mem):
12056       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12057
12058     if self.op.hvparams:
12059       _CheckGlobalHvParams(self.op.hvparams)
12060
12061     self.op.disks = \
12062       self._UpgradeDiskNicMods("disk", self.op.disks,
12063         opcodes.OpInstanceSetParams.TestDiskModifications)
12064     self.op.nics = \
12065       self._UpgradeDiskNicMods("NIC", self.op.nics,
12066         opcodes.OpInstanceSetParams.TestNicModifications)
12067
12068     # Check disk modifications
12069     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12070                     self._VerifyDiskModification)
12071
12072     if self.op.disks and self.op.disk_template is not None:
12073       raise errors.OpPrereqError("Disk template conversion and other disk"
12074                                  " changes not supported at the same time",
12075                                  errors.ECODE_INVAL)
12076
12077     if (self.op.disk_template and
12078         self.op.disk_template in constants.DTS_INT_MIRROR and
12079         self.op.remote_node is None):
12080       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12081                                  " one requires specifying a secondary node",
12082                                  errors.ECODE_INVAL)
12083
12084     # Check NIC modifications
12085     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12086                     self._VerifyNicModification)
12087
12088   def ExpandNames(self):
12089     self._ExpandAndLockInstance()
12090     # Can't even acquire node locks in shared mode as upcoming changes in
12091     # Ganeti 2.6 will start to modify the node object on disk conversion
12092     self.needed_locks[locking.LEVEL_NODE] = []
12093     self.needed_locks[locking.LEVEL_NODE_RES] = []
12094     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12095
12096   def DeclareLocks(self, level):
12097     # TODO: Acquire group lock in shared mode (disk parameters)
12098     if level == locking.LEVEL_NODE:
12099       self._LockInstancesNodes()
12100       if self.op.disk_template and self.op.remote_node:
12101         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12102         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12103     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12104       # Copy node locks
12105       self.needed_locks[locking.LEVEL_NODE_RES] = \
12106         self.needed_locks[locking.LEVEL_NODE][:]
12107
12108   def BuildHooksEnv(self):
12109     """Build hooks env.
12110
12111     This runs on the master, primary and secondaries.
12112
12113     """
12114     args = dict()
12115     if constants.BE_MINMEM in self.be_new:
12116       args["minmem"] = self.be_new[constants.BE_MINMEM]
12117     if constants.BE_MAXMEM in self.be_new:
12118       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12119     if constants.BE_VCPUS in self.be_new:
12120       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12121     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12122     # information at all.
12123
12124     if self._new_nics is not None:
12125       nics = []
12126
12127       for nic in self._new_nics:
12128         nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12129         mode = nicparams[constants.NIC_MODE]
12130         link = nicparams[constants.NIC_LINK]
12131         nics.append((nic.ip, nic.mac, mode, link))
12132
12133       args["nics"] = nics
12134
12135     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12136     if self.op.disk_template:
12137       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12138     if self.op.runtime_mem:
12139       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12140
12141     return env
12142
12143   def BuildHooksNodes(self):
12144     """Build hooks nodes.
12145
12146     """
12147     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12148     return (nl, nl)
12149
12150   def _PrepareNicModification(self, params, private, old_ip, old_params,
12151                               cluster, pnode):
12152     update_params_dict = dict([(key, params[key])
12153                                for key in constants.NICS_PARAMETERS
12154                                if key in params])
12155
12156     if "bridge" in params:
12157       update_params_dict[constants.NIC_LINK] = params["bridge"]
12158
12159     new_params = _GetUpdatedParams(old_params, update_params_dict)
12160     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12161
12162     new_filled_params = cluster.SimpleFillNIC(new_params)
12163     objects.NIC.CheckParameterSyntax(new_filled_params)
12164
12165     new_mode = new_filled_params[constants.NIC_MODE]
12166     if new_mode == constants.NIC_MODE_BRIDGED:
12167       bridge = new_filled_params[constants.NIC_LINK]
12168       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12169       if msg:
12170         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12171         if self.op.force:
12172           self.warn.append(msg)
12173         else:
12174           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12175
12176     elif new_mode == constants.NIC_MODE_ROUTED:
12177       ip = params.get(constants.INIC_IP, old_ip)
12178       if ip is None:
12179         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12180                                    " on a routed NIC", errors.ECODE_INVAL)
12181
12182     if constants.INIC_MAC in params:
12183       mac = params[constants.INIC_MAC]
12184       if mac is None:
12185         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12186                                    errors.ECODE_INVAL)
12187       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12188         # otherwise generate the MAC address
12189         params[constants.INIC_MAC] = \
12190           self.cfg.GenerateMAC(self.proc.GetECId())
12191       else:
12192         # or validate/reserve the current one
12193         try:
12194           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12195         except errors.ReservationError:
12196           raise errors.OpPrereqError("MAC address '%s' already in use"
12197                                      " in cluster" % mac,
12198                                      errors.ECODE_NOTUNIQUE)
12199
12200     private.params = new_params
12201     private.filled = new_filled_params
12202
12203     return (None, None)
12204
12205   def CheckPrereq(self):
12206     """Check prerequisites.
12207
12208     This only checks the instance list against the existing names.
12209
12210     """
12211     # checking the new params on the primary/secondary nodes
12212
12213     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12214     cluster = self.cluster = self.cfg.GetClusterInfo()
12215     assert self.instance is not None, \
12216       "Cannot retrieve locked instance %s" % self.op.instance_name
12217     pnode = instance.primary_node
12218     nodelist = list(instance.all_nodes)
12219     pnode_info = self.cfg.GetNodeInfo(pnode)
12220     self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12221
12222     # Prepare disk/NIC modifications
12223     self.diskmod = PrepareContainerMods(self.op.disks, None)
12224     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12225
12226     # OS change
12227     if self.op.os_name and not self.op.force:
12228       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12229                       self.op.force_variant)
12230       instance_os = self.op.os_name
12231     else:
12232       instance_os = instance.os
12233
12234     assert not (self.op.disk_template and self.op.disks), \
12235       "Can't modify disk template and apply disk changes at the same time"
12236
12237     if self.op.disk_template:
12238       if instance.disk_template == self.op.disk_template:
12239         raise errors.OpPrereqError("Instance already has disk template %s" %
12240                                    instance.disk_template, errors.ECODE_INVAL)
12241
12242       if (instance.disk_template,
12243           self.op.disk_template) not in self._DISK_CONVERSIONS:
12244         raise errors.OpPrereqError("Unsupported disk template conversion from"
12245                                    " %s to %s" % (instance.disk_template,
12246                                                   self.op.disk_template),
12247                                    errors.ECODE_INVAL)
12248       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12249                           msg="cannot change disk template")
12250       if self.op.disk_template in constants.DTS_INT_MIRROR:
12251         if self.op.remote_node == pnode:
12252           raise errors.OpPrereqError("Given new secondary node %s is the same"
12253                                      " as the primary node of the instance" %
12254                                      self.op.remote_node, errors.ECODE_STATE)
12255         _CheckNodeOnline(self, self.op.remote_node)
12256         _CheckNodeNotDrained(self, self.op.remote_node)
12257         # FIXME: here we assume that the old instance type is DT_PLAIN
12258         assert instance.disk_template == constants.DT_PLAIN
12259         disks = [{constants.IDISK_SIZE: d.size,
12260                   constants.IDISK_VG: d.logical_id[0]}
12261                  for d in instance.disks]
12262         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12263         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12264
12265         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12266         snode_group = self.cfg.GetNodeGroup(snode_info.group)
12267         ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12268         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12269                                 ignore=self.op.ignore_ipolicy)
12270         if pnode_info.group != snode_info.group:
12271           self.LogWarning("The primary and secondary nodes are in two"
12272                           " different node groups; the disk parameters"
12273                           " from the first disk's node group will be"
12274                           " used")
12275
12276     # hvparams processing
12277     if self.op.hvparams:
12278       hv_type = instance.hypervisor
12279       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12280       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12281       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12282
12283       # local check
12284       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12285       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12286       self.hv_proposed = self.hv_new = hv_new # the new actual values
12287       self.hv_inst = i_hvdict # the new dict (without defaults)
12288     else:
12289       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12290                                               instance.hvparams)
12291       self.hv_new = self.hv_inst = {}
12292
12293     # beparams processing
12294     if self.op.beparams:
12295       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12296                                    use_none=True)
12297       objects.UpgradeBeParams(i_bedict)
12298       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12299       be_new = cluster.SimpleFillBE(i_bedict)
12300       self.be_proposed = self.be_new = be_new # the new actual values
12301       self.be_inst = i_bedict # the new dict (without defaults)
12302     else:
12303       self.be_new = self.be_inst = {}
12304       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12305     be_old = cluster.FillBE(instance)
12306
12307     # CPU param validation -- checking every time a paramtere is
12308     # changed to cover all cases where either CPU mask or vcpus have
12309     # changed
12310     if (constants.BE_VCPUS in self.be_proposed and
12311         constants.HV_CPU_MASK in self.hv_proposed):
12312       cpu_list = \
12313         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12314       # Verify mask is consistent with number of vCPUs. Can skip this
12315       # test if only 1 entry in the CPU mask, which means same mask
12316       # is applied to all vCPUs.
12317       if (len(cpu_list) > 1 and
12318           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12319         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12320                                    " CPU mask [%s]" %
12321                                    (self.be_proposed[constants.BE_VCPUS],
12322                                     self.hv_proposed[constants.HV_CPU_MASK]),
12323                                    errors.ECODE_INVAL)
12324
12325       # Only perform this test if a new CPU mask is given
12326       if constants.HV_CPU_MASK in self.hv_new:
12327         # Calculate the largest CPU number requested
12328         max_requested_cpu = max(map(max, cpu_list))
12329         # Check that all of the instance's nodes have enough physical CPUs to
12330         # satisfy the requested CPU mask
12331         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12332                                 max_requested_cpu + 1, instance.hypervisor)
12333
12334     # osparams processing
12335     if self.op.osparams:
12336       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12337       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12338       self.os_inst = i_osdict # the new dict (without defaults)
12339     else:
12340       self.os_inst = {}
12341
12342     self.warn = []
12343
12344     #TODO(dynmem): do the appropriate check involving MINMEM
12345     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12346         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12347       mem_check_list = [pnode]
12348       if be_new[constants.BE_AUTO_BALANCE]:
12349         # either we changed auto_balance to yes or it was from before
12350         mem_check_list.extend(instance.secondary_nodes)
12351       instance_info = self.rpc.call_instance_info(pnode, instance.name,
12352                                                   instance.hypervisor)
12353       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12354                                          [instance.hypervisor])
12355       pninfo = nodeinfo[pnode]
12356       msg = pninfo.fail_msg
12357       if msg:
12358         # Assume the primary node is unreachable and go ahead
12359         self.warn.append("Can't get info from primary node %s: %s" %
12360                          (pnode, msg))
12361       else:
12362         (_, _, (pnhvinfo, )) = pninfo.payload
12363         if not isinstance(pnhvinfo.get("memory_free", None), int):
12364           self.warn.append("Node data from primary node %s doesn't contain"
12365                            " free memory information" % pnode)
12366         elif instance_info.fail_msg:
12367           self.warn.append("Can't get instance runtime information: %s" %
12368                           instance_info.fail_msg)
12369         else:
12370           if instance_info.payload:
12371             current_mem = int(instance_info.payload["memory"])
12372           else:
12373             # Assume instance not running
12374             # (there is a slight race condition here, but it's not very
12375             # probable, and we have no other way to check)
12376             # TODO: Describe race condition
12377             current_mem = 0
12378           #TODO(dynmem): do the appropriate check involving MINMEM
12379           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12380                       pnhvinfo["memory_free"])
12381           if miss_mem > 0:
12382             raise errors.OpPrereqError("This change will prevent the instance"
12383                                        " from starting, due to %d MB of memory"
12384                                        " missing on its primary node" %
12385                                        miss_mem,
12386                                        errors.ECODE_NORES)
12387
12388       if be_new[constants.BE_AUTO_BALANCE]:
12389         for node, nres in nodeinfo.items():
12390           if node not in instance.secondary_nodes:
12391             continue
12392           nres.Raise("Can't get info from secondary node %s" % node,
12393                      prereq=True, ecode=errors.ECODE_STATE)
12394           (_, _, (nhvinfo, )) = nres.payload
12395           if not isinstance(nhvinfo.get("memory_free", None), int):
12396             raise errors.OpPrereqError("Secondary node %s didn't return free"
12397                                        " memory information" % node,
12398                                        errors.ECODE_STATE)
12399           #TODO(dynmem): do the appropriate check involving MINMEM
12400           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12401             raise errors.OpPrereqError("This change will prevent the instance"
12402                                        " from failover to its secondary node"
12403                                        " %s, due to not enough memory" % node,
12404                                        errors.ECODE_STATE)
12405
12406     if self.op.runtime_mem:
12407       remote_info = self.rpc.call_instance_info(instance.primary_node,
12408                                                 instance.name,
12409                                                 instance.hypervisor)
12410       remote_info.Raise("Error checking node %s" % instance.primary_node)
12411       if not remote_info.payload: # not running already
12412         raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12413                                    errors.ECODE_STATE)
12414
12415       current_memory = remote_info.payload["memory"]
12416       if (not self.op.force and
12417            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12418             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12419         raise errors.OpPrereqError("Instance %s must have memory between %d"
12420                                    " and %d MB of memory unless --force is"
12421                                    " given" % (instance.name,
12422                                     self.be_proposed[constants.BE_MINMEM],
12423                                     self.be_proposed[constants.BE_MAXMEM]),
12424                                    errors.ECODE_INVAL)
12425
12426       if self.op.runtime_mem > current_memory:
12427         _CheckNodeFreeMemory(self, instance.primary_node,
12428                              "ballooning memory for instance %s" %
12429                              instance.name,
12430                              self.op.memory - current_memory,
12431                              instance.hypervisor)
12432
12433     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12434       raise errors.OpPrereqError("Disk operations not supported for"
12435                                  " diskless instances",
12436                                  errors.ECODE_INVAL)
12437
12438     def _PrepareNicCreate(_, params, private):
12439       return self._PrepareNicModification(params, private, None, {},
12440                                           cluster, pnode)
12441
12442     def _PrepareNicMod(_, nic, params, private):
12443       return self._PrepareNicModification(params, private, nic.ip,
12444                                           nic.nicparams, cluster, pnode)
12445
12446     # Verify NIC changes (operating on copy)
12447     nics = instance.nics[:]
12448     ApplyContainerMods("NIC", nics, None, self.nicmod,
12449                        _PrepareNicCreate, _PrepareNicMod, None)
12450     if len(nics) > constants.MAX_NICS:
12451       raise errors.OpPrereqError("Instance has too many network interfaces"
12452                                  " (%d), cannot add more" % constants.MAX_NICS,
12453                                  errors.ECODE_STATE)
12454
12455     # Verify disk changes (operating on a copy)
12456     disks = instance.disks[:]
12457     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12458     if len(disks) > constants.MAX_DISKS:
12459       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12460                                  " more" % constants.MAX_DISKS,
12461                                  errors.ECODE_STATE)
12462
12463     if self.op.offline is not None:
12464       if self.op.offline:
12465         msg = "can't change to offline"
12466       else:
12467         msg = "can't change to online"
12468       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12469
12470     # Pre-compute NIC changes (necessary to use result in hooks)
12471     self._nic_chgdesc = []
12472     if self.nicmod:
12473       # Operate on copies as this is still in prereq
12474       nics = [nic.Copy() for nic in instance.nics]
12475       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12476                          self._CreateNewNic, self._ApplyNicMods, None)
12477       self._new_nics = nics
12478     else:
12479       self._new_nics = None
12480
12481   def _ConvertPlainToDrbd(self, feedback_fn):
12482     """Converts an instance from plain to drbd.
12483
12484     """
12485     feedback_fn("Converting template to drbd")
12486     instance = self.instance
12487     pnode = instance.primary_node
12488     snode = self.op.remote_node
12489
12490     assert instance.disk_template == constants.DT_PLAIN
12491
12492     # create a fake disk info for _GenerateDiskTemplate
12493     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12494                   constants.IDISK_VG: d.logical_id[0]}
12495                  for d in instance.disks]
12496     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12497                                       instance.name, pnode, [snode],
12498                                       disk_info, None, None, 0, feedback_fn,
12499                                       self.diskparams)
12500     info = _GetInstanceInfoText(instance)
12501     feedback_fn("Creating aditional volumes...")
12502     # first, create the missing data and meta devices
12503     for disk in new_disks:
12504       # unfortunately this is... not too nice
12505       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12506                             info, True)
12507       for child in disk.children:
12508         _CreateSingleBlockDev(self, snode, instance, child, info, True)
12509     # at this stage, all new LVs have been created, we can rename the
12510     # old ones
12511     feedback_fn("Renaming original volumes...")
12512     rename_list = [(o, n.children[0].logical_id)
12513                    for (o, n) in zip(instance.disks, new_disks)]
12514     result = self.rpc.call_blockdev_rename(pnode, rename_list)
12515     result.Raise("Failed to rename original LVs")
12516
12517     feedback_fn("Initializing DRBD devices...")
12518     # all child devices are in place, we can now create the DRBD devices
12519     for disk in new_disks:
12520       for node in [pnode, snode]:
12521         f_create = node == pnode
12522         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12523
12524     # at this point, the instance has been modified
12525     instance.disk_template = constants.DT_DRBD8
12526     instance.disks = new_disks
12527     self.cfg.Update(instance, feedback_fn)
12528
12529     # Release node locks while waiting for sync
12530     _ReleaseLocks(self, locking.LEVEL_NODE)
12531
12532     # disks are created, waiting for sync
12533     disk_abort = not _WaitForSync(self, instance,
12534                                   oneshot=not self.op.wait_for_sync)
12535     if disk_abort:
12536       raise errors.OpExecError("There are some degraded disks for"
12537                                " this instance, please cleanup manually")
12538
12539     # Node resource locks will be released by caller
12540
12541   def _ConvertDrbdToPlain(self, feedback_fn):
12542     """Converts an instance from drbd to plain.
12543
12544     """
12545     instance = self.instance
12546
12547     assert len(instance.secondary_nodes) == 1
12548     assert instance.disk_template == constants.DT_DRBD8
12549
12550     pnode = instance.primary_node
12551     snode = instance.secondary_nodes[0]
12552     feedback_fn("Converting template to plain")
12553
12554     old_disks = instance.disks
12555     new_disks = [d.children[0] for d in old_disks]
12556
12557     # copy over size and mode
12558     for parent, child in zip(old_disks, new_disks):
12559       child.size = parent.size
12560       child.mode = parent.mode
12561
12562     # update instance structure
12563     instance.disks = new_disks
12564     instance.disk_template = constants.DT_PLAIN
12565     self.cfg.Update(instance, feedback_fn)
12566
12567     # Release locks in case removing disks takes a while
12568     _ReleaseLocks(self, locking.LEVEL_NODE)
12569
12570     feedback_fn("Removing volumes on the secondary node...")
12571     for disk in old_disks:
12572       self.cfg.SetDiskID(disk, snode)
12573       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12574       if msg:
12575         self.LogWarning("Could not remove block device %s on node %s,"
12576                         " continuing anyway: %s", disk.iv_name, snode, msg)
12577
12578     feedback_fn("Removing unneeded volumes on the primary node...")
12579     for idx, disk in enumerate(old_disks):
12580       meta = disk.children[1]
12581       self.cfg.SetDiskID(meta, pnode)
12582       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12583       if msg:
12584         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12585                         " continuing anyway: %s", idx, pnode, msg)
12586
12587     # this is a DRBD disk, return its port to the pool
12588     for disk in old_disks:
12589       tcp_port = disk.logical_id[2]
12590       self.cfg.AddTcpUdpPort(tcp_port)
12591
12592     # Node resource locks will be released by caller
12593
12594   def _CreateNewDisk(self, idx, params, _):
12595     """Creates a new disk.
12596
12597     """
12598     instance = self.instance
12599
12600     # add a new disk
12601     if instance.disk_template in constants.DTS_FILEBASED:
12602       (file_driver, file_path) = instance.disks[0].logical_id
12603       file_path = os.path.dirname(file_path)
12604     else:
12605       file_driver = file_path = None
12606
12607     disk = \
12608       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12609                             instance.primary_node, instance.secondary_nodes,
12610                             [params], file_path, file_driver, idx,
12611                             self.Log, self.diskparams)[0]
12612
12613     info = _GetInstanceInfoText(instance)
12614
12615     logging.info("Creating volume %s for instance %s",
12616                  disk.iv_name, instance.name)
12617     # Note: this needs to be kept in sync with _CreateDisks
12618     #HARDCODE
12619     for node in instance.all_nodes:
12620       f_create = (node == instance.primary_node)
12621       try:
12622         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12623       except errors.OpExecError, err:
12624         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12625                         disk.iv_name, disk, node, err)
12626
12627     return (disk, [
12628       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12629       ])
12630
12631   @staticmethod
12632   def _ModifyDisk(idx, disk, params, _):
12633     """Modifies a disk.
12634
12635     """
12636     disk.mode = params[constants.IDISK_MODE]
12637
12638     return [
12639       ("disk.mode/%d" % idx, disk.mode),
12640       ]
12641
12642   def _RemoveDisk(self, idx, root, _):
12643     """Removes a disk.
12644
12645     """
12646     for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12647       self.cfg.SetDiskID(disk, node)
12648       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12649       if msg:
12650         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12651                         " continuing anyway", idx, node, msg)
12652
12653     # if this is a DRBD disk, return its port to the pool
12654     if root.dev_type in constants.LDS_DRBD:
12655       self.cfg.AddTcpUdpPort(root.logical_id[2])
12656
12657   @staticmethod
12658   def _CreateNewNic(idx, params, private):
12659     """Creates data structure for a new network interface.
12660
12661     """
12662     mac = params[constants.INIC_MAC]
12663     ip = params.get(constants.INIC_IP, None)
12664     nicparams = private.params
12665
12666     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12667       ("nic.%d" % idx,
12668        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12669        (mac, ip, private.filled[constants.NIC_MODE],
12670        private.filled[constants.NIC_LINK])),
12671       ])
12672
12673   @staticmethod
12674   def _ApplyNicMods(idx, nic, params, private):
12675     """Modifies a network interface.
12676
12677     """
12678     changes = []
12679
12680     for key in [constants.INIC_MAC, constants.INIC_IP]:
12681       if key in params:
12682         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12683         setattr(nic, key, params[key])
12684
12685     if private.params:
12686       nic.nicparams = private.params
12687
12688       for (key, val) in params.items():
12689         changes.append(("nic.%s/%d" % (key, idx), val))
12690
12691     return changes
12692
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.
    (The runtime memory, disk and disk template changes are however
    carried out by this method itself, through RPC calls and helper
    functions.)

    @param feedback_fn: function used to report progress and the warnings
        collected in C{self.warn}
    @rtype: list
    @return: list of (name, new value) tuples describing the changes made

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    # Node resource locks must be owned exactly when a disk template
    # conversion was requested
    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      if __debug__:
        # Verify that all nodes involved in the conversion are locked at
        # both the node and the node resource level
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        # The table holds plain functions, hence self is passed explicitly
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # Release the DRBD minors for any kind of failure, then re-raise
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      # The new NIC list and its change descriptions were pre-computed in
      # CheckPrereq
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    # Store the modified instance object in the configuration
    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result
12803
  # Supported disk template conversions, keyed by (current template,
  # requested template). The values are the plain functions defined above
  # (not bound methods), so Exec calls them with self as first argument.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
12808
12809
class LUInstanceChangeGroup(LogicalUnit):
  """Computes the jobs needed to change the node group of an instance.

  The solution is computed by an iallocator run in change-group mode and
  returned as a list of jobs.

  """
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declares the (shared) locks and resolves the target groups.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    """Computes the node group and node locks.

    @param level: the locking level for which locks are being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Verifies the locks and computes the set of target groups.

    @raise errors.OpPrereqError: if a target group is already used by the
        instance or if there is no possible target group

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups. The requested UUIDs are a
      # list, which doesn't support the set intersection used below, hence
      # they're converted to a set (like in the other branch)
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the instance hook environment plus C{TARGET_GROUPS}, the
        space-separated UUIDs of the target groups

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two lists, each containing only the master node

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Runs the iallocator and returns the jobs it proposes.

    @param feedback_fn: function used to report progress (not used here)
    @return: a L{ResultWithJobs} object wrapping the jobs computed from
        the iallocator result
    @raise errors.OpPrereqError: if the iallocator can't compute a
        solution

    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
12949
12950
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares shared node locks for the requested (or all) nodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      node_locks = _GetWantedNodes(self, self.op.nodes)
    else:
      node_locks = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; for nodes whose RPC call failed the value is
        C{False} instead

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    exports = {}
    for (node, nres) in self.rpc.call_export_list(self.nodes).items():
      if nres.fail_msg:
        exports[node] = False
      else:
        exports[node] = nres.payload

    return exports
12985
12986
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expands the instance name and declares the instance lock.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance, checks that its primary node is online and reads
    the cluster domain secret.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @param feedback_fn: function used to report progress
    @return: for remote exports a dictionary with the keys C{handshake},
        C{x509_key_name} and C{x509_ca}; C{None} for all other modes

    """
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    pnode = self.instance.primary_node
    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % pnode)
    cert_res = self.rpc.call_x509_cert_create(pnode,
                                              constants.RIE_CERT_VALIDITY)
    cert_res.Raise("Can't create X509 key and certificate on %s" %
                   cert_res.node)

    (key_name, cert_pem) = cert_res.payload

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # the key name and the certificate are signed with the cluster domain
    # secret
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, key_name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_name_hmac, salt),
      "x509_ca": signed_ca,
      }
13036
13037
13038 class LUBackupExport(LogicalUnit):
13039   """Export an instance to an image in the cluster.
13040
13041   """
13042   HPATH = "instance-export"
13043   HTYPE = constants.HTYPE_INSTANCE
13044   REQ_BGL = False
13045
13046   def CheckArguments(self):
13047     """Check the arguments.
13048
13049     """
13050     self.x509_key_name = self.op.x509_key_name
13051     self.dest_x509_ca_pem = self.op.destination_x509_ca
13052
13053     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13054       if not self.x509_key_name:
13055         raise errors.OpPrereqError("Missing X509 key name for encryption",
13056                                    errors.ECODE_INVAL)
13057
13058       if not self.dest_x509_ca_pem:
13059         raise errors.OpPrereqError("Missing destination X509 CA",
13060                                    errors.ECODE_INVAL)
13061
13062   def ExpandNames(self):
13063     self._ExpandAndLockInstance()
13064
13065     # Lock all nodes for local exports
13066     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13067       # FIXME: lock only instance primary and destination node
13068       #
13069       # Sad but true, for now we have do lock all nodes, as we don't know where
13070       # the previous export might be, and in this LU we search for it and
13071       # remove it from its current node. In the future we could fix this by:
13072       #  - making a tasklet to search (share-lock all), then create the
13073       #    new one, then one to remove, after
13074       #  - removing the removal operation altogether
13075       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13076
13077   def DeclareLocks(self, level):
13078     """Last minute lock declaration."""
13079     # All nodes are locked anyway, so nothing to do here.
13080
13081   def BuildHooksEnv(self):
13082     """Build hooks env.
13083
13084     This will run on the master, primary node and target node.
13085
13086     """
13087     env = {
13088       "EXPORT_MODE": self.op.mode,
13089       "EXPORT_NODE": self.op.target_node,
13090       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13091       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13092       # TODO: Generic function for boolean env variables
13093       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13094       }
13095
13096     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13097
13098     return env
13099
13100   def BuildHooksNodes(self):
13101     """Build hooks nodes.
13102
13103     """
13104     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13105
13106     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13107       nl.append(self.op.target_node)
13108
13109     return (nl, nl)
13110
13111   def CheckPrereq(self):
13112     """Check prerequisites.
13113
13114     This checks that the instance and node names are valid.
13115
13116     """
13117     instance_name = self.op.instance_name
13118
13119     self.instance = self.cfg.GetInstanceInfo(instance_name)
13120     assert self.instance is not None, \
13121           "Cannot retrieve locked instance %s" % self.op.instance_name
13122     _CheckNodeOnline(self, self.instance.primary_node)
13123
13124     if (self.op.remove_instance and
13125         self.instance.admin_state == constants.ADMINST_UP and
13126         not self.op.shutdown):
13127       raise errors.OpPrereqError("Can not remove instance without shutting it"
13128                                  " down before")
13129
13130     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13131       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13132       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13133       assert self.dst_node is not None
13134
13135       _CheckNodeOnline(self, self.dst_node.name)
13136       _CheckNodeNotDrained(self, self.dst_node.name)
13137
13138       self._cds = None
13139       self.dest_disk_info = None
13140       self.dest_x509_ca = None
13141
13142     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13143       self.dst_node = None
13144
13145       if len(self.op.target_node) != len(self.instance.disks):
13146         raise errors.OpPrereqError(("Received destination information for %s"
13147                                     " disks, but instance %s has %s disks") %
13148                                    (len(self.op.target_node), instance_name,
13149                                     len(self.instance.disks)),
13150                                    errors.ECODE_INVAL)
13151
13152       cds = _GetClusterDomainSecret()
13153
13154       # Check X509 key name
13155       try:
13156         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13157       except (TypeError, ValueError), err:
13158         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13159
13160       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13161         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13162                                    errors.ECODE_INVAL)
13163
13164       # Load and verify CA
13165       try:
13166         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13167       except OpenSSL.crypto.Error, err:
13168         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13169                                    (err, ), errors.ECODE_INVAL)
13170
13171       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13172       if errcode is not None:
13173         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13174                                    (msg, ), errors.ECODE_INVAL)
13175
13176       self.dest_x509_ca = cert
13177
13178       # Verify target information
13179       disk_info = []
13180       for idx, disk_data in enumerate(self.op.target_node):
13181         try:
13182           (host, port, magic) = \
13183             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13184         except errors.GenericError, err:
13185           raise errors.OpPrereqError("Target info for disk %s: %s" %
13186                                      (idx, err), errors.ECODE_INVAL)
13187
13188         disk_info.append((host, port, magic))
13189
13190       assert len(disk_info) == len(self.op.target_node)
13191       self.dest_disk_info = disk_info
13192
13193     else:
13194       raise errors.ProgrammerError("Unhandled export mode %r" %
13195                                    self.op.mode)
13196
13197     # instance disk type verification
13198     # TODO: Implement export support for file-based disks
13199     for disk in self.instance.disks:
13200       if disk.dev_type == constants.LD_FILE:
13201         raise errors.OpPrereqError("Export not supported for instances with"
13202                                    " file-based disks", errors.ECODE_INVAL)
13203
13204   def _CleanupExports(self, feedback_fn):
13205     """Removes exports of current instance from all other nodes.
13206
13207     If an instance in a cluster with nodes A..D was exported to node C, its
13208     exports will be removed from the nodes A, B and D.
13209
13210     """
13211     assert self.op.mode != constants.EXPORT_MODE_REMOTE
13212
13213     nodelist = self.cfg.GetNodeList()
13214     nodelist.remove(self.dst_node.name)
13215
13216     # on one-node clusters nodelist will be empty after the removal
13217     # if we proceed the backup would be removed because OpBackupQuery
13218     # substitutes an empty list with the full cluster node list.
13219     iname = self.instance.name
13220     if nodelist:
13221       feedback_fn("Removing old exports for instance %s" % iname)
13222       exportlist = self.rpc.call_export_list(nodelist)
13223       for node in exportlist:
13224         if exportlist[node].fail_msg:
13225           continue
13226         if iname in exportlist[node].payload:
13227           msg = self.rpc.call_export_remove(node, iname).fail_msg
13228           if msg:
13229             self.LogWarning("Could not remove older export for instance %s"
13230                             " on node %s: %s", iname, node, msg)
13231
13232   def Exec(self, feedback_fn):
13233     """Export an instance to an image in the cluster.
13234
13235     """
13236     assert self.op.mode in constants.EXPORT_MODES
13237
13238     instance = self.instance
13239     src_node = instance.primary_node
13240
13241     if self.op.shutdown:
13242       # shutdown the instance, but not the disks
13243       feedback_fn("Shutting down instance %s" % instance.name)
13244       result = self.rpc.call_instance_shutdown(src_node, instance,
13245                                                self.op.shutdown_timeout)
13246       # TODO: Maybe ignore failures if ignore_remove_failures is set
13247       result.Raise("Could not shutdown instance %s on"
13248                    " node %s" % (instance.name, src_node))
13249
13250     # set the disks ID correctly since call_instance_start needs the
13251     # correct drbd minor to create the symlinks
13252     for disk in instance.disks:
13253       self.cfg.SetDiskID(disk, src_node)
13254
13255     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13256
13257     if activate_disks:
13258       # Activate the instance disks if we'exporting a stopped instance
13259       feedback_fn("Activating disks for %s" % instance.name)
13260       _StartInstanceDisks(self, instance, None)
13261
13262     try:
13263       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13264                                                      instance)
13265
13266       helper.CreateSnapshots()
13267       try:
13268         if (self.op.shutdown and
13269             instance.admin_state == constants.ADMINST_UP and
13270             not self.op.remove_instance):
13271           assert not activate_disks
13272           feedback_fn("Starting instance %s" % instance.name)
13273           result = self.rpc.call_instance_start(src_node,
13274                                                 (instance, None, None), False)
13275           msg = result.fail_msg
13276           if msg:
13277             feedback_fn("Failed to start instance: %s" % msg)
13278             _ShutdownInstanceDisks(self, instance)
13279             raise errors.OpExecError("Could not start instance: %s" % msg)
13280
13281         if self.op.mode == constants.EXPORT_MODE_LOCAL:
13282           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13283         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13284           connect_timeout = constants.RIE_CONNECT_TIMEOUT
13285           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13286
13287           (key_name, _, _) = self.x509_key_name
13288
13289           dest_ca_pem = \
13290             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13291                                             self.dest_x509_ca)
13292
13293           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13294                                                      key_name, dest_ca_pem,
13295                                                      timeouts)
13296       finally:
13297         helper.Cleanup()
13298
13299       # Check for backwards compatibility
13300       assert len(dresults) == len(instance.disks)
13301       assert compat.all(isinstance(i, bool) for i in dresults), \
13302              "Not all results are boolean: %r" % dresults
13303
13304     finally:
13305       if activate_disks:
13306         feedback_fn("Deactivating disks for %s" % instance.name)
13307         _ShutdownInstanceDisks(self, instance)
13308
13309     if not (compat.all(dresults) and fin_resu):
13310       failures = []
13311       if not fin_resu:
13312         failures.append("export finalization")
13313       if not compat.all(dresults):
13314         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13315                                if not dsk)
13316         failures.append("disk export: disk(s) %s" % fdsk)
13317
13318       raise errors.OpExecError("Export failed, errors in %s" %
13319                                utils.CommaJoin(failures))
13320
13321     # At this point, the export was successful, we can cleanup/finish
13322
13323     # Remove instance if requested
13324     if self.op.remove_instance:
13325       feedback_fn("Removing instance %s" % instance.name)
13326       _RemoveInstance(self, feedback_fn, instance,
13327                       self.op.ignore_remove_failures)
13328
13329     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13330       self._CleanupExports(feedback_fn)
13331
13332     return fin_resu, dresults
13333
13334
class LUBackupRemove(NoHooksLU):
  """Deletes every export stored under a given instance name.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Requests the locks of all nodes.

    """
    # RemoveExport only works with every node locked. The instance itself is
    # left unlocked: nothing happens to it, and exports of instances that
    # were already removed must be removable too.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove any export.

    Every locked node is asked for its export list; wherever the instance
    name shows up the export is removed. Nodes that cannot be queried are
    skipped with a warning and removal failures are only logged.

    @param feedback_fn: callable used to tell the user when nothing was
      found for a name that could not be expanded

    """
    export_name = self.cfg.ExpandInstanceName(self.op.instance_name)

    # An unknown instance leaves us with just the name as passed in, which
    # can only match an export if it already was an FQDN.
    unexpanded = not export_name
    if unexpanded:
      export_name = self.op.instance_name

    node_names = self.owned_locks(locking.LEVEL_NODE)
    per_node = self.rpc.call_export_list(node_names)

    found = False
    for node in per_node:
      listing = per_node[node]

      query_err = listing.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s", node,
                        query_err)
        continue

      if export_name not in listing.payload:
        continue

      found = True
      remove_err = self.rpc.call_export_remove(node, export_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", export_name, node, remove_err)

    if unexpanded and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
13380
13381
13382 class LUGroupAdd(LogicalUnit):
13383   """Logical unit for creating node groups.
13384
13385   """
13386   HPATH = "group-add"
13387   HTYPE = constants.HTYPE_GROUP
13388   REQ_BGL = False
13389
13390   def ExpandNames(self):
13391     # We need the new group's UUID here so that we can create and acquire the
13392     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13393     # that it should not check whether the UUID exists in the configuration.
13394     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13395     self.needed_locks = {}
13396     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13397
13398   def CheckPrereq(self):
13399     """Check prerequisites.
13400
13401     This checks that the given group name is not an existing node group
13402     already.
13403
13404     """
13405     try:
13406       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13407     except errors.OpPrereqError:
13408       pass
13409     else:
13410       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13411                                  " node group (UUID: %s)" %
13412                                  (self.op.group_name, existing_uuid),
13413                                  errors.ECODE_EXISTS)
13414
13415     if self.op.ndparams:
13416       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13417
13418     if self.op.hv_state:
13419       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13420     else:
13421       self.new_hv_state = None
13422
13423     if self.op.disk_state:
13424       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13425     else:
13426       self.new_disk_state = None
13427
13428     if self.op.diskparams:
13429       for templ in constants.DISK_TEMPLATES:
13430         if templ not in self.op.diskparams:
13431           self.op.diskparams[templ] = {}
13432         utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13433     else:
13434       self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13435
13436     if self.op.ipolicy:
13437       cluster = self.cfg.GetClusterInfo()
13438       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13439       try:
13440         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13441       except errors.ConfigurationError, err:
13442         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13443                                    errors.ECODE_INVAL)
13444
13445   def BuildHooksEnv(self):
13446     """Build hooks env.
13447
13448     """
13449     return {
13450       "GROUP_NAME": self.op.group_name,
13451       }
13452
13453   def BuildHooksNodes(self):
13454     """Build hooks nodes.
13455
13456     """
13457     mn = self.cfg.GetMasterNode()
13458     return ([mn], [mn])
13459
13460   def Exec(self, feedback_fn):
13461     """Add the node group to the cluster.
13462
13463     """
13464     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13465                                   uuid=self.group_uuid,
13466                                   alloc_policy=self.op.alloc_policy,
13467                                   ndparams=self.op.ndparams,
13468                                   diskparams=self.op.diskparams,
13469                                   ipolicy=self.op.ipolicy,
13470                                   hv_state_static=self.new_hv_state,
13471                                   disk_state_static=self.new_disk_state)
13472
13473     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13474     del self.remove_locks[locking.LEVEL_NODEGROUP]
13475
13476
class LUGroupAssignNodes(NoHooksLU):
  """Moves a set of nodes into a given node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves the group and node names and declares the initial locks.

    """
    # Both lookups raise errors.OpPrereqError by themselves
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # All affected nodes and groups are to be locked. The nodes and the
    # *destination* group are known right away; the "source" groups can only
    # be found by looking at the nodes, which DeclareLocks does later.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    """Adds the nodes' current groups to the node group locks.

    @param level: locking level currently being processed

    """
    if level != locking.LEVEL_NODEGROUP:
      return

    group_locks = self.needed_locks[locking.LEVEL_NODEGROUP]
    assert len(group_locks) == 1

    # Neither group nor node locks are held at this point, so this is only
    # a guess that CheckPrereq has to verify.
    group_locks.update(self.cfg.GetNodeGroupsFromNodes(self.op.nodes))

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the owned group locks still match the nodes' groups and
    computes which instances the assignment would split across groups.

    @raise errors.OpExecError: if nodes changed groups in the meantime, the
      target group cannot be retrieved, or instances would be newly split
      and force was not requested

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    all_instances = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(node, self.group_uuid) for node in self.op.nodes]
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            all_instances)

    if not new_splits:
      return

    fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               fmt_new_splits)

    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    @param feedback_fn: callable for progress messages (not used here)

    """
    self.cfg.AssignGroupNodes([(node_name, self.group_uuid)
                               for node_name in self.op.nodes])

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    The whole list of assignments is treated as one atomic operation; the
    result describes the situation after all of them have been applied.

    Instances are reported in two groups: those that only become split
    because of the change, and those that were split before and still are
    afterwards.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Only assignments that really move a node to another group matter
    moved = {}
    for (node, group) in changes:
      if node_data[node].group != group:
        moved[node] = group

    all_split_instances = set()
    previously_split_instances = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      inst_nodes = [inst.primary_node] + list(inst.secondary_nodes)

      groups_before = set(node_data[node].group for node in inst_nodes)
      if len(groups_before) > 1:
        previously_split_instances.add(inst.name)

      groups_after = set(moved.get(node, node_data[node].group)
                         for node in inst_nodes)
      if len(groups_after) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
13608
13609
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Turns the requested names into a list of group UUIDs.

    No locks are requested. Without any names all groups are selected,
    ordered by name; otherwise each entry may be a group name or a UUID.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if a requested group does not exist

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()

    uuid_by_name = {}
    for group in self._all_groups.values():
      uuid_by_name[group.name] = group.uuid

    if not self.names:
      self.wanted = [uuid_by_name[name]
                     for name in utils.NiceSort(uuid_by_name.keys())]
      return

    # Each requested entry may be a UUID or a name; UUIDs are tried first
    known_uuids = frozenset(self._all_groups.keys())
    unknown = []
    self.wanted = wanted = []

    for name in self.names:
      if name in known_uuids:
        wanted.append(name)
      elif name in uuid_by_name:
        wanted.append(uuid_by_name[name])
      else:
        unknown.append(name)

    if unknown:
      raise errors.OpPrereqError("Some groups do not exist: %s" %
                                 utils.CommaJoin(unknown),
                                 errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Does nothing, this query declares no further locks.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.GroupQueryData} object for the wanted groups; the
      group-to-nodes and group-to-instances mappings are C{None} unless the
      corresponding data was requested

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    nodes_by_group = None
    instances_by_group = None

    # GQ_NODE needs group->[nodes], which GetAllNodesInfo() alone provides.
    # GQ_INST needs group->[instances], which can only be derived by going
    # instance->node->group, so the nodes are walked in that case as well.
    if want_nodes or want_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()

      nodes_by_group = {}
      for uuid in self.wanted:
        nodes_by_group[uuid] = []

      group_of_node = {}
      for node in all_nodes.values():
        if node.group not in nodes_by_group:
          continue
        nodes_by_group[node.group].append(node.name)
        group_of_node[node.name] = node.group

      if want_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()

        instances_by_group = {}
        for uuid in self.wanted:
          instances_by_group[uuid] = []

        # An instance is counted for the group of its primary node
        for inst in all_instances.values():
          pnode = inst.primary_node
          if pnode in group_of_node:
            instances_by_group[group_of_node[pnode]].append(inst.name)

        if not want_nodes:
          # The node mapping was only a means to an end, do not hand it on
          nodes_by_group = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]

    return query.GroupQueryData(self._cluster, wanted_groups,
                                nodes_by_group, instances_by_group)
13687
13688
class LUGroupQuery(NoHooksLU):
  """Answers node group queries by delegating to L{_GroupQuery}.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Builds the query helper from the requested names and output fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Hands name expansion over to the query helper.

    """
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Hands lock declaration over to the query helper.

    @param level: locking level currently being processed

    """
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query.

    @param feedback_fn: callable for progress messages (not used here)
    @return: whatever the helper's C{OldStyleQuery} returns

    """
    return self.gq.OldStyleQuery(self)
13707
13708
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    """Ensures that at least one modification was requested.

    @raise errors.OpPrereqError: if every modifiable parameter is C{None}

    """
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.hv_state,
      self.op.disk_state,
      self.op.ipolicy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Resolves the group name and declares the group and instance locks.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    """Fills in the instance locks from the group's instances.

    @param level: locking level currently being processed

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically acquired instance locks and computes the new
    values for every parameter that is being changed. For instance policy
    changes a warning lists the instances reported as violations.

    @raise errors.OpExecError: if the group cannot be retrieved

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    cluster = self.cfg.GetClusterInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      # Type-check the merged dictionary, i.e. the one that gets stored,
      # mirroring the disk parameter handling below
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.diskparams:
      self.new_diskparams = dict()
      for templ in constants.DISK_TEMPLATES:
        # Templates not mentioned in the request are updated with an empty
        # dictionary
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
                                             self.op.diskparams[templ])
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
        self.new_diskparams[templ] = new_templ_params

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.group.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.group.disk_state_static)

    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
                                            self.op.ipolicy,
                                            group_policy=True)

      new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
      # Only the instances whose locks are owned are examined
      inst_filter = lambda inst: inst.name in owned_instances
      instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
      violations = \
          _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
                                                               self.group),
                                        new_ipolicy, instances)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(violations))

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: dictionary with the group name and the requested allocation
      policy

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two lists, each containing only the master node

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    @param feedback_fn: callable passed on to the configuration update
    @return: list of (parameter name, new value as string) pairs; only node
      and disk parameter changes are listed

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    if self.op.hv_state:
      self.group.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      self.group.disk_state_static = self.new_disk_state

    if self.op.ipolicy:
      self.group.ipolicy = self.new_ipolicy

    self.cfg.Update(self.group, feedback_fn)
    return result
13853
13854
class LUGroupRemove(LogicalUnit):
  """Logical unit removing a node group from the configuration.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name to its UUID and request the group lock.

    """
    # The lookup raises errors.OpPrereqError on its own
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.group_uuid = group_uuid

    wanted_locks = {}
    wanted_locks[locking.LEVEL_NODEGROUP] = [group_uuid]
    self.needed_locks = wanted_locks

  def CheckPrereq(self):
    """Check prerequisites.

    The group must not have any node assigned to it and must not be the
    only node group in the configuration.

    """
    # Collect the names of the nodes still assigned to the group
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        members.append(node.name)

    if members:
      member_list = utils.CommaJoin(utils.NiceSort(members))
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name, member_list),
                                 errors.ECODE_STATE)

    # Removing the only group would leave the cluster without any group
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Returns a tuple of two lists, each holding only the master node.

    """
    master = self.cfg.GetMasterNode()
    first = [master]
    second = [master]
    return (first, second)

  def Exec(self, feedback_fn):
    """Remove the node group.

    A configuration error during removal is reported as an execution error;
    on success the group's lock is registered for removal.

    """
    uuid = self.group_uuid

    try:
      self.cfg.RemoveNodeGroup(uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13920
13921
class LUGroupRename(LogicalUnit):
  """Logical unit giving a node group a new name.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the current group name and request the group lock.

    """
    # The lookup raises errors.OpPrereqError on its own
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.group_uuid = group_uuid

    wanted_locks = {}
    wanted_locks[locking.LEVEL_NODEGROUP] = [group_uuid]
    self.needed_locks = wanted_locks

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    wanted_name = self.op.new_name

    try:
      clash_uuid = self.cfg.LookupNodeGroup(wanted_name)
    except errors.OpPrereqError:
      # The lookup failed, hence no group carries the new name
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (self.op.new_name, clash_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["OLD_NAME"] = self.op.group_name
    env["NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The list starts with the master node, followed by the other nodes whose
    group is the one being renamed; the same list is returned twice.

    """
    master = self.cfg.GetMasterNode()

    others = self.cfg.GetAllNodesInfo()
    # The master is listed first explicitly, do not repeat it below
    others.pop(master, None)

    hook_nodes = [master]
    for node in others.values():
      if node.group == self.group_uuid:
        hook_nodes.append(node.name)

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @return: the new group name

    """
    target = self.cfg.GetNodeGroup(self.group_uuid)

    if target is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    new_name = self.op.new_name
    target.name = new_name
    self.cfg.Update(target, feedback_fn)

    return self.op.new_name
13989
13990
class LUGroupEvacuate(LogicalUnit):
  """Logical unit computing the jobs which move instances out of a group.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve group names and prepare the lock declarations.

    All locks are shared; the actual lock names are filled in level by
    level in L{DeclareLocks}.

    """
    # The lookup raises errors.OpPrereqError on its own
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if not self.op.target_groups:
      self.req_target_uuids = []
    else:
      self.req_target_uuids = [self.cfg.LookupNodeGroup(name)
                               for name in self.op.target_groups]

    if self.group_uuid in self.req_target_uuids:
      target_list = utils.CommaJoin(self.req_target_uuids)
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid, target_list),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()

    wanted_locks = {}
    wanted_locks[locking.LEVEL_INSTANCE] = []
    wanted_locks[locking.LEVEL_NODEGROUP] = []
    wanted_locks[locking.LEVEL_NODE] = []
    self.needed_locks = wanted_locks

  def DeclareLocks(self, level):
    """Fill in the locks needed at the given level.

    @param level: locking level for which the locks are being declared

    """
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      self.needed_locks[locking.LEVEL_INSTANCE] = group_instances

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if not self.req_target_uuids:
        # No target groups, need to lock all of them
        wanted_groups = locking.ALL_SET
      else:
        wanted_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        for inst_name in self.owned_locks(locking.LEVEL_INSTANCE):
          wanted_groups.update(self.cfg.GetInstanceNodeGroups(inst_name))

      self.needed_locks[locking.LEVEL_NODEGROUP] = wanted_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      held_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in held_groups

      # Gather everything first, the lock list is extended in one step
      extra_nodes = []
      for guuid in held_groups:
        for node_name in self.cfg.GetNodeGroup(guuid).members:
          extra_nodes.append(node_name)

      self.needed_locks[locking.LEVEL_NODE].extend(extra_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically acquired locks and determines the groups
    which may receive the instances.

    """
    held_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    held_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    held_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert held_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in held_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, held_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(held_instances))

    # Check if node groups for locked instances are still correct
    for inst_name in held_instances:
      instance = self.instances[inst_name]
      assert held_nodes.issuperset(instance.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % inst_name

      used_groups = _CheckInstanceNodeGroups(self.cfg, inst_name,
                                             held_groups)

      assert self.group_uuid in used_groups, \
        "Instance %s has no node in group %s" % (inst_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
      return

    # All groups except the one to be evacuated are potential targets
    candidates = []
    for guuid in held_groups:
      if guuid != self.group_uuid:
        candidates.append(guuid)
    self.target_uuids = candidates

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    env["TARGET_GROUPS"] = " ".join(self.target_uuids)
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The list holds the master node followed by the members of the evacuated
    group; the same list is returned twice.

    """
    master = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    members = self.cfg.GetNodeGroup(self.group_uuid).members
    hook_nodes = [master] + members

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Run the iallocator and turn its answer into jobs.

    @return: a L{ResultWithJobs} built from the iallocator's result

    """
    inst_names = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    allocator = IAllocator(self.cfg, self.rpc,
                           constants.IALLOCATOR_MODE_CHG_GROUP,
                           instances=inst_names,
                           target_groups=self.target_uuids)

    allocator.Run(self.op.iallocator)

    if not allocator.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, allocator.info),
                                 errors.ECODE_NORES)

    evac_jobs = _LoadNodeEvacResult(self, allocator.result,
                                    self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(evac_jobs), self.op.group_name)

    return ResultWithJobs(evac_jobs)
14145
14146
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    """Expand the target name and declare the lock protecting the target.

    Node and instance names are expanded and the named object is locked.
    Node group names are resolved to the group UUID, which is locked as
    well. Cluster tags do not take any lock (see the FIXME below).

    """
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      # Hold the group lock just like the node and instance branches hold
      # theirs; without it tag operations run unprotected against LUs which
      # lock the group (e.g. group removal or rename)
      self.needed_locks[locking.LEVEL_NODEGROUP] = [self.group_uuid]

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the object the tags belong to into C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is unknown or the target
      object can not be retrieved from the configuration

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)

    # The name was resolved before any lock was held, so the object may be
    # gone by now; fail cleanly instead of letting subclasses trip over None
    if self.target is None:
      raise errors.OpPrereqError("Tag target '%s' of kind '%s' not found" %
                                 (getattr(self.op, "name", None),
                                  self.op.kind),
                                 errors.ECODE_NOENT)
14183
14184
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks of the generic tags LU, all of them shared.

    """
    TagsLU.ExpandNames(self)

    # Tags are only read here, hence shared locks are enough
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @return: the target's tags, converted to a list

    """
    current_tags = self.target.GetTags()
    return list(current_tags)
14202
14203
14204 class LUTagsSearch(NoHooksLU):
14205   """Searches the tags for a given pattern.
14206
14207   """
14208   REQ_BGL = False
14209
14210   def ExpandNames(self):
14211     self.needed_locks = {}
14212
14213   def CheckPrereq(self):
14214     """Check prerequisites.
14215
14216     This checks the pattern passed for validity by compiling it.
14217
14218     """
14219     try:
14220       self.re = re.compile(self.op.pattern)
14221     except re.error, err:
14222       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14223                                  (self.op.pattern, err), errors.ECODE_INVAL)
14224
14225   def Exec(self, feedback_fn):
14226     """Returns the tag list.
14227
14228     """
14229     cfg = self.cfg
14230     tgts = [("/cluster", cfg.GetClusterInfo())]
14231     ilist = cfg.GetAllInstancesInfo().values()
14232     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14233     nlist = cfg.GetAllNodesInfo().values()
14234     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14235     tgts.extend(("/nodegroup/%s" % n.name, n)
14236                 for n in cfg.GetAllNodeGroupsInfo().values())
14237     results = []
14238     for path, target in tgts:
14239       for tag in target.GetTags():
14240         if self.re.search(tag):
14241           results.append((path, tag))
14242     return results
14243
14244
14245 class LUTagsSet(TagsLU):
14246   """Sets a tag on a given object.
14247
14248   """
14249   REQ_BGL = False
14250
14251   def CheckPrereq(self):
14252     """Check prerequisites.
14253
14254     This checks the type and length of the tag name and value.
14255
14256     """
14257     TagsLU.CheckPrereq(self)
14258     for tag in self.op.tags:
14259       objects.TaggableObject.ValidateTag(tag)
14260
14261   def Exec(self, feedback_fn):
14262     """Sets the tag.
14263
14264     """
14265     try:
14266       for tag in self.op.tags:
14267         self.target.AddTag(tag)
14268     except errors.TagError, err:
14269       raise errors.OpExecError("Error while setting tag: %s" % str(err))
14270     self.cfg.Update(self.target, feedback_fn)
14271
14272
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Every tag to be removed must be valid and currently set on the target.

    """
    TagsLU.CheckPrereq(self)
    validate = objects.TaggableObject.ValidateTag
    for old_tag in self.op.tags:
      validate(old_tag)

    requested = frozenset(self.op.tags)
    present = self.target.GetTags()

    # Tags asked for removal which the target does not carry
    missing = requested - present
    if missing:
      quoted = ["'%s'" % name for name in sorted(missing)]
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(quoted), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    The target is written back to the configuration afterwards.

    """
    target = self.target
    for old_tag in self.op.tags:
      target.RemoveTag(old_tag)
    self.cfg.Update(self.target, feedback_fn)
14305
14306
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    When nodes were given, the list is expanded and the nodes are locked;
    otherwise no lock is requested.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    wanted = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = wanted
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def _TestDelay(self):
    """Do the actual sleep.

    The master delay, if requested, is done first; afterwards the delay
    RPC is sent to the requested nodes and every result is checked.

    """
    duration = self.op.duration

    if self.op.on_master:
      master_ok = utils.TestDelay(duration)
      if not master_ok:
        raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      replies = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for (node_name, reply) in replies.items():
        reply.Raise("Failure during rpc call to node %s" % node_name)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero still runs the delay once, without logging.

    """
    count = self.op.repeat

    if count == 0:
      self._TestDelay()
      return

    last = count - 1
    for cur in range(count):
      self.LogInfo("Test delay iteration %d/%d" % (cur, last))
      self._TestDelay()
14353
14354
14355 class LUTestJqueue(NoHooksLU):
14356   """Utility LU to test some aspects of the job queue.
14357
14358   """
14359   REQ_BGL = False
14360
14361   # Must be lower than default timeout for WaitForJobChange to see whether it
14362   # notices changed jobs
14363   _CLIENT_CONNECT_TIMEOUT = 20.0
14364   _CLIENT_CONFIRM_TIMEOUT = 60.0
14365
14366   @classmethod
14367   def _NotifyUsingSocket(cls, cb, errcls):
14368     """Opens a Unix socket and waits for another program to connect.
14369
14370     @type cb: callable
14371     @param cb: Callback to send socket name to client
14372     @type errcls: class
14373     @param errcls: Exception class to use for errors
14374
14375     """
14376     # Using a temporary directory as there's no easy way to create temporary
14377     # sockets without writing a custom loop around tempfile.mktemp and
14378     # socket.bind
14379     tmpdir = tempfile.mkdtemp()
14380     try:
14381       tmpsock = utils.PathJoin(tmpdir, "sock")
14382
14383       logging.debug("Creating temporary socket at %s", tmpsock)
14384       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14385       try:
14386         sock.bind(tmpsock)
14387         sock.listen(1)
14388
14389         # Send details to client
14390         cb(tmpsock)
14391
14392         # Wait for client to connect before continuing
14393         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14394         try:
14395           (conn, _) = sock.accept()
14396         except socket.error, err:
14397           raise errcls("Client didn't connect in time (%s)" % err)
14398       finally:
14399         sock.close()
14400     finally:
14401       # Remove as soon as client is connected
14402       shutil.rmtree(tmpdir)
14403
14404     # Wait for client to close
14405     try:
14406       try:
14407         # pylint: disable=E1101
14408         # Instance of '_socketobject' has no ... member
14409         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14410         conn.recv(1)
14411       except socket.error, err:
14412         raise errcls("Client failed to confirm notification (%s)" % err)
14413     finally:
14414       conn.close()
14415
14416   def _SendNotification(self, test, arg, sockname):
14417     """Sends a notification to the client.
14418
14419     @type test: string
14420     @param test: Test name
14421     @param arg: Test argument (depends on test)
14422     @type sockname: string
14423     @param sockname: Socket path
14424
14425     """
14426     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14427
14428   def _Notify(self, prereq, test, arg):
14429     """Notifies the client of a test.
14430
14431     @type prereq: bool
14432     @param prereq: Whether this is a prereq-phase test
14433     @type test: string
14434     @param test: Test name
14435     @param arg: Test argument (depends on test)
14436
14437     """
14438     if prereq:
14439       errcls = errors.OpPrereqError
14440     else:
14441       errcls = errors.OpExecError
14442
14443     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14444                                                   test, arg),
14445                                    errcls)
14446
14447   def CheckArguments(self):
14448     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14449     self.expandnames_calls = 0
14450
14451   def ExpandNames(self):
14452     checkargs_calls = getattr(self, "checkargs_calls", 0)
14453     if checkargs_calls < 1:
14454       raise errors.ProgrammerError("CheckArguments was not called")
14455
14456     self.expandnames_calls += 1
14457
14458     if self.op.notify_waitlock:
14459       self._Notify(True, constants.JQT_EXPANDNAMES, None)
14460
14461     self.LogInfo("Expanding names")
14462
14463     # Get lock on master node (just to get a lock, not for a particular reason)
14464     self.needed_locks = {
14465       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14466       }
14467
14468   def Exec(self, feedback_fn):
14469     if self.expandnames_calls < 1:
14470       raise errors.ProgrammerError("ExpandNames was not called")
14471
14472     if self.op.notify_exec:
14473       self._Notify(False, constants.JQT_EXEC, None)
14474
14475     self.LogInfo("Executing")
14476
14477     if self.op.log_messages:
14478       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14479       for idx, msg in enumerate(self.op.log_messages):
14480         self.LogInfo("Sending log message %s", idx + 1)
14481         feedback_fn(constants.JQT_MSGPREFIX + msg)
14482         # Report how many test messages have been sent
14483         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14484
14485     if self.op.fail:
14486       raise errors.OpExecError("Opcode failure was requested")
14487
14488     return True
14489
14490
14491 class IAllocator(object):
14492   """IAllocator framework.
14493
14494   An IAllocator instance has three sets of attributes:
14495     - cfg that is needed to query the cluster
14496     - input data (all members of the _KEYS class attribute are required)
14497     - four buffer attributes (in|out_data|text), that represent the
14498       input (to the external script) in text and data structure format,
14499       and the output from it, again in two formats
14500     - the result variables from the script (success, info, nodes) for
14501       easy usage
14502
14503   """
14504   # pylint: disable=R0902
14505   # lots of instance attributes
14506
14507   def __init__(self, cfg, rpc_runner, mode, **kwargs):
14508     self.cfg = cfg
14509     self.rpc = rpc_runner
14510     # init buffer variables
14511     self.in_text = self.out_text = self.in_data = self.out_data = None
14512     # init all input fields so that pylint is happy
14513     self.mode = mode
14514     self.memory = self.disks = self.disk_template = self.spindle_usage = None
14515     self.os = self.tags = self.nics = self.vcpus = None
14516     self.hypervisor = None
14517     self.relocate_from = None
14518     self.name = None
14519     self.instances = None
14520     self.evac_mode = None
14521     self.target_groups = []
14522     # computed fields
14523     self.required_nodes = None
14524     # init result fields
14525     self.success = self.info = self.result = None
14526
14527     try:
14528       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14529     except KeyError:
14530       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14531                                    " IAllocator" % self.mode)
14532
14533     keyset = [n for (n, _) in keydata]
14534
14535     for key in kwargs:
14536       if key not in keyset:
14537         raise errors.ProgrammerError("Invalid input parameter '%s' to"
14538                                      " IAllocator" % key)
14539       setattr(self, key, kwargs[key])
14540
14541     for key in keyset:
14542       if key not in kwargs:
14543         raise errors.ProgrammerError("Missing input parameter '%s' to"
14544                                      " IAllocator" % key)
14545     self._BuildInputData(compat.partial(fn, self), keydata)
14546
14547   def _ComputeClusterData(self):
14548     """Compute the generic allocator input data.
14549
14550     This is the data that is independent of the actual operation.
14551
14552     """
14553     cfg = self.cfg
14554     cluster_info = cfg.GetClusterInfo()
14555     # cluster data
14556     data = {
14557       "version": constants.IALLOCATOR_VERSION,
14558       "cluster_name": cfg.GetClusterName(),
14559       "cluster_tags": list(cluster_info.GetTags()),
14560       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14561       "ipolicy": cluster_info.ipolicy,
14562       }
14563     ninfo = cfg.GetAllNodesInfo()
14564     iinfo = cfg.GetAllInstancesInfo().values()
14565     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14566
14567     # node data
14568     node_list = [n.name for n in ninfo.values() if n.vm_capable]
14569
14570     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14571       hypervisor_name = self.hypervisor
14572     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14573       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14574     else:
14575       hypervisor_name = cluster_info.primary_hypervisor
14576
14577     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14578                                         [hypervisor_name])
14579     node_iinfo = \
14580       self.rpc.call_all_instances_info(node_list,
14581                                        cluster_info.enabled_hypervisors)
14582
14583     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14584
14585     config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14586     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14587                                                  i_list, config_ndata)
14588     assert len(data["nodes"]) == len(ninfo), \
14589         "Incomplete node data computed"
14590
14591     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14592
14593     self.in_data = data
14594
14595   @staticmethod
14596   def _ComputeNodeGroupData(cfg):
14597     """Compute node groups data.
14598
14599     """
14600     cluster = cfg.GetClusterInfo()
14601     ng = dict((guuid, {
14602       "name": gdata.name,
14603       "alloc_policy": gdata.alloc_policy,
14604       "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14605       })
14606       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14607
14608     return ng
14609
14610   @staticmethod
14611   def _ComputeBasicNodeData(cfg, node_cfg):
14612     """Compute global node data.
14613
14614     @rtype: dict
14615     @returns: a dict of name: (node dict, node config)
14616
14617     """
14618     # fill in static (config-based) values
14619     node_results = dict((ninfo.name, {
14620       "tags": list(ninfo.GetTags()),
14621       "primary_ip": ninfo.primary_ip,
14622       "secondary_ip": ninfo.secondary_ip,
14623       "offline": ninfo.offline,
14624       "drained": ninfo.drained,
14625       "master_candidate": ninfo.master_candidate,
14626       "group": ninfo.group,
14627       "master_capable": ninfo.master_capable,
14628       "vm_capable": ninfo.vm_capable,
14629       "ndparams": cfg.GetNdParams(ninfo),
14630       })
14631       for ninfo in node_cfg.values())
14632
14633     return node_results
14634
14635   @staticmethod
14636   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14637                               node_results):
14638     """Compute global node data.
14639
14640     @param node_results: the basic node structures as filled from the config
14641
14642     """
14643     #TODO(dynmem): compute the right data on MAX and MIN memory
14644     # make a copy of the current dict
14645     node_results = dict(node_results)
14646     for nname, nresult in node_data.items():
14647       assert nname in node_results, "Missing basic data for node %s" % nname
14648       ninfo = node_cfg[nname]
14649
14650       if not (ninfo.offline or ninfo.drained):
14651         nresult.Raise("Can't get data for node %s" % nname)
14652         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14653                                 nname)
14654         remote_info = _MakeLegacyNodeInfo(nresult.payload)
14655
14656         for attr in ["memory_total", "memory_free", "memory_dom0",
14657                      "vg_size", "vg_free", "cpu_total"]:
14658           if attr not in remote_info:
14659             raise errors.OpExecError("Node '%s' didn't return attribute"
14660                                      " '%s'" % (nname, attr))
14661           if not isinstance(remote_info[attr], int):
14662             raise errors.OpExecError("Node '%s' returned invalid value"
14663                                      " for '%s': %s" %
14664                                      (nname, attr, remote_info[attr]))
14665         # compute memory used by primary instances
14666         i_p_mem = i_p_up_mem = 0
14667         for iinfo, beinfo in i_list:
14668           if iinfo.primary_node == nname:
14669             i_p_mem += beinfo[constants.BE_MAXMEM]
14670             if iinfo.name not in node_iinfo[nname].payload:
14671               i_used_mem = 0
14672             else:
14673               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14674             i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14675             remote_info["memory_free"] -= max(0, i_mem_diff)
14676
14677             if iinfo.admin_state == constants.ADMINST_UP:
14678               i_p_up_mem += beinfo[constants.BE_MAXMEM]
14679
14680         # compute memory used by instances
14681         pnr_dyn = {
14682           "total_memory": remote_info["memory_total"],
14683           "reserved_memory": remote_info["memory_dom0"],
14684           "free_memory": remote_info["memory_free"],
14685           "total_disk": remote_info["vg_size"],
14686           "free_disk": remote_info["vg_free"],
14687           "total_cpus": remote_info["cpu_total"],
14688           "i_pri_memory": i_p_mem,
14689           "i_pri_up_memory": i_p_up_mem,
14690           }
14691         pnr_dyn.update(node_results[nname])
14692         node_results[nname] = pnr_dyn
14693
14694     return node_results
14695
14696   @staticmethod
14697   def _ComputeInstanceData(cluster_info, i_list):
14698     """Compute global instance data.
14699
14700     """
14701     instance_data = {}
14702     for iinfo, beinfo in i_list:
14703       nic_data = []
14704       for nic in iinfo.nics:
14705         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14706         nic_dict = {
14707           "mac": nic.mac,
14708           "ip": nic.ip,
14709           "mode": filled_params[constants.NIC_MODE],
14710           "link": filled_params[constants.NIC_LINK],
14711           }
14712         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14713           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14714         nic_data.append(nic_dict)
14715       pir = {
14716         "tags": list(iinfo.GetTags()),
14717         "admin_state": iinfo.admin_state,
14718         "vcpus": beinfo[constants.BE_VCPUS],
14719         "memory": beinfo[constants.BE_MAXMEM],
14720         "spindle_usage": beinfo[constants.BE_SPINDLE_USAGE],
14721         "os": iinfo.os,
14722         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14723         "nics": nic_data,
14724         "disks": [{constants.IDISK_SIZE: dsk.size,
14725                    constants.IDISK_MODE: dsk.mode}
14726                   for dsk in iinfo.disks],
14727         "disk_template": iinfo.disk_template,
14728         "hypervisor": iinfo.hypervisor,
14729         }
14730       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14731                                                  pir["disks"])
14732       instance_data[iinfo.name] = pir
14733
14734     return instance_data
14735
14736   def _AddNewInstance(self):
14737     """Add new instance data to allocator structure.
14738
14739     This in combination with _AllocatorGetClusterData will create the
14740     correct structure needed as input for the allocator.
14741
14742     The checks for the completeness of the opcode must have already been
14743     done.
14744
14745     """
14746     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14747
14748     if self.disk_template in constants.DTS_INT_MIRROR:
14749       self.required_nodes = 2
14750     else:
14751       self.required_nodes = 1
14752
14753     request = {
14754       "name": self.name,
14755       "disk_template": self.disk_template,
14756       "tags": self.tags,
14757       "os": self.os,
14758       "vcpus": self.vcpus,
14759       "memory": self.memory,
14760       "spindle_usage": self.spindle_usage,
14761       "disks": self.disks,
14762       "disk_space_total": disk_space,
14763       "nics": self.nics,
14764       "required_nodes": self.required_nodes,
14765       "hypervisor": self.hypervisor,
14766       }
14767
14768     return request
14769
14770   def _AddRelocateInstance(self):
14771     """Add relocate instance data to allocator structure.
14772
14773     This in combination with _IAllocatorGetClusterData will create the
14774     correct structure needed as input for the allocator.
14775
14776     The checks for the completeness of the opcode must have already been
14777     done.
14778
14779     """
14780     instance = self.cfg.GetInstanceInfo(self.name)
14781     if instance is None:
14782       raise errors.ProgrammerError("Unknown instance '%s' passed to"
14783                                    " IAllocator" % self.name)
14784
14785     if instance.disk_template not in constants.DTS_MIRRORED:
14786       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14787                                  errors.ECODE_INVAL)
14788
14789     if instance.disk_template in constants.DTS_INT_MIRROR and \
14790         len(instance.secondary_nodes) != 1:
14791       raise errors.OpPrereqError("Instance has not exactly one secondary node",
14792                                  errors.ECODE_STATE)
14793
14794     self.required_nodes = 1
14795     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14796     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14797
14798     request = {
14799       "name": self.name,
14800       "disk_space_total": disk_space,
14801       "required_nodes": self.required_nodes,
14802       "relocate_from": self.relocate_from,
14803       }
14804     return request
14805
14806   def _AddNodeEvacuate(self):
14807     """Get data for node-evacuate requests.
14808
14809     """
14810     return {
14811       "instances": self.instances,
14812       "evac_mode": self.evac_mode,
14813       }
14814
14815   def _AddChangeGroup(self):
14816     """Get data for node-evacuate requests.
14817
14818     """
14819     return {
14820       "instances": self.instances,
14821       "target_groups": self.target_groups,
14822       }
14823
14824   def _BuildInputData(self, fn, keydata):
14825     """Build input data structures.
14826
14827     """
14828     self._ComputeClusterData()
14829
14830     request = fn()
14831     request["type"] = self.mode
14832     for keyname, keytype in keydata:
14833       if keyname not in request:
14834         raise errors.ProgrammerError("Request parameter %s is missing" %
14835                                      keyname)
14836       val = request[keyname]
14837       if not keytype(val):
14838         raise errors.ProgrammerError("Request parameter %s doesn't pass"
14839                                      " validation, value %s, expected"
14840                                      " type %s" % (keyname, val, keytype))
14841     self.in_data["request"] = request
14842
14843     self.in_text = serializer.Dump(self.in_data)
14844
  # Check functions (built from the "ht" module) used for validating
  # request parameters and allocator results

  # A list of strings
  _STRING_LIST = ht.TListOf(ht.TString)
  # A list of lists of dicts whose "OP_ID" entry must be one of the
  # failover, migrate or replace-disks opcode IDs
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  # A list of three-item entries: two non-empty strings followed by a list
  # of non-empty strings
  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  # A list of two-item entries: a non-empty string followed by an optional
  # string
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  # The complete result: the "moved" entries, the "failed" entries and the
  # job list, in this order
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  # Per-mode data: maps each allocator mode to a tuple of the method
  # building the request, the list of (key name, check function) pairs for
  # the request parameters, and the check function for the result.
  # NOTE(review): presumably unpacked by the constructor into the arguments
  # of _BuildInputData and into self._result_check -- confirm in __init__
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("spindle_usage", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
     constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
     constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
14898
14899   def Run(self, name, validate=True, call_fn=None):
14900     """Run an instance allocator and return the results.
14901
14902     """
14903     if call_fn is None:
14904       call_fn = self.rpc.call_iallocator_runner
14905
14906     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14907     result.Raise("Failure while running the iallocator script")
14908
14909     self.out_text = result.payload
14910     if validate:
14911       self._ValidateResult()
14912
14913   def _ValidateResult(self):
14914     """Process the allocator results.
14915
14916     This will process and if successful save the result in
14917     self.out_data and the other parameters.
14918
14919     """
14920     try:
14921       rdict = serializer.Load(self.out_text)
14922     except Exception, err:
14923       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14924
14925     if not isinstance(rdict, dict):
14926       raise errors.OpExecError("Can't parse iallocator results: not a dict")
14927
14928     # TODO: remove backwards compatiblity in later versions
14929     if "nodes" in rdict and "result" not in rdict:
14930       rdict["result"] = rdict["nodes"]
14931       del rdict["nodes"]
14932
14933     for key in "success", "info", "result":
14934       if key not in rdict:
14935         raise errors.OpExecError("Can't parse iallocator results:"
14936                                  " missing key '%s'" % key)
14937       setattr(self, key, rdict[key])
14938
14939     if not self._result_check(self.result):
14940       raise errors.OpExecError("Iallocator returned invalid result,"
14941                                " expected %s, got %s" %
14942                                (self._result_check, self.result),
14943                                errors.ECODE_INVAL)
14944
14945     if self.mode == constants.IALLOCATOR_MODE_RELOC:
14946       assert self.relocate_from is not None
14947       assert self.required_nodes == 1
14948
14949       node2group = dict((name, ndata["group"])
14950                         for (name, ndata) in self.in_data["nodes"].items())
14951
14952       fn = compat.partial(self._NodesToGroups, node2group,
14953                           self.in_data["nodegroups"])
14954
14955       instance = self.cfg.GetInstanceInfo(self.name)
14956       request_groups = fn(self.relocate_from + [instance.primary_node])
14957       result_groups = fn(rdict["result"] + [instance.primary_node])
14958
14959       if self.success and not set(result_groups).issubset(request_groups):
14960         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14961                                  " differ from original groups (%s)" %
14962                                  (utils.CommaJoin(result_groups),
14963                                   utils.CommaJoin(request_groups)))
14964
14965     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14966       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14967
14968     self.out_data = rdict
14969
14970   @staticmethod
14971   def _NodesToGroups(node2group, groups, nodes):
14972     """Returns a list of unique group names for a list of nodes.
14973
14974     @type node2group: dict
14975     @param node2group: Map from node name to group UUID
14976     @type groups: dict
14977     @param groups: Group information
14978     @type nodes: list
14979     @param nodes: Node names
14980
14981     """
14982     result = set()
14983
14984     for node in nodes:
14985       try:
14986         group_uuid = node2group[node]
14987       except KeyError:
14988         # Ignore unknown node
14989         pass
14990       else:
14991         try:
14992           group = groups[group_uuid]
14993         except KeyError:
14994           # Can't find group, let's use UUID
14995           group_name = group_uuid
14996         else:
14997           group_name = group["name"]
14998
14999         result.add(group_name)
15000
15001     return sorted(result)
15002
15003
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  Depending on the direction given in the opcode, this LU either returns
  the input text generated for an allocator or runs the named allocator and
  returns its raw output.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    @raise errors.OpPrereqError: if a parameter is missing or invalid for
        the requested mode, if the instance to be allocated already exists,
        or if the mode or direction is unknown

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # an allocation test must use a name which is not yet taken
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # every disk needs an integer size and a valid access mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # the instance is relocated away from its current secondary nodes
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # the allocator name is only needed when the script is actually run
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: not used by this LU
    @return: the allocator input text for the "in" direction, otherwise the
        raw (not validated) output of the allocator
    @raise errors.ProgrammerError: if the mode is not handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      # NOTE(review): the allocation request also reads "spindle_usage"
      # (see IAllocator._AddNewInstance), which is not passed here -- confirm
      # whether IAllocator supplies a default for it
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      # the mode has to be interpolated into the message; passing it as a
      # second exception argument would leave a literal "%s" in the text
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # the raw output is returned as is, hence no validation
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
15107
15108
#: Query type implementations, indexed by the query resource name
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

# The table must cover exactly the resources listed in constants.QR_VIA_OP
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15118
15119
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the matching entry of L{_QUERY_IMPL}
  @raise errors.OpPrereqError: if the query type is unknown

  """
  if name in _QUERY_IMPL:
    return _QUERY_IMPL[name]

  raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                             errors.ECODE_INVAL)